# Step 1: convert the plain-text files into a SequenceFile.
# Read the raw text directory as UTF-8 (-c) and write the result to seqfiles.
data_dir=/Users/ruihaidong/Documents/workspace_java/lda/data
./mahout seqdirectory \
  -c UTF-8 \
  -i "${data_dir}/sourceFilesource/" \
  -o "${data_dir}/seqfiles"
# Step 2: convert the SequenceFile into vectors.
# Vectorize the seqfiles directory into vectors; -ow is passed so an existing
# output directory is presumably overwritten (confirm against the Mahout docs).
data_dir=/Users/ruihaidong/Documents/workspace_java/lda/data
./mahout seq2sparse \
  -i "${data_dir}/seqfiles/" \
  -o "${data_dir}/vectors" \
  -ow
# Step 3: run LDA.
# Run LDA over the tf-vectors directory with -k 20 and write to ldaresult.
# NOTE(review): -k is presumably the number of topics; confirm for the Mahout
# version in use.
data_dir=/Users/ruihaidong/Documents/workspace_java/lda/data
./mahout lda \
  -i "${data_dir}/vectors/tf-vectors/" \
  -o "${data_dir}/ldaresult/" \
  -k 20
# Step 4: print the LDA topics.
# Print topics from one LDA state directory, using the dictionary file found
# under the vectors directory (-dt sequencefile gives the dictionary type).
# NOTE(review): state-44 is hard-coded; presumably it is the last state the LDA
# run produced, so it may differ between runs. -w 10 presumably limits the
# output to 10 words per topic; confirm both against the Mahout docs.
data_dir=/Users/ruihaidong/Documents/workspace_java/lda/data
./mahout ldatopics \
  -i "${data_dir}/ldaresult/state-44/" \
  -d "${data_dir}/vectors/dictionary.file-0" \
  -dt sequencefile \
  -w 10
No comments:
Post a Comment