# Script: load the persisted SDG v2 dictionary / corpus / LDA model, build the
# cosine-similarity helper, then score every UNDAF entry against it.

# Paths of the persisted SDG v2 artefacts.
dict_fp = "../data/sdg_v2.dict"
corpus_fp = "../data/sdg_v2.mm"
model_fp = "../data/sdg_v2.lda_model"

# loading dictionary, corpus, lda_model
dictionary = corpora.Dictionary.load(dict_fp)
corpus = corpora.MmCorpus(corpus_fp)
lda = models.ldamodel.LdaModel.load(model_fp)

# Build the similarity helper from the three loaded objects and hand it the
# index path via save_index().
cosSim = CosineSimilarity(dictionary=dictionary, corpus=corpus, model=lda)
cosSim.save_index("../data/sdg_v2.index")

# loading undaf data
undaf_fp = "../data/undaf_v2.json"
p = Preprocessor(undaf_fp)
p.loadjson()
undaf_docs = p.json

# loading metatopic data
# NOTE(review): clean_data_fp is not assigned anywhere in this span; it has to
# be defined earlier in the file for this line to run.
sdg = Preprocessor(clean_data_fp)
sdg.loadjson()
sdg_metatopics = sdg.json[0].keys()

# Single-argument print(...) behaves the same as the Python 2 print statement
# and also parses under Python 3.
for topic in sdg_metatopics:
    print(topic)

# NOTE(review): the two loops are taken to be sequential (``topic`` is not used
# below) -- confirm against the intended layout.
for k, v in p.json[0].items():
    # The second argument (10) is presumably the number of matches to return;
    # confirm against CosineSimilarity.calculate_sim.
    # NOTE(review): simScore is overwritten on every pass and never read in
    # this span.
    simScore = cosSim.calculate_sim(v, 10)
    # Equivalent to the Python 2 statement ``print "\n", k``: no separator
    # space is written after an item ending in a newline.  %-formatting (not
    # str.format) keeps unicode keys working on Python 2.
    print("\n%s" % (k,))

# NOTE(review): the script ended with the bare literal below.  As a statement
# it only builds and discards a tuple, so it is kept here as a comment; it
# looks like a pasted data record -- confirm before deleting.
# {"y": 0.089070708, "x": 0, "doc": 2},
def preprocess(input_fp, output_fp):
    """Run the Preprocessor pipeline on one file.

    Constructs a ``Preprocessor`` for ``input_fp`` and calls, in order,
    ``loadjson()``, ``processjson()`` and ``savejson(output_fp)``.

    Args:
        input_fp: Passed unchanged to the ``Preprocessor`` constructor.
        output_fp: Passed unchanged to ``savejson``.

    Returns:
        None.
    """
    preprocessor = Preprocessor(input_fp)

    # Order matters: load, then process, then save.
    preprocessor.loadjson()
    preprocessor.processjson()
    preprocessor.savejson(output_fp)
# Script: load the persisted SDG v2 dictionary / corpus / LDA model, build the
# cosine-similarity helper, then score every UNDAF entry against it.

# Paths of the persisted SDG v2 artefacts.
dict_fp = '../data/sdg_v2.dict'
corpus_fp = '../data/sdg_v2.mm'
model_fp = '../data/sdg_v2.lda_model'

# loading dictionary, corpus, lda_model
dictionary = corpora.Dictionary.load(dict_fp)
corpus = corpora.MmCorpus(corpus_fp)
lda = models.ldamodel.LdaModel.load(model_fp)

# Build the similarity helper from the three loaded objects and hand it the
# index path via save_index().
cosSim = CosineSimilarity(dictionary=dictionary, corpus=corpus, model=lda)
cosSim.save_index('../data/sdg_v2.index')

# loading undaf data
undaf_fp = '../data/undaf_v2.json'
p = Preprocessor(undaf_fp)
p.loadjson()
undaf_docs = p.json

# loading metatopic data
# NOTE(review): clean_data_fp is not assigned anywhere in this span; it has to
# be defined earlier in the file for this line to run.
sdg = Preprocessor(clean_data_fp)
sdg.loadjson()
sdg_metatopics = sdg.json[0].keys()

# Single-argument print(...) behaves the same as the Python 2 print statement
# and also parses under Python 3.
for topic in sdg_metatopics:
    print(topic)

# NOTE(review): the two loops are taken to be sequential (``topic`` is not used
# below) -- confirm against the intended layout.
for k, v in p.json[0].items():
    # The second argument (10) is presumably the number of matches to return;
    # confirm against CosineSimilarity.calculate_sim.
    # NOTE(review): simScore is overwritten on every pass and never read in
    # this span.
    simScore = cosSim.calculate_sim(v, 10)
    # Equivalent to the Python 2 statement ``print '\n', k``: no separator
    # space is written after an item ending in a newline.  %-formatting (not
    # str.format) keeps unicode keys working on Python 2.
    print('\n%s' % (k,))

# NOTE(review): the script ended with the bare literal below.  As a statement
# it only builds and discards a tuple, so it is kept here as a comment; it
# looks like a pasted data record -- confirm before deleting.
# {'y': 0.089070708, 'x': 0, 'doc': 2},