def getMappingDicts_reGen(corpusdir, mapsdir, cons):
    """Return the three mapping dicts, regenerating them if constraints changed.

    The constraints used on the previous run are read from
    ``<corpusdir>/constraint.set``.  When that file exists and ``checkSame``
    reports that ``cons`` matches its contents, the work is delegated to
    ``getMappingDicts``, which loads the pickled dicts from ``mapsdir`` if all
    of them are present and regenerates them otherwise.  In every other case
    (no constraint file, or different constraints) the dicts are regenerated
    via ``getNewMappingDicts``.

    Args:
        corpusdir: Directory holding the corpus and the ``constraint.set``
            file.
        mapsdir: Directory holding ``word_wid.dict``, ``wid_did.dict`` and
            ``did_doc.dict``.
        cons: Current constraints; compared with the stored ones via
            ``checkSame`` and then written back to ``constraint.set``.

    Returns:
        Tuple ``(word_wid_dic, wid_did_dic, did_doc_dic)``.
    """
    cons_file = corpusdir + "/constraint.set"

    # The cached dicts are only trusted when a previous constraint file exists
    # and still matches the constraints of this run.
    if os.path.exists(cons_file) and checkSame(cons, read_pickle(cons_file)):
        (word_wid_dic, wid_did_dic, did_doc_dic) = \
            getMappingDicts(corpusdir, mapsdir)
    else:
        (word_wid_dic, wid_did_dic, did_doc_dic) = \
            getNewMappingDicts(corpusdir, mapsdir)

    # Remember the constraints these dicts correspond to for the next run.
    # NOTE(review): assumed to run on every path (also when nothing changed)
    # -- confirm.
    write_pickle(cons, cons_file)
    return (word_wid_dic, wid_did_dic, did_doc_dic)
def getMappingDicts(corpusdir, mapsdir):
    """Load the three pickled mapping dicts from ``mapsdir``, or rebuild them.

    The dicts are read from ``word_wid.dict``, ``wid_did.dict`` and
    ``did_doc.dict`` under ``mapsdir``.  If any of the three files is missing,
    none of them is read and ``getNewMappingDicts(corpusdir, mapsdir)`` is
    called instead.

    Args:
        corpusdir: Corpus directory, forwarded to ``getNewMappingDicts``.
        mapsdir: Directory expected to contain the three pickle files.

    Returns:
        Tuple ``(word_wid_dic, wid_did_dic, did_doc_dic)``.
    """
    pickle_paths = [
        mapsdir + "/word_wid.dict",
        mapsdir + "/wid_did.dict",
        mapsdir + "/did_doc.dict",
    ]

    # A partial cache is treated the same as no cache at all.
    if not all(os.path.exists(path) for path in pickle_paths):
        word_wid_dic, wid_did_dic, did_doc_dic = \
            getNewMappingDicts(corpusdir, mapsdir)
        return (word_wid_dic, wid_did_dic, did_doc_dic)

    word_wid_dic, wid_did_dic, did_doc_dic = \
        [read_pickle(path) for path in pickle_paths]
    return (word_wid_dic, wid_did_dic, did_doc_dic)
def getNewAddedCons(corpusdir, cons_set, cons_list):
    """Return the constraints in ``cons_set`` that were not stored previously.

    The previous constraint set is read from ``<corpusdir>/constraint.set``.
    If that file does not exist, ``cons_set`` itself is returned (the same
    object, not a copy).  Afterwards ``cons_set`` is written to
    ``constraint.set`` and ``cons_list`` to ``constraint.list``, both under
    ``corpusdir``.

    Args:
        corpusdir: Directory where the constraint files live.
        cons_set: Current constraints; must provide ``difference``.
        cons_list: Current constraints in list form; only written to disk.

    Returns:
        ``cons_set.difference(<stored set>)`` when a stored set exists,
        otherwise ``cons_set``.
    """
    set_path = corpusdir + "/constraint.set"
    list_path = corpusdir + "/constraint.list"

    if os.path.exists(set_path):
        previous_set = read_pickle(set_path)
        added = cons_set.difference(previous_set)
    else:
        # Nothing stored yet: every constraint counts as newly added.
        added = cons_set

    # Persist the current constraints (set first, then list) for the next run.
    write_pickle(cons_set, set_path)
    write_pickle(cons_list, list_path)
    return added
flags.define_int("num_topics", 0, "Current number of topics") if __name__ == "__main__": flags.InitFlags() if re.search("doc", flags.update_strategy): update_strategy = 1 elif re.search("term", flags.update_strategy): update_strategy = 0 else: print "Wrong update strategy!" exit() # Build index if it doesn't already exist if os.path.exists(flags.mapping): index = read_pickle(flags.mapping) else: index = build_index(flags.corpus, flags.mapping) # Remove offending assignments if re.search("clear", flags.resume_type): # Read in constraints cons_set = get_constraints(flags.cons_file) print cons_set clear_assignments(flags.input_base, flags.output_base, index, cons_set, update_strategy) elif re.search("split", flags.resume_type): # Read in constraints print flags.wordnet [cons_list, cons_set] = get_constraints_from_wn(flags.wordnet) print cons_set