def men_articles_coref_psql(data_source):
    """Run coreference resolution over every mention article of a data source.

    Fetches (mention, article_text) pairs from PostgreSQL via the shared
    DB connection, then feeds each article text through the Coreference
    pipeline, keyed by a hash of the document text.

    Args:
        data_source: identifier of the source whose mention articles to process.
    """
    men_art_tuple = Dictionaries.db_conn_obj.fetch_all_men_articles_source(data_source)
    men_ent_dict = dict(men_art_tuple)
    # Derive the total from the actual result set instead of the previously
    # hard-coded 500, so the progress ratio is accurate for any source size.
    total = len(men_ent_dict)
    coref = Coreference()
    for doc_idx, (men, m_text) in enumerate(men_ent_dict.items(), start=1):
        # Fixed typo in the original message ("Processsing").
        print("Processing doc {}, progress {}/{} = {:.2f}".format(
            men, doc_idx, total, doc_idx / total))
        doc_id_hash = Dictionaries.doc_to_hash(m_text)
        coref.coref_input_doc(m_text, men, doc_id_hash, data_source)
# NOTE(review): the statements below are the tail of a function whose `def`
# line falls above this chunk — presumably it saves per-mention entropy/idf/tf
# token dictionaries to Redis under the 'sm50' key namespace; confirm against
# the full file. Code tokens left untouched.
        mention, ent_tok_entropy_dict, redis_key='sm50')
    Dictionaries.redis_db_obj.save_men_ents_tokens_idf_redis(mention, ent_tok_idf_dict, redis_key='sm50')
    Dictionaries.redis_db_obj.save_men_ents_tokens_tf_redis(mention, ent_tok_tf_dict, redis_key='sm50')
    return ent_tok_entropy_dict, ent_tok_idf_dict, ent_tok_tf_dict


def cal_fq(tok_idxs_dict: dict) -> int:
    """Return the total frequency across all tokens.

    Sums the lengths of the occurrence-index lists in *tok_idxs_dict*
    (mapping of token -> list of positions/indices where it occurs).
    """
    total = 0
    for tok, idxs in tok_idxs_dict.items():
        total += len(idxs)
    return total


if __name__ == '__main__':
    # Script entry point: only the dictionary initialisation runs; the lines
    # below it are earlier ad-hoc experiments kept commented out by the author.
    # Dictionaries.spacy_init = spacy.load('en')
    Dictionaries.init_dictionaries()
    # ents = ['Japan', 'Beijing']
    # ss = Dictionaries.spacy_init.tokenizer('Beijing is a city')
    # print(ss)
    # ent_article_tok(ents)
    # texts = 'Beijig is a great city, EB, Japanese, Asia, Japanese, Asia, Japanese, Asia, Japanese, Asia, Japanese, Asia, Pacific sdfdf Japan japnasfd dfa Japan.'
    # ent_tok_dict, men_toks_idx = ent_article_tok(ents), men_toks('Beijing', texts)
    # gen_comm_tokens_web(ent_tok_dict, men_toks_idx)
    # ent_articles_tok_redis()
    # men_articles_tok_redis()
    # ent_articles_tok_redis_single_test()