Example #1
def main(q1, q2, articles, batch, input_type, outputFileName, dictType):
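    # Fetch PubMed IDs for both query terms, run TEES over the retrieved
    # articles (batch or one-at-a-time), and compare the resulting term
    # dictionaries with cosine similarity.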
    num_articles = int(articles)
    query = queries.main(q1, q2)

    if batch == "yes":
        q1_id_list, q2_id_list = pmids.main(query, num_articles)
        q1_file_paths = run_tees_batch(q1, q1_id_list)
        q2_file_paths = run_tees_batch(q2, q2_id_list)

    if batch == "no":
        q1_id_list, q2_id_list = pmids.main(query, num_articles)
        q1_file_paths = run_tees(q1, q1_id_list)
        q2_file_paths = run_tees(q2, q2_id_list)

    q1_dict = get_info_from_interaction_xml(q1_file_paths)
    q2_dict = get_info_from_interaction_xml(q2_file_paths)

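    # dictType selects which comparison(s) to run: 'all', 'protein', or 'both'.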
    if dictType in ('all', 'both'):
        all_words_dict = get_all_words_dict(q1, q2, q1_dict, q2_dict)
        normalized_all_words_dict = normalize_dict(all_words_dict, query)
        angle_list_all = Cosine_Sim.main(normalized_all_words_dict, q1, q2)
        print_pair_score_dict(angle_list_all, normalized_all_words_dict, q1,
                              q2, input_type, outputFileName)

    if dictType in ('protein', 'both'):
        query_dicts = [q1_dict, q2_dict]
        combined_dict = combine_dictionaries(query_dicts)
        normalized_protein_dict = normalize_dict(combined_dict, query)
        angle_list_protein = Cosine_Sim.main(normalized_protein_dict, q1, q2)
        print_pair_score_dict(angle_list_protein, normalized_protein_dict, q1,
                              q2, input_type, outputFileName)
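
A minimal sketch of how this entry point might be invoked, assuming the helper modules (queries, pmids, Cosine_Sim) are importable; all argument values are illustrative, not taken from the project:

# Hypothetical invocation; every value below is illustrative.
main('BRCA1', 'TP53', articles='10', batch='no', input_type='text',
     outputFileName='results.txt', dictType='both')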
Example #2
def get_all_predicted_relations_dict(hprd50_paper_dict, max_sentences):
    # For every candidate protein pair in each paper, score the sentences
    # in which both proteins co-occur and collect the results per paper.
    all_predicted_relations_dict = {}
    for key in hprd50_paper_dict:
        found_relation_score_tuples_list = []
        for relation in hprd50_paper_dict[key].possible_relations_Tnumber:
            q1 = relation[0]
            q2 = relation[1]
            if q1 == q2:
                continue
            query = queries.main(q1, q2)
            ID_sentence_list = find_cooccurrence_sents(hprd50_paper_dict[key],
                                                       query)
            print 'ID_sentence_list: ', ID_sentence_list
            if not ID_sentence_list:
                found_relation_score_tuple = ((q1, q2), 0)
                print found_relation_score_tuple
                found_relation_score_tuples_list.append(
                    found_relation_score_tuple)
            else:
                sentences_with_score1 = Score1.rank_sentences(
                    ID_sentence_list, query, max_sentences)
                sentences_with_score2 = Score2.main(sentences_with_score1,
                                                    query)
                sorted_sentences_with_score2 = sorted(
                    sentences_with_score2,
                    key=operator.attrgetter('score'),
                    reverse=True)
                for sentence_object in sorted_sentences_with_score2:
                    found_relation_score_tuple = (
                        (q1, q2), sentence_object.score,
                        sentence_object.order_in_abstract)
                    print found_relation_score_tuple
                    found_relation_score_tuples_list.append(
                        found_relation_score_tuple)
        all_predicted_relations_dict[key] = found_relation_score_tuples_list
    return all_predicted_relations_dict
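
A hedged usage sketch: hprd50_paper_dict is assumed to map paper IDs to objects exposing a possible_relations_Tnumber list of (q1, q2) pairs, as the loop above requires:

# Hypothetical call; the input shape is inferred from the loop above.
predictions = get_all_predicted_relations_dict(hprd50_paper_dict,
                                               max_sentences=10)
# predictions[paper_id] -> [((q1, q2), 0), ...] with no co-occurrence,
# or [((q1, q2), score, order_in_abstract), ...] otherwise.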
Example #3
def index(a1_file, articles, max_sentences):

    global a_file
    a_file = a1_file

    q1 = a1_file.protein1
    q2 = a1_file.protein2
    query = queries.main(q1, q2)  # Creates Queries
    q1_syns = query.q1_syns  # Retrieves Q1 and Q2 synonyms
    q2_syns = query.q2_syns
    print a1_file.protein1, ' synonyms = ', q1_syns
    print a1_file.protein2, ' synonyms = ', q2_syns

    ID_sentence_position_list = Papers.main(query, articles)
    if len(ID_sentence_position_list) > 0:
        print str(len(
            ID_sentence_position_list)) + " sentences with co-occurrence found"

    sentences_with_score1 = Syntax_Scorer.main(ID_sentence_position_list,
                                               query, max_sentences)
    sentences_with_score2 = Semantics_Scorer.main(sentences_with_score1, query)
    sorted_sentences_with_score2 = sorted(sentences_with_score2,
                                          key=operator.attrgetter('score'),
                                          reverse=True)
    if sorted_sentences_with_score2:
        with open(r'txt_files_Testing\calibration unlimited sentences',
                  'a') as f:
            f.write(query.q1 + '\t' + query.q2 + '\n')
            for sent in sorted_sentences_with_score2:
                sent_w_replaced_queries = Organize.insert_syns(
                    sent.sentence, q1, q1_syns, q2, q2_syns)
                if str(sent.sentence)[0] != '<':
                    f.write(
                        str(sent.score) + ' ' + str(sent.method_scored) +
                        '\t' + sent_w_replaced_queries + '\n')
                    print str(sent.score) + ' ' + sent_w_replaced_queries
            f.write('\n')

    print_output_to_file(sorted_sentences_with_score2, q1_syns, q1, q2_syns,
                         q2, a1_file)
    print ""
Example #4
def main(q1, q2, articles, batch, input_type, outputFileName, dictType,
         outputType, evaluation_mode, stemmed, model, text_file):
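    # Reuse TEES output that is already on disk where possible; otherwise
    # fetch PMIDs and run TEES. The term dictionaries are then compared by
    # cosine similarity; 90.00 is returned when either term has no entries.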
    models = model.split(' ')
    num_articles = int(articles)
    query = queries.main(q1, q2)
    q1_dict = {}
    q2_dict = {}

    q1_already_downloaded_ids = get_already_downloaded_ids(q1, models)
    q2_already_downloaded_ids = get_already_downloaded_ids(q2, models)
    q1_already_downloaded_file_path_list = get_already_downloaded_file_paths(
        q1, models, num_articles)
    q2_already_downloaded_file_path_list = get_already_downloaded_file_paths(
        q2, models, num_articles)

    q1_already_dl_slice = None
    q2_already_dl_slice = None
    q1_file_paths = None
    q2_file_paths = None

    if num_articles * 100 <= len(q1_already_downloaded_file_path_list):
        q1_already_dl_slice = (
            q1_already_downloaded_file_path_list[:num_articles])
        q1_dict = get_info_from_interaction_xml(q1_already_dl_slice)
    else:
        q1_id_list = pmids.main(query.q1, num_articles, query.q1_search_string,
                                evaluation_mode)
        if len(q1_id_list) == len(q1_already_downloaded_file_path_list):
            q1_dict = get_info_from_interaction_xml(
                q1_already_downloaded_file_path_list)
        else:
            if batch == 'yes':
                q1_file_paths = run_tees_batch(q1, q1_id_list, models,
                                               text_file)
            elif batch == 'no':
                q1_file_paths = run_tees(q1, q1_id_list, models, text_file)
            if not q1_file_paths:
                q1_file_paths = (
                    q1_already_downloaded_file_path_list[:num_articles])
            q1_dict = get_info_from_interaction_xml(q1_file_paths)

    if num_articles * 100 <= len(q2_already_downloaded_file_path_list):
        q2_already_dl_slice = (
            q2_already_downloaded_file_path_list[:num_articles])
        q2_dict = get_info_from_interaction_xml(q2_already_dl_slice)
    else:
        q2_id_list = pmids.main(query.q2, num_articles, query.q2_search_string,
                                evaluation_mode)
        if len(q2_id_list) == len(q2_already_downloaded_file_path_list):
            q2_dict = get_info_from_interaction_xml(
                q2_already_downloaded_file_path_list)
        else:
            if batch == 'yes':
                q2_file_paths = run_tees_batch(q2, q2_id_list, models,
                                               text_file)
            elif batch == 'no':
                q2_file_paths = run_tees(q2, q2_id_list, models, text_file)
            if not q2_file_paths:
                q2_file_paths = (
                    q2_already_downloaded_file_path_list[:num_articles])
            q2_dict = get_info_from_interaction_xml(q2_file_paths)

    if q1_already_dl_slice:
        q1_num_docs_processed = len(q1_already_dl_slice)
    elif q1_file_paths:
        q1_num_docs_processed = len(q1_file_paths)
    else:
        q1_num_docs_processed = len(q1_already_downloaded_file_path_list)

    if q2_already_dl_slice:
        q2_num_docs_processed = len(q2_already_dl_slice)
    elif q2_file_paths:
        q2_num_docs_processed = len(q2_file_paths)
    else:
        q2_num_docs_processed = len(q2_already_downloaded_file_path_list)

    print q1, 'num_docs_processed', q1_num_docs_processed
    print q2, 'num_docs_processed', q2_num_docs_processed
    num_docs_processed = [q1_num_docs_processed, q2_num_docs_processed]

    return_dict_s = []
    if dictType == 'all':
        all_words_dict = get_all_words_dict(q1, q2, q1_dict, q2_dict)
        normalized_all_words_dict = normalize_dict(all_words_dict, query,
                                                   stemmed)
        return_dict_s.append(normalized_all_words_dict)
        if len(normalized_all_words_dict[query.q1.lower()]) < 1 or len(
                normalized_all_words_dict[query.q2.lower()]) < 1:
            angle_list = [90.00]
        else:
            angle_list = Cosine_Sim.main(normalized_all_words_dict, q1, q2)

    if dictType == 'protein':
        query_dicts = [q1_dict, q2_dict]
        combined_dict = combine_dictionaries(query_dicts)
        normalized_protein_dict = normalize_dict(combined_dict, query, stemmed)
        return_dict_s.append(normalized_protein_dict)
        if len(normalized_protein_dict[query.q1.lower()]) < 1 or len(
                normalized_protein_dict[query.q2.lower()]) < 1:
            angle_list = [90.00]
        else:
            angle_list = Cosine_Sim.main(normalized_protein_dict, q1, q2)

    return angle_list, return_dict_s, num_docs_processed
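
A hedged sketch of a call; every argument value is illustrative, and 'GE11' is a placeholder model name rather than one confirmed by this code:

# Hypothetical invocation; values below are illustrative only.
angles, dicts, docs = main('BRCA1', 'TP53', articles='20', batch='no',
                           input_type='text', outputFileName='out.tsv',
                           dictType='protein', outputType='file',
                           evaluation_mode=False, stemmed=True,
                           model='GE11', text_file=None)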
Example #5
    def get_results_pack(self, pack, dbvar, event_string, ignore_numbers):
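        # Collect plot data for the requested range; when compare_to_days
        # is a positive integer, also collect a "mirror" range shifted
        # back by that many days.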
        self.log("Start results_pack generation.")

        need_mirror = False
        compare_days_str = pack["result_options"]["compare_to_days"]
        try:
            compare_int = int(compare_days_str)
        except ValueError:
            self.bug(
                "Cannot convert compare_to_days value: {} into int.".format(
                    compare_days_str))
        else:
            if compare_int > 0:
                need_mirror = True

        agg_len = pack["result_options"]["aggregation_period"]
        agg_type = pack["result_options"]["aggregation_type"]

        # COLLECT PRIME DATA
        date_list = []
        m_datelist = []
        list_of_plot_tuples = []
        date_list, result_dict = queries.main(pack,
                                              dbvar,
                                              ignore_numbers=ignore_numbers)

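        # Collapse the raw dates to the first date of each aggregation chunk.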
        date_list = [chunk[0] for chunk in self.get_chunks(date_list, agg_len)]
        list_of_plot_tuples = self.generate_list_of_plot_tuples(
            result_dict, list_of_plot_tuples, agg_len, agg_type)

        if pack["result_options"]["use_new_breakdowns"]:
            breakdown_keys = None
        else:
            breakdown_keys = list(result_dict["lines"].keys())

        # COLLECT MIRROR DATA IF NEEDED
        if need_mirror:
            pack["data_filters"]["start_datetime"] = (
                pack["data_filters"]["start_datetime"] -
                datetime.timedelta(compare_int))
            pack["data_filters"]["end_datetime"] = (
                pack["data_filters"]["end_datetime"] -
                datetime.timedelta(compare_int))
            m_datelist, m_resultdict = queries.main(
                pack,
                dbvar,
                breakdown_keys=breakdown_keys,
                ignore_numbers=ignore_numbers)

            m_datelist = [
                chunk[0] for chunk in self.get_chunks(m_datelist, agg_len)
            ]
            list_of_plot_tuples = self.generate_list_of_plot_tuples(
                m_resultdict,
                list_of_plot_tuples,
                agg_len,
                agg_type,
                is_mirror=True)

        self.log("Completed request_pack generation.")
        return date_list, m_datelist, list_of_plot_tuples
Example #6
    def send_query(self):
        # NOTE: `arg` is not defined in this snippet; it presumably comes
        # from the enclosing scope (e.g., parsed command-line arguments).
        queries.main(arg[1:])
        return True