def nucleus_search(dict_occ_ref, candidates, nucleus_threshold,
                   log_file_path):
    """Look for new nucleus candidates and log every one that is found.

    Each window returned by ``ana_useful.define_windows`` is checked with
    ``nucleus_valid_window`` twice: once as-is and once after mirroring it
    with ``ana_useful.symmetric_window``. A mirrored match is passed through
    ``symmetric_window`` again before being kept.

    Args:
        dict_occ_ref: Forwarded unchanged to ``ana_useful.define_windows``.
        candidates: Forwarded unchanged to ``ana_useful.define_windows``.
        nucleus_threshold: Forwarded unchanged to ``nucleus_find_cand``.
        log_file_path: Target handed to every ``ana_useful.write_log`` call.

    Returns:
        dict: for each new candidate, a list holding the occurrence lists
        that produced it.
    """
    dict_nucleus = {}

    # NOTE(review): the literals 3 and 2 are opaque here -- presumably window
    # size and number of candidates per window; confirm in define_windows.
    kept_windows = []
    for window in ana_useful.define_windows(dict_occ_ref, candidates, 3, 2):
        forward_match = nucleus_valid_window(window)
        if forward_match:
            kept_windows.append(forward_match)

        mirrored_match = nucleus_valid_window(
            ana_useful.symmetric_window(window))
        if mirrored_match:
            kept_windows.append(ana_useful.symmetric_window(mirrored_match))

    found_words = dict_found_words(kept_windows)
    occurrences_by_shape = nucleus_find_cand(found_words, nucleus_threshold)

    # An empty result simply yields no iteration; only the occurrence lists
    # are needed, the keys are not used.
    for occ_cand_list in occurrences_by_shape.values():
        new_cand, occ_count = ana_useful.new_cand_nucleus(occ_cand_list)
        dict_nucleus.setdefault(new_cand, []).append(occ_cand_list)

        headline = 'NOYAU TROUVE ' + str(new_cand) + ' ' + str(occ_count)
        ana_useful.write_log(log_file_path, headline)
        # TODO: recover the valid windows that led to this nucleus.
        ana_useful.write_log(log_file_path, ' LISTE DES OCCURRENCES')
        for occ_cand in occ_cand_list:
            ana_useful.write_log(log_file_path, ' ' + str(occ_cand))

    return dict_nucleus
def expression_search(dict_occ_ref, candidates, expression_threshold,
                      log_file_path):
    """Look for new expression candidates, one known candidate at a time.

    For every candidate, windows are built with ``ana_useful.define_windows``,
    filtered by ``expression_valid_windows`` and grouped by
    ``expression_find_cand``. Every group is turned into a new candidate with
    ``ana_useful.new_cand_expression`` and written to the log.

    Args:
        dict_occ_ref: Forwarded unchanged to ``ana_useful.define_windows``.
        candidates: Iterable of candidates; each one is processed separately.
        expression_threshold: Forwarded unchanged to ``expression_find_cand``.
        log_file_path: Target handed to every ``ana_useful.write_log`` call.

    Returns:
        dict: new candidate -> list of windows it was built from. When two
        groups give the same new candidate, the later one replaces the
        earlier one.
    """
    dict_expre = {}

    for candidate in candidates:
        # define_windows is given a list, so the single candidate is wrapped.
        # Per the original author's note: windows look like
        # `CAND1 + (cand or any word) + (cand or any word)`, and stop words
        # ("v") are not represented.
        windows = ana_useful.define_windows(dict_occ_ref, [candidate], 3, 1)

        valid_windows = expression_valid_windows(windows, candidate)
        if valid_windows == []:
            continue

        dict_cand_windows = expression_find_cand(valid_windows,
                                                 expression_threshold)
        # An empty result yields no iteration; the keys are not needed.
        for windows_cand_list in dict_cand_windows.values():
            new_cand, occ_count = ana_useful.new_cand_expression(
                windows_cand_list)
            dict_expre[new_cand] = windows_cand_list

            headline = ('EXPRESSION TROUVEE ' + str(new_cand) + ' '
                        + str(occ_count))
            ana_useful.write_log(log_file_path, headline)
            ana_useful.write_log(log_file_path, ' LISTE DES OCCURRENCES ')
            for window_cand in windows_cand_list:
                ana_useful.write_log(log_file_path, ' ' + str(window_cand))

    return dict_expre
def nucleus_search(dict_occ_ref, candidates, nucleus_threshold,
                   log_file_path):
    """Search the candidate windows for new nuclei and log the findings.

    Windows come from ``ana_useful.define_windows``. Each one is validated
    with ``nucleus_valid_window`` in its given orientation and in the
    orientation produced by ``ana_useful.symmetric_window``; a hit on the
    symmetric form goes through ``symmetric_window`` once more before it is
    stored.

    Args:
        dict_occ_ref: Handed as-is to ``ana_useful.define_windows``.
        candidates: Handed as-is to ``ana_useful.define_windows``.
        nucleus_threshold: Handed as-is to ``nucleus_find_cand``.
        log_file_path: Destination used for all ``ana_useful.write_log`` calls.

    Returns:
        dict: new candidate -> list of occurrence lists (one list is appended
        per group that produced that candidate).
    """
    dict_nucleus = {}
    all_windows = ana_useful.define_windows(dict_occ_ref, candidates, 3, 2)

    validated = []
    for current in all_windows:
        as_given = nucleus_valid_window(current)
        if as_given:
            validated.append(as_given)

        flipped = ana_useful.symmetric_window(current)
        flipped_valid = nucleus_valid_window(flipped)
        if flipped_valid:
            restored = ana_useful.symmetric_window(flipped_valid)
            validated.append(restored)

    dict_aword = dict_found_words(validated)
    dict_occ_cand = nucleus_find_cand(dict_aword, nucleus_threshold)

    # Nothing above the threshold: hand back the (empty) result right away.
    if dict_occ_cand == {}:
        return dict_nucleus

    for occurrences in dict_occ_cand.values():
        new_cand, occ_count = ana_useful.new_cand_nucleus(occurrences)
        bucket = dict_nucleus.setdefault(new_cand, [])
        bucket.append(occurrences)

        ana_useful.write_log(
            log_file_path,
            'NOYAU TROUVE ' + str(new_cand) + ' ' + str(occ_count),
        )
        # TODO: find the valid windows from which this nucleus was created.
        ana_useful.write_log(log_file_path, ' LISTE DES OCCURRENCES')
        for occurrence in occurrences:
            ana_useful.write_log(log_file_path, ' ' + str(occurrence))

    return dict_nucleus
def expansion_search(dict_occ_ref, candidates, expansion_threshold,
                     log_file_path):
    """Look for new expansion candidates and log every one that is found.

    Windows are built with ``ana_useful.define_windows``, filtered by
    ``expansion_valid_window`` and grouped by ``expansion_cand_search``. Each
    group is turned into a new candidate with ``ana_useful.new_cand``.

    Args:
        dict_occ_ref: Forwarded unchanged to ``ana_useful.define_windows``.
        candidates: Forwarded unchanged to ``ana_useful.define_windows``.
        expansion_threshold: Forwarded unchanged to ``expansion_cand_search``.
        log_file_path: Target handed to every ``ana_useful.write_log`` call.

    Returns:
        dict: new candidate -> list of windows it was built from. When two
        groups give the same new candidate, the later one replaces the
        earlier one.
    """
    windows = ana_useful.define_windows(dict_occ_ref, candidates, 3, 2)
    dict_cand_windows = expansion_cand_search(
        expansion_valid_window(windows), expansion_threshold)

    # Derive the new candidate of every group, report it in the log and
    # collect it in the result. Only the grouped windows are needed, not
    # the grouping key.
    dict_expa = {}
    for windows_cand_list in dict_cand_windows.values():
        new_cand, occ_count = ana_useful.new_cand(windows_cand_list)

        headline = 'EXPANSION TROUVEE ' + str(new_cand) + ' ' + str(occ_count)
        ana_useful.write_log(log_file_path, headline)
        ana_useful.write_log(log_file_path, ' LISTE DES OCCURRENCES ')
        for window_cand in windows_cand_list:
            ana_useful.write_log(log_file_path, ' ' + str(window_cand))

        dict_expa[new_cand] = windows_cand_list

    return dict_expa
def expansion_search(dict_occ_ref, candidates, expansion_threshold,
                     log_file_path):
    """Search the candidate windows for expansions and log the findings.

    The pipeline is ``ana_useful.define_windows`` ->
    ``expansion_valid_window`` -> ``expansion_cand_search``; every resulting
    group is converted to a new candidate by ``ana_useful.new_cand``.

    Args:
        dict_occ_ref: Handed as-is to ``ana_useful.define_windows``.
        candidates: Handed as-is to ``ana_useful.define_windows``.
        expansion_threshold: Handed as-is to ``expansion_cand_search``.
        log_file_path: Destination used for all ``ana_useful.write_log`` calls.

    Returns:
        dict: new candidate -> the group of windows that produced it (a later
        group overwrites an earlier one with the same new candidate).
    """
    dict_expa = {}

    all_windows = ana_useful.define_windows(dict_occ_ref, candidates, 3, 2)
    kept_windows = expansion_valid_window(all_windows)
    groups = expansion_cand_search(kept_windows, expansion_threshold)

    for shape in groups:
        group = groups[shape]
        new_cand, occ_count = ana_useful.new_cand(group)

        # Log what has been found at this step: a header line for the new
        # candidate followed by one line per window.
        ana_useful.write_log(
            log_file_path,
            'EXPANSION TROUVEE ' + str(new_cand) + ' ' + str(occ_count),
        )
        ana_useful.write_log(log_file_path, ' LISTE DES OCCURRENCES ')
        for member in group:
            ana_useful.write_log(log_file_path, ' ' + str(member))

        dict_expa[new_cand] = group

    return dict_expa
def expression_search(dict_occ_ref, candidates, expression_threshold,
                      log_file_path):
    """Search for expressions built on each known candidate and log them.

    Per candidate: windows from ``ana_useful.define_windows`` are filtered by
    ``expression_valid_windows``; if any remain they are grouped by
    ``expression_find_cand`` and every group becomes a new candidate through
    ``ana_useful.new_cand_expression``.

    Args:
        dict_occ_ref: Handed as-is to ``ana_useful.define_windows``.
        candidates: Iterable of candidates, handled one after the other.
        expression_threshold: Handed as-is to ``expression_find_cand``.
        log_file_path: Destination used for all ``ana_useful.write_log`` calls.

    Returns:
        dict: new candidate -> the group of windows that produced it (a later
        group overwrites an earlier one with the same new candidate).
    """
    dict_expre = {}

    for single in candidates:
        # define_windows takes a list of candidates, so wrap the single one.
        wrapped = [single]
        # Original author's note: the windows have the form
        # `CAND1 + (cand or any word) + (cand or any word)`; stop words ("v")
        # are not represented.
        all_windows = ana_useful.define_windows(dict_occ_ref, wrapped, 3, 1)
        kept_windows = expression_valid_windows(all_windows, wrapped[0])

        if kept_windows != []:
            groups = expression_find_cand(kept_windows, expression_threshold)
            for group in groups.values():
                new_cand, occ_count = ana_useful.new_cand_expression(group)
                dict_expre[new_cand] = group

                ana_useful.write_log(
                    log_file_path,
                    'EXPRESSION TROUVEE ' + str(new_cand) + ' '
                    + str(occ_count),
                )
                ana_useful.write_log(log_file_path, ' LISTE DES OCCURRENCES ')
                for member in group:
                    ana_useful.write_log(log_file_path, ' ' + str(member))

    return dict_expre
#SEUILS################################################################# # nucleus_threshold = [3,5,5,10] # nucleus_threshold = [2,4,4,6] nucleus_threshold = config['nucleus_threshold'] expansion_threshold = int(config['expansion_threshold']) expression_threshold = int(config['expression_threshold']) recession_threshold = int(config['recession_threshold']) #STEPS######################################################################## global_steps = int(config['global_steps']) nucleus_steps = int(config['nucleus_nestedsteps']) automaticsteps = config['automaticsteps'] # True ou False with open(log_file_path, 'w', encoding='utf8') as logfile: ana_useful.write_log(log_file_path, "########################################\n") ana_useful.write_log(log_file_path, "FICHIER LOG\n") ana_useful.write_log(log_file_path, "ANALYSE DU FICHIER : " + txt_file_path + "\n") ana_useful.write_log(log_file_path, "BOOTSTRAP : " + str(cands) + "\n") ana_useful.write_log( log_file_path, "\n\nTraitement démarré le " + time.strftime("%c") + " \n") ana_useful.write_log(log_file_path, "########################################\n") stop = False nb_passe = 0 while not stop: nb_passe += 1
#SEUILS################################################################# # nucleus_threshold = [3,5,5,10] # nucleus_threshold = [2,4,4,6] nucleus_threshold = config['nucleus_threshold'] expansion_threshold = int(config['expansion_threshold']) expression_threshold = int(config['expression_threshold']) recession_threshold = int(config['recession_threshold']) #STEPS######################################################################## global_steps = int(config['global_steps']) nucleus_steps = int(config['nucleus_nestedsteps']) automaticsteps = config['automaticsteps'] # True ou False with open(log_file_path, 'w', encoding = 'utf8') as logfile: ana_useful.write_log(log_file_path,"########################################\n") ana_useful.write_log(log_file_path,"FICHIER LOG\n") ana_useful.write_log(log_file_path,"ANALYSE DU FICHIER : " + txt_file_path + "\n") ana_useful.write_log(log_file_path,"BOOTSTRAP : " + str(cands) + "\n") ana_useful.write_log(log_file_path,"\n\nTraitement démarré le " + time.strftime("%c") + " \n") ana_useful.write_log(log_file_path,"########################################\n") stop = False nb_passe = 0 while not stop: nb_passe += 1 global_steps -= 1 dict_expa = {} dict_expre = {} print('\n\n\n################# step n°', str(nb_passe), '#################\n')