def expansion_cand_search(valid_windows, expansion_threshold):
    """Group windows by shape and keep the shapes that reach the threshold.

    The shape of a window is a space-separated string built from its
    occurrences, in order:

    * an occurrence whose item 2 equals ``'t'`` contributes its item 1;
    * an occurrence whose item 2 equals ``'v'`` contributes nothing;
    * any other occurrence contributes its item 2.

    Args:
        valid_windows: iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: minimum number of windows a merged shape must
            gather to be kept.

    Returns:
        dict mapping each key returned by
        ``ana_useful.merge_egal_sple_dictkeys`` to its list of windows,
        restricted to entries holding at least ``expansion_threshold``
        windows.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # The previous code called ``shape.strip()`` and discarded the result
        # (str is immutable), so every key kept a trailing space. The stripped
        # value is now actually used as the key.
        shape = ' '.join(tokens).strip()
        dict_cand_windows.setdefault(shape, []).append(window)

    print(
        '\n\n\n################# RECHERCHE EXPANSIONS : DEBUT RECHERCHE CANDIDATS #################\n'
    )
    # TODO/NOTE: the following step is VERY slow.
    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(
        dict_cand_windows)
    print(
        '\n\n\n################# RECHERCHE EXPANSIONS : FIN RECHERCHE CANDIDATS #################\n'
    )

    # Threshold check: an expansion is only a "potential expansion" until it
    # gathers enough windows to be inserted into the returned dict.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }
def dict_found_words(valid_windows):
    """Index windows by the text of their ``'t'`` occurrences.

    Every occurrence whose item 2 equals ``'t'`` registers its window under
    the key ``occurrence[1]``. The resulting dict is then handed to
    ``ana_useful.merge_egal_sple_dictkeys`` and that result is returned.

    A dict cannot be modified while it is being iterated, so the raw index
    is built first (regardless of keys that may be equal up to the "soft
    equality") and merged afterwards.
    """
    windows_by_word = {}
    for window in valid_windows:
        for occurrence in window:
            # Presumably there is a single 't' occurrence per window.
            if occurrence[2] != 't':
                continue
            windows_by_word.setdefault(occurrence[1], []).append(window)
    return ana_useful.merge_egal_sple_dictkeys(windows_by_word)
def dict_found_words(valid_windows):
    """Map each ``'t'`` word to the windows containing it, then merge keys.

    For every occurrence whose item 2 equals ``'t'``, the enclosing window
    is appended to the list stored under ``occurrence[1]``. The raw mapping
    is built completely first (a dict cannot be altered while iterating over
    it, and keys equal up to the "soft equality" are tolerated at this
    stage); it is then passed to ``ana_useful.merge_egal_sple_dictkeys``,
    whose result is returned.
    """
    # Presumably each window holds a single 't' occurrence.
    word_window_pairs = (
        (occurrence[1], window)
        for window in valid_windows
        for occurrence in window
        if occurrence[2] == 't'
    )

    raw_index = {}
    for word, window in word_window_pairs:
        raw_index.setdefault(word, []).append(window)

    merged_index = ana_useful.merge_egal_sple_dictkeys(raw_index)
    return merged_index
def not_expa_inside_expre(windows):
    """Return the windows whose aword shape gathers fewer than three windows.

    Windows are grouped under ``ana_useful.aword_shape(window)``; the groups
    are then merged by ``ana_useful.merge_egal_sple_dictkeys``. Every window
    belonging to a merged group of one or two windows is returned.

    Args:
        windows: iterable of windows accepted by ``ana_useful.aword_shape``.

    Returns:
        list of the retained windows, in the iteration order of the merged
        dict.
    """
    windows_by_shape = {}
    for window in windows:
        shape = ana_useful.aword_shape(window)
        windows_by_shape.setdefault(shape, []).append(window)

    # Strictly equal shapes are already grouped by the dict above; the merge
    # step handles the remaining "soft equality" between keys.
    merged_groups = ana_useful.merge_egal_sple_dictkeys(windows_by_shape)

    valid_windows_t3 = []
    # Only the groups matter here, and an empty dict simply yields no
    # iteration, so no explicit emptiness check is required.
    for group in merged_groups.values():
        if 0 < len(group) < 3:
            valid_windows_t3.extend(group)
    return valid_windows_t3
def not_expa_inside_expre(windows):
    """Return the windows whose aword shape gathers fewer than three windows.

    Windows are grouped under ``ana_useful.aword_shape(window)``; the groups
    are then merged by ``ana_useful.merge_egal_sple_dictkeys``. Every window
    belonging to a merged group of one or two windows is returned.

    Args:
        windows: iterable of windows accepted by ``ana_useful.aword_shape``.

    Returns:
        list of the retained windows, in the iteration order of the merged
        dict.
    """
    windows_by_shape = {}
    for window in windows:
        shape = ana_useful.aword_shape(window)
        windows_by_shape.setdefault(shape, []).append(window)

    # Strictly equal shapes are already grouped by the dict above; the merge
    # step handles the remaining "soft equality" between keys.
    merged_groups = ana_useful.merge_egal_sple_dictkeys(windows_by_shape)

    valid_windows_t3 = []
    # Only the groups matter here, and an empty dict simply yields no
    # iteration, so no explicit emptiness check is required.
    for group in merged_groups.values():
        if 0 < len(group) < 3:
            valid_windows_t3.extend(group)
    return valid_windows_t3
def expansion_cand_search(valid_windows, expansion_threshold):
    """Group windows by shape and keep the shapes that reach the threshold.

    The shape of a window is a space-separated string built from its
    occurrences, in order:

    * an occurrence whose item 2 equals ``'t'`` contributes its item 1;
    * an occurrence whose item 2 equals ``'v'`` contributes nothing;
    * any other occurrence contributes its item 2.

    Args:
        valid_windows: iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: minimum number of windows a merged shape must
            gather to be kept.

    Returns:
        dict mapping each key returned by
        ``ana_useful.merge_egal_sple_dictkeys`` to its list of windows,
        restricted to entries holding at least ``expansion_threshold``
        windows.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # The previous code called ``shape.strip()`` and discarded the result
        # (str is immutable), so every key kept a trailing space. The stripped
        # value is now actually used as the key.
        shape = ' '.join(tokens).strip()
        dict_cand_windows.setdefault(shape, []).append(window)

    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(
        dict_cand_windows)

    # Threshold check: an expansion is only a "potential expansion" until it
    # gathers enough windows to be inserted into the returned dict.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }
def expansion_cand_search(valid_windows, expansion_threshold):
    """Group windows by shape and keep the shapes that reach the threshold.

    The shape of a window is a space-separated string built from its
    occurrences, in order:

    * an occurrence whose item 2 equals ``'t'`` contributes its item 1;
    * an occurrence whose item 2 equals ``'v'`` contributes nothing;
    * any other occurrence contributes its item 2.

    Args:
        valid_windows: iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: minimum number of windows a merged shape must
            gather to be kept.

    Returns:
        dict mapping each key returned by
        ``ana_useful.merge_egal_sple_dictkeys`` to its list of windows,
        restricted to entries holding at least ``expansion_threshold``
        windows.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # The previous code called ``shape.strip()`` and discarded the result
        # (str is immutable), so every key kept a trailing space. The stripped
        # value is now actually used as the key.
        shape = ' '.join(tokens).strip()
        dict_cand_windows.setdefault(shape, []).append(window)

    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(
        dict_cand_windows)

    # Threshold check: an expansion is only a "potential expansion" until it
    # gathers enough windows to be inserted into the returned dict.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }
def expansion_cand_search(valid_windows, expansion_threshold):
    """Group windows by shape and keep the shapes that reach the threshold.

    The shape of a window is a space-separated string built from its
    occurrences, in order:

    * an occurrence whose item 2 equals ``'t'`` contributes its item 1;
    * an occurrence whose item 2 equals ``'v'`` contributes nothing;
    * any other occurrence contributes its item 2.

    Args:
        valid_windows: iterable of windows; each window is an iterable of
            indexable occurrences whose items 1 and 2 are strings.
        expansion_threshold: minimum number of windows a merged shape must
            gather to be kept.

    Returns:
        dict mapping each key returned by
        ``ana_useful.merge_egal_sple_dictkeys`` to its list of windows,
        restricted to entries holding at least ``expansion_threshold``
        windows.
    """
    dict_cand_windows = {}
    for window in valid_windows:
        tokens = []
        for occurrence in window:
            tag = occurrence[2]
            if tag == 't':
                tokens.append(occurrence[1])
            elif tag != 'v':
                tokens.append(tag)
        # The previous code called ``shape.strip()`` and discarded the result
        # (str is immutable), so every key kept a trailing space. The stripped
        # value is now actually used as the key.
        shape = ' '.join(tokens).strip()
        dict_cand_windows.setdefault(shape, []).append(window)

    print('\n\n\n################# RECHERCHE EXPANSIONS : DEBUT RECHERCHE CANDIDATS #################\n')
    # TODO/NOTE: the following step is VERY slow.
    dict_cand_windows_norm = ana_useful.merge_egal_sple_dictkeys(dict_cand_windows)
    print('\n\n\n################# RECHERCHE EXPANSIONS : FIN RECHERCHE CANDIDATS #################\n')

    # Threshold check: an expansion is only a "potential expansion" until it
    # gathers enough windows to be inserted into the returned dict.
    return {
        expansion: windows
        for expansion, windows in dict_cand_windows_norm.items()
        if len(windows) >= expansion_threshold
    }