Пример #1
0
def nucleus_search(dict_occ_ref, candidates, nucleus_threshold, log_file_path):
    """Look for nucleus candidates and log each one that is found.

    Windows come from ``ana_useful.define_windows(dict_occ_ref, candidates,
    3, 2)``. Every window is submitted to ``nucleus_valid_window`` twice:
    once unchanged, and once after ``ana_useful.symmetric_window``; in the
    second case the validated result is passed through
    ``symmetric_window`` again before being kept. The kept windows feed
    ``dict_found_words`` and then ``nucleus_find_cand`` together with
    ``nucleus_threshold``.

    Args:
        dict_occ_ref: forwarded untouched to ``ana_useful.define_windows``.
        candidates: forwarded untouched to ``ana_useful.define_windows``.
        nucleus_threshold: forwarded untouched to ``nucleus_find_cand``.
        log_file_path: target handed to ``ana_useful.write_log``.

    Returns:
        A dict keyed by the new candidate returned by
        ``ana_useful.new_cand_nucleus``; each value is a list to which the
        corresponding occurrence list has been appended.
    """
    kept_windows = []
    for win in ana_useful.define_windows(dict_occ_ref, candidates, 3, 2):
        direct = nucleus_valid_window(win)
        if direct:
            kept_windows.append(direct)

        # Second attempt on the symmetric window; a hit is flipped back
        # with symmetric_window before it is stored.
        flipped = nucleus_valid_window(ana_useful.symmetric_window(win))
        if flipped:
            kept_windows.append(ana_useful.symmetric_window(flipped))

    found_words = dict_found_words(kept_windows)
    occurrences_by_shape = nucleus_find_cand(found_words, nucleus_threshold)

    nuclei = {}
    for occurrences in occurrences_by_shape.values():
        nucleus, count = ana_useful.new_cand_nucleus(occurrences)
        nuclei.setdefault(nucleus, []).append(occurrences)

        ana_useful.write_log(
            log_file_path, 'NOYAU TROUVE {!s} {!s}'.format(nucleus, count))
        # TODO: recover the valid windows that led to this nucleus.
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES')
        for occurrence in occurrences:
            ana_useful.write_log(log_file_path, '   {!s}'.format(occurrence))
    return nuclei
Пример #2
0
def expression_search(dict_occ_ref, candidates, expression_threshold,
                      log_file_path):
    """Look for expression candidates, one starting candidate at a time.

    For each item of ``candidates`` the windows returned by
    ``ana_useful.define_windows(dict_occ_ref, [item], 3, 1)`` are filtered
    by ``expression_valid_windows``. When that filter returns something
    other than an empty list, ``expression_find_cand`` is called with
    ``expression_threshold`` and every group it returns is turned into a
    new candidate with ``ana_useful.new_cand_expression`` and logged.

    Args:
        dict_occ_ref: forwarded untouched to ``ana_useful.define_windows``.
        candidates: iterable of starting candidates.
        expression_threshold: forwarded to ``expression_find_cand``.
        log_file_path: target handed to ``ana_useful.write_log``.

    Returns:
        A dict mapping each new candidate to its list of windows. A later
        group yielding the same candidate overwrites the earlier entry.
    """
    expressions = {}
    for cand in candidates:
        # define_windows works on a list of candidates, so wrap the item.
        single = [cand]
        # Windows look like `CAND1 + (cand or any word) + (cand or any
        # word)`. Stop words ("v") are not represented.
        windows = ana_useful.define_windows(dict_occ_ref, single, 3, 1)

        accepted = expression_valid_windows(windows, single[0])
        if accepted == []:
            continue

        grouped = expression_find_cand(accepted, expression_threshold)
        for occurrences in grouped.values():
            expression, count = ana_useful.new_cand_expression(occurrences)
            expressions[expression] = occurrences

            ana_useful.write_log(
                log_file_path,
                'EXPRESSION TROUVEE {!s} {!s}'.format(expression, count))
            ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
            for occurrence in occurrences:
                ana_useful.write_log(log_file_path,
                                     '   {!s}'.format(occurrence))
    return expressions
Пример #3
0
def nucleus_search(dict_occ_ref, candidates, nucleus_threshold, log_file_path):
    """Look for nucleus candidates and log each one that is found.

    Windows come from ``ana_useful.define_windows(dict_occ_ref, candidates,
    3, 2)``. Every window is submitted to ``nucleus_valid_window`` twice:
    once unchanged, and once after ``ana_useful.symmetric_window``; in the
    second case the validated result is passed through
    ``symmetric_window`` again before being kept. The kept windows feed
    ``dict_found_words`` and then ``nucleus_find_cand`` together with
    ``nucleus_threshold``.

    Args:
        dict_occ_ref: forwarded untouched to ``ana_useful.define_windows``.
        candidates: forwarded untouched to ``ana_useful.define_windows``.
        nucleus_threshold: forwarded untouched to ``nucleus_find_cand``.
        log_file_path: target handed to ``ana_useful.write_log``.

    Returns:
        A dict keyed by the new candidate returned by
        ``ana_useful.new_cand_nucleus``; each value is a list to which the
        corresponding occurrence list has been appended.
    """
    kept_windows = []
    for win in ana_useful.define_windows(dict_occ_ref, candidates, 3, 2):
        direct = nucleus_valid_window(win)
        if direct:
            kept_windows.append(direct)

        # Second attempt on the symmetric window; a hit is flipped back
        # with symmetric_window before it is stored.
        flipped = nucleus_valid_window(ana_useful.symmetric_window(win))
        if flipped:
            kept_windows.append(ana_useful.symmetric_window(flipped))

    found_words = dict_found_words(kept_windows)
    occurrences_by_shape = nucleus_find_cand(found_words, nucleus_threshold)

    nuclei = {}
    for occurrences in occurrences_by_shape.values():
        nucleus, count = ana_useful.new_cand_nucleus(occurrences)
        nuclei.setdefault(nucleus, []).append(occurrences)

        ana_useful.write_log(
            log_file_path, 'NOYAU TROUVE {!s} {!s}'.format(nucleus, count))
        # TODO: recover the valid windows that led to this nucleus.
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES')
        for occurrence in occurrences:
            ana_useful.write_log(log_file_path, '   {!s}'.format(occurrence))
    return nuclei
Пример #4
0
def expansion_search(dict_occ_ref, candidates, expansion_threshold, log_file_path):
    """Look for expansion candidates and log each one that is found.

    Windows from ``ana_useful.define_windows(dict_occ_ref, candidates, 3,
    2)`` are filtered by ``expansion_valid_window`` and grouped by
    ``expansion_cand_search`` using ``expansion_threshold``. Each group is
    turned into a new candidate with ``ana_useful.new_cand``.

    Args:
        dict_occ_ref: forwarded untouched to ``ana_useful.define_windows``.
        candidates: forwarded untouched to ``ana_useful.define_windows``.
        expansion_threshold: forwarded to ``expansion_cand_search``.
        log_file_path: target handed to ``ana_useful.write_log``.

    Returns:
        A dict mapping each new candidate to its group of windows. A later
        group yielding the same candidate overwrites the earlier entry.
    """
    windows = ana_useful.define_windows(dict_occ_ref, candidates, 3, 2)
    accepted = expansion_valid_window(windows)
    grouped = expansion_cand_search(accepted, expansion_threshold)

    expansions = {}
    # Log what was found at this step, then record it in the result.
    for occurrences in grouped.values():
        expansion, count = ana_useful.new_cand(occurrences)
        ana_useful.write_log(
            log_file_path, 'EXPANSION TROUVEE {!s} {!s}'.format(expansion, count))
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
        for occurrence in occurrences:
            ana_useful.write_log(log_file_path, '   {!s}'.format(occurrence))
        expansions[expansion] = occurrences
    return expansions
Пример #5
0
def expansion_search(dict_occ_ref, candidates, expansion_threshold,
                     log_file_path):
    """Look for expansion candidates and log each one that is found.

    Windows from ``ana_useful.define_windows(dict_occ_ref, candidates, 3,
    2)`` are filtered by ``expansion_valid_window`` and grouped by
    ``expansion_cand_search`` using ``expansion_threshold``. Each group is
    turned into a new candidate with ``ana_useful.new_cand``.

    Args:
        dict_occ_ref: forwarded untouched to ``ana_useful.define_windows``.
        candidates: forwarded untouched to ``ana_useful.define_windows``.
        expansion_threshold: forwarded to ``expansion_cand_search``.
        log_file_path: target handed to ``ana_useful.write_log``.

    Returns:
        A dict mapping each new candidate to its group of windows. A later
        group yielding the same candidate overwrites the earlier entry.
    """
    windows = ana_useful.define_windows(dict_occ_ref, candidates, 3, 2)
    accepted = expansion_valid_window(windows)
    grouped = expansion_cand_search(accepted, expansion_threshold)

    expansions = {}
    # Log what was found at this step, then record it in the result.
    for occurrences in grouped.values():
        expansion, count = ana_useful.new_cand(occurrences)
        ana_useful.write_log(
            log_file_path,
            'EXPANSION TROUVEE {!s} {!s}'.format(expansion, count))
        ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
        for occurrence in occurrences:
            ana_useful.write_log(log_file_path, '   {!s}'.format(occurrence))
        expansions[expansion] = occurrences
    return expansions
Пример #6
0
def expression_search(dict_occ_ref, candidates, expression_threshold, log_file_path):
    """Look for expression candidates, one starting candidate at a time.

    For each item of ``candidates`` the windows returned by
    ``ana_useful.define_windows(dict_occ_ref, [item], 3, 1)`` are filtered
    by ``expression_valid_windows``. When that filter returns something
    other than an empty list, ``expression_find_cand`` is called with
    ``expression_threshold`` and every group it returns is turned into a
    new candidate with ``ana_useful.new_cand_expression`` and logged.

    Args:
        dict_occ_ref: forwarded untouched to ``ana_useful.define_windows``.
        candidates: iterable of starting candidates.
        expression_threshold: forwarded to ``expression_find_cand``.
        log_file_path: target handed to ``ana_useful.write_log``.

    Returns:
        A dict mapping each new candidate to its list of windows. A later
        group yielding the same candidate overwrites the earlier entry.
    """
    expressions = {}
    for cand in candidates:
        # define_windows works on a list of candidates, so wrap the item.
        single = [cand]
        # Windows look like `CAND1 + (cand or any word) + (cand or any
        # word)`. Stop words ("v") are not represented.
        windows = ana_useful.define_windows(dict_occ_ref, single, 3, 1)

        accepted = expression_valid_windows(windows, single[0])
        if accepted == []:
            continue

        grouped = expression_find_cand(accepted, expression_threshold)
        for occurrences in grouped.values():
            expression, count = ana_useful.new_cand_expression(occurrences)
            expressions[expression] = occurrences

            ana_useful.write_log(
                log_file_path,
                'EXPRESSION TROUVEE {!s} {!s}'.format(expression, count))
            ana_useful.write_log(log_file_path, '   LISTE DES OCCURRENCES ')
            for occurrence in occurrences:
                ana_useful.write_log(log_file_path, '   {!s}'.format(occurrence))
    return expressions
Пример #7
0
# THRESHOLDS ###########################################################
# All settings below are read from `config`, which is defined outside this
# excerpt. nucleus_threshold is taken as-is (no int() conversion); the two
# commented-out examples suggest a list of four integers -- TODO confirm.
# nucleus_threshold = [3,5,5,10]
# nucleus_threshold = [2,4,4,6]
nucleus_threshold = config['nucleus_threshold']
expansion_threshold = int(config['expansion_threshold'])
expression_threshold = int(config['expression_threshold'])
recession_threshold = int(config['recession_threshold'])

# STEPS ################################################################
global_steps = int(config['global_steps'])
nucleus_steps = int(config['nucleus_nestedsteps'])
automaticsteps = config['automaticsteps']  # True or False

# Write the log header. Opening in 'w' mode creates the log file or empties
# an existing one.
# NOTE(review): the `logfile` handle is never used in this block; every line
# goes through ana_useful.write_log(log_file_path, ...), which presumably
# opens the path itself -- confirm that writing while this handle is still
# open is intended.
with open(log_file_path, 'w', encoding='utf8') as logfile:
    ana_useful.write_log(log_file_path,
                         "########################################\n")
    ana_useful.write_log(log_file_path, "FICHIER LOG\n")
    ana_useful.write_log(log_file_path,
                         "ANALYSE DU FICHIER : " + txt_file_path + "\n")
    ana_useful.write_log(log_file_path, "BOOTSTRAP : " + str(cands) + "\n")
    # Timestamp uses the locale's date/time representation ("%c").
    ana_useful.write_log(
        log_file_path,
        "\n\nTraitement démarré le " + time.strftime("%c") + " \n")
    ana_useful.write_log(log_file_path,
                         "########################################\n")

# Main-loop state: `stop` ends the loop, `nb_passe` counts the passes.
stop = False
nb_passe = 0

# Only the pass-counter increment is visible in this excerpt; whatever sets
# `stop` lies outside it.
while not stop:
    nb_passe += 1
Пример #8
0
# THRESHOLDS ###########################################################
# All settings below are read from `config`, which is defined outside this
# excerpt. nucleus_threshold is taken as-is (no int() conversion); the two
# commented-out examples suggest a list of four integers -- TODO confirm.
# nucleus_threshold = [3,5,5,10]
# nucleus_threshold = [2,4,4,6]
nucleus_threshold = config['nucleus_threshold']
expansion_threshold = int(config['expansion_threshold'])
expression_threshold = int(config['expression_threshold'])
recession_threshold = int(config['recession_threshold'])

# STEPS ################################################################
global_steps = int(config['global_steps'])
nucleus_steps = int(config['nucleus_nestedsteps'])
automaticsteps = config['automaticsteps'] # True or False

# Write the log header. Opening in 'w' mode creates the log file or empties
# an existing one.
# NOTE(review): the `logfile` handle is never used in this block; every line
# goes through ana_useful.write_log(log_file_path, ...), which presumably
# opens the path itself -- confirm that writing while this handle is still
# open is intended.
with open(log_file_path, 'w', encoding = 'utf8') as logfile:
    ana_useful.write_log(log_file_path,"########################################\n")
    ana_useful.write_log(log_file_path,"FICHIER LOG\n")
    ana_useful.write_log(log_file_path,"ANALYSE DU FICHIER : " + txt_file_path + "\n")
    ana_useful.write_log(log_file_path,"BOOTSTRAP : " + str(cands) + "\n")
    # Timestamp uses the locale's date/time representation ("%c").
    ana_useful.write_log(log_file_path,"\n\nTraitement démarré le " + time.strftime("%c") + " \n")
    ana_useful.write_log(log_file_path,"########################################\n")

# Main-loop state: `stop` ends the loop, `nb_passe` counts the passes.
stop = False
nb_passe = 0

while not stop:
    nb_passe += 1
    global_steps -= 1
    dict_expa = {}
    dict_expre = {}
    print('\n\n\n################# step n°', str(nb_passe), '#################\n')