Example #1
 def _load_aaer_test_data(self, doc_length, one_to_n=False):
     # data only contains test files, to save computing & memory costs
     self.save_dir = const.GENERATED_DATA_DIR
     # cache file name encodes the class, the n-gram length and the mode
     suffix = "_1_to_%d" % doc_length if one_to_n else "_%d" % doc_length
     self.dict_save_fname = os.path.join(
         self.save_dir, "%s%s%s.%s" %
         (const.DL_DOC_DICT_PREFIX, self.__class__.__name__, suffix,
          const.PICKLE_FILE_EXTENSION))
     try:
         logging.info("loading saved data from %s", self.dict_save_fname)
         with open(self.dict_save_fname, 'rb') as f:
             self._docvec_dict = pickle.load(f)
     except FileNotFoundError:
         logging.info("%s not found. Building...", self.dict_save_fname)
         test_files = ft.list_file_paths_under_dir(const.TEST_DIR, ['txt'])
         docs = []
         for test_file in test_files:
             if one_to_n:
                 # collect all k-grams for k = 1..doc_length
                 docs += utils.flatten_list(
                     ex_parsing.one_to_n_grams_from_file(
                         ft.get_source_file_by_example_file(test_file),
                         n=doc_length))
             else:
                 # collect doc_length-grams only
                 docs += ex_parsing.ngrams_from_file(
                     ft.get_source_file_by_example_file(test_file),
                     n=doc_length)
         self._make_docvec_dict(docs)
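
The try/except FileNotFoundError around pickle.load above is a build-once cache. Stripped of the AAER specifics, the same idiom looks roughly like this (generic names, not part of the codebase shown above; the real method delegates saving to _make_docvec_dict):

import logging
import pickle


def load_or_build(cache_path, build_fn):
    # Sketch of the caching idiom: return the pickled result if present,
    # otherwise build it and cache it for next time.
    try:
        with open(cache_path, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        logging.info("%s not found. Building...", cache_path)
        result = build_fn()
        with open(cache_path, 'wb') as f:
            pickle.dump(result, f)
        return result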
Example #2
def dir_to_file_without_punctuations(dir_path,
                                     extension='txt',
                                     file_name=None):
    file_names = ft.list_file_paths_under_dir(dir_path, [extension])
    tokens = []
    for fname in file_names:
        temp_tokens, _ = parse_file(fname)
        tokens.extend(util.flatten_list(temp_tokens))

    if not file_name:
        # derive a default name from the last two path components,
        # e.g. 'data/aaer/test' -> 'aaer_test' (assumes '/' separators)
        file_name = '_'.join(dir_path.split('/')[-2:])
    with open(file_name, 'w') as f:
        print('saving to:', file_name)
        f.write(' '.join(tokens))
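
A hypothetical invocation, reusing const.TEST_DIR from the other examples (the helper derives the output name from the directory path when file_name is not given):

# hypothetical usage: concatenate every .txt file under const.TEST_DIR
# into one space-separated token file named after the directory
dir_to_file_without_punctuations(const.TEST_DIR)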
Example #3
def sentences_from_dir(dir_path):
    sentences = []
    for path in ft.list_file_paths_under_dir(dir_path, ["txt"]):
        sentences += sentences_from_file(path)
    return sentences
Example #4
def tokens_from_dir(dir_path):
    tokens = []
    for path in ft.list_file_paths_under_dir(dir_path, ["txt"]):
        tokens += tokens_from_file(path)
    return tokens
Example #5
 def tokens_from_aaer_corpus(self):
     # note: despite the name, this returns the sentence list built by
     # word2vec.sentences_from_file_list over the corpus directory
     return word2vec.sentences_from_file_list(
         ft.list_file_paths_under_dir(self.corpus_dir, ['txt']))
Example #6
 def path_list_from_dir(self):
     return ft.list_file_paths_under_dir(self.corpus_dir,
                                         const.TEXT_EXTENSIONS)
Example #7
def grid_search(example_path, model_class, enable_saving=True, epochs=1):
    files = ft.list_file_paths_under_dir(const.VALIDATION_DIR, ['txt'])

    # evaluate every configuration in the grid against the validation files
    for conf in grid_conf_dict_generator():
        run_for_epochs(example_path, files, model_class, config_dict=conf,
                       enable_saving=enable_saving, epochs=epochs)
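
grid_conf_dict_generator itself is not shown in these examples. A plausible sketch, assuming it yields one config dict per point of a small hyperparameter grid (the parameter names and values here are made up for illustration):

import itertools

# hypothetical grid; the real parameter names/values are not shown above
_GRID = {
    'window_size': [2, 5, 10],
    'vector_size': [100, 300],
}


def grid_conf_dict_generator():
    # yield one config dict per combination of grid values
    keys = sorted(_GRID)
    for values in itertools.product(*(_GRID[k] for k in keys)):
        yield dict(zip(keys, values))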
Example #8
def validate_with_more(model):
    file_list = ft.list_file_paths_under_dir(const.TEST_DIR, ['txt'])
    # file_list = [os.path.join(const.TEST_DIR, '34-71576.txt')]
    conf_dict = oneshot.base_conf_dict
    # example_file2 = const.VALIDATION_DIR + '/34-43389.txt'

    scores = []

    for validate_file in ft.list_file_paths_under_dir(const.VALIDATION_DIR,
                                                      ['txt']):
        entity_dict = oneshot.get_entity_dict_from_file(validate_file)
        # only score validation files with a usable entity dict
        # (more than two entities)
        if isinstance(entity_dict, dict) and len(entity_dict) > 2:
            print(validate_file)
            scores.append(run_for_epochs(validate_file, file_list, model,
                                         config_dict=conf_dict, epochs=1))
    print(scores)
    if scores:  # guard against division by zero when nothing qualified
        print(sum(scores) / len(scores))


# models = [oneshot.OneShotTestWVSumWVPhraseBi]
# for m in models:
#     validate_with_more(m)
file_list = ft.list_file_paths_under_dir(const.TEST_DIR, ['txt'])
models = [oneshot.OneShotTestWVSumWVPhraseBi]
for m in models:
    run_for_epochs(const.EXAMPLE_FILE, file_list, m,
                   config_dict=oneshot.base_conf_dict, epochs=1)

for conf in grid_conf_dict_generator():
    run_for_epochs(const.EXAMPLE_FILE, file_list, oneshot.OneShotTestWVSumWVPhraseBi, config_dict=conf,
                   enable_saving=False, epochs=1)
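
Every example above relies on ft.list_file_paths_under_dir, whose implementation is not shown here. A minimal sketch consistent with how it is called (a directory plus a list of extensions, returning a list of file paths) might be:

import os


def list_file_paths_under_dir(dir_path, extensions):
    # Sketch only: recursively collect files whose extension (without the
    # dot) appears in `extensions`; the real ft helper may differ in
    # ordering or matching details.
    paths = []
    for root, _, files in os.walk(dir_path):
        for name in files:
            if name.rsplit('.', 1)[-1] in extensions:
                paths.append(os.path.join(root, name))
    return paths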