def can_find_graph_with_query(self, query):
    """Return True if the zero-graph pickle for *query* already exists.

    Looks for '<query>_graph_zero.pkl' under GRAPH_DIR_NAME/<query>/.
    Side effect: PathMover creates the directories on the way in if they
    do not exist yet. The working directory is always restored.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.GRAPH_DIR_NAME)
    pm.go_or_create_and_go_to(query)
    # Single exit path: remember the result, then undo both directory
    # moves once (the original duplicated the go_up() pair per branch).
    found = os.path.exists(query + '_graph_zero.pkl')
    pm.go_up()
    pm.go_up()
    return found
def can_find_page_with_query(self, query):
    """Return True if fetched pages for *query* appear to be on disk.

    Checks for '<query>_10.pkl' under FETCHED_PAGES_O_DIR_NAME/<query>/
    — presumably the 11th page acts as a "fetch completed" marker;
    TODO confirm against the saver's page count.
    Side effect: directories are created on the way in if missing.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME)
    pm.go_or_create_and_go_to(query)
    # Single exit path instead of duplicating the go_up() cleanup pair.
    found = os.path.exists('%s_10.pkl' % query)
    pm.go_up()
    pm.go_up()
    return found
def load_answerer_with_query(self, query):
    """Load and return the pickled first-pass answerer for *query*.

    Reads '<query>_answerer_first.pkl' from ANSWERER_DIR_NAME/<query>/,
    creating the directories on the way in if they do not exist.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.ANSWERER_DIR_NAME)
    pm.go_or_create_and_go_to(query)
    answerer = self.load_file('%s_answerer_first.pkl' % query)
    pm.go_up()
    pm.go_up()
    return answerer
def load_queries_with_original_query(self, original_query):
    """Load every pickled query stored under QUERIES_DIR_NAME.

    NOTE(review): *original_query* is currently unused; the parameter is
    kept for interface compatibility with existing callers.

    Returns a list of the unpickled query objects, skipping '.DS_Store'
    and any subdirectories.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.QUERIES_DIR_NAME)
    queries = []
    for filename in os.listdir():
        if filename == '.DS_Store':
            continue
        # Skip subdirectories explicitly. The original caught
        # IsADirectoryError and dropped into pdb.set_trace() — a
        # debugging leftover that would hang any non-interactive run.
        if os.path.isdir(filename):
            continue
        with open(filename, 'rb') as f:
            queries.append(pickle.load(f))
    pm.go_up()
    return queries
def save_answerer_with_query(self, answerer, query):
    """Pickle *answerer* as '<query>_answerer_zero.pkl'.

    The file is written under ANSWERER_DIR_NAME/<query>/ (directories
    are created if missing) and the working directory is restored.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.ANSWERER_DIR_NAME)
    pm.go_or_create_and_go_to(query)
    outfile = '%s_answerer_zero.pkl' % query
    with open(outfile, 'wb') as f:
        pickle.dump(answerer, f)
    print('%s_answerer_zero.pklの保存完了!' % query)
    pm.go_up()
    pm.go_up()
def save_ads_with_query(self, ads, query):
    """Pickle each ad in *ads* under FETCHED_ADS_DIR_NAME/<query>/.

    Each ad is written as '<ad.title>_<index>.pkl'; *query* only selects
    the subdirectory. Directories are created if missing and the
    working directory is restored afterwards.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.FETCHED_ADS_DIR_NAME)
    pm.go_or_create_and_go_to(query)
    for index, ad in enumerate(ads):
        filename = '%s_%i.pkl' % (ad.title, index)
        with open(filename, 'wb') as f:
            pickle.dump(obj=ad, file=f)
        print('%sの保存完了' % ad.title)
    pm.go_up()
    pm.go_up()
def can_find_pages_with_query_dir(self, query, words):
    # Check whether expanded-query page pickles already exist under
    # FETCHED_PAGES_DIR_NAME/<query>/<word>/ as '<query> <word>_1.pkl'.
    #
    # NOTE(review): every branch of the loop body returns, so only the
    # FIRST element of *words* is ever inspected — confirm this is
    # intended rather than a loop over all words. If *words* is empty
    # the method falls through past the loop and implicitly returns
    # None (falsy) instead of False — TODO confirm callers tolerate it.
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME)
    pm.go_or_create_and_go_to(query)
    for word in words:
        if os.path.exists('%s %s' % (query, word)):
            # Found the expanded-query directory!
            pm.go_or_create_and_go_to(word)
            if os.path.exists('%s %s_1.pkl' % (query, word)):
                pm.go_up()
                pm.go_up()
                pm.go_up()
                print('すでにある')
                return True
            # Directory exists but the first page pickle does not.
            pm.go_up()
            pm.go_up()
            pm.go_up()
            print('ない1')
            return False
        # No directory for this word at all.
        pm.go_up()
        pm.go_up()
        print('ない2')
        return False
    # Reached only when *words* is empty (no explicit return value).
    pm.go_up()
    pm.go_up()
from pickle_file_loader_for_original import PickleFileLoaderForOriginal from task_graph_zero_answerer import TaskGraphZeroAnswerer from answer_printer import AnswererPrinter from path_mover import PathMover import constants import pdb if __name__ == "__main__": queries = constants.QUERIES_4 for query in queries: pfl = PickleFileLoaderForOriginal() g = pfl.load_graph_with_query(query) noun, cmp, verb = query.split(" ") query_task = "_".join([noun, cmp, verb]) pm = PathMover() print("zeroの結果です") answerer = TaskGraphZeroAnswerer(graph=g, query_task=query_task) print("zero_answererをinstance化しました") answerer.set_result_tasks() print("set_result_tasks") answerer.set_task_scores() answerer.remove_generalized_tasks() print("set_task_scores") answerer.set_united_results() simple_results = [] for united_result in answerer.united_results: tasks = united_result[0][0] result_tasks = []
def save_pages_with_query(self, pages_dict, original_query):
    """Pickle fetched pages, grouped by expanded query.

    Layout: FETCHED_PAGES_DIR_NAME/<original_query>/<expanded_query>/
    containing '<expanded_query>_<i>.pkl' for up to
    constants.NUM_OF_FETCHED_PAGES pages. When a query's page list runs
    out (IndexError) or a page cannot be pickled (TypeError), saving
    for that query stops with a notice.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME)
    pm.go_or_create_and_go_to(original_query)
    for expanded_query, pages in pages_dict.items():
        pm.go_or_create_and_go_to(expanded_query)
        for i in range(constants.NUM_OF_FETCHED_PAGES):
            with open('%s_%i.pkl' % (expanded_query, i), 'wb') as f:
                try:
                    pickle.dump(pages[i], f)
                    print('%s_%i.pklの保存完了!' % (expanded_query, i))
                except (TypeError, IndexError):
                    print('%sは%i個までしかありません!' % (expanded_query, i))
                    break
        pm.go_up()
    pm.go_up()
    pm.go_up()
first_answerer.remove_generalized_tasks() first_answerer.set_united_results() simple_results = [] for united_result in first_answerer.united_results: tasks = united_result[0][0] result_tasks = [] for task in tasks: aspects = first_answerer.graph.node[task]['aspects'] task_noun = task.split('_')[0] task_verb = task.split('_')[2] if len(aspects) > 2: if not noun in task_noun: if not verb in task_noun: if not verb in task_verb: if not noun in task_verb: if not task_noun in verb: result_tasks.append(task) if not result_tasks in simple_results: if result_tasks: simple_results.append(result_tasks) first_answerer.simple_results = simple_results printer = AnswererPrinter(answerer=first_answerer, query=query) pm = PathMover() pm.go_or_create_and_go_to('results') pm.go_or_create_and_go_to(query) printer.output(method_name='first') pm.go_up() pm.go_up()
# -*- coding: utf-8 -*- import constants from pickle_file_loader_for_ex import PickleFileLoaderForExpandedQuery from pickle_file_saver_for_ex import PickleFileSaverForEx from path_mover import PathMover import pdb import os import constants if __name__ == '__main__': pfl = PickleFileLoaderForExpandedQuery() pfs = PickleFileSaverForEx() pm = PathMover() original_queries = [ '野球 が 上手くなる', 'ビリヤード が 上手くなる', 'サッカー が 上手くなる', 'ハンドボール が 上手くなる' ] pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME) for original_query in original_queries: pm.go_or_create_and_go_to(original_query) expanded_queries = os.listdir() for expanded_query in expanded_queries: if expanded_query == '.DS_Store': continue pm.go_or_create_and_go_to(expanded_query) filenames = os.listdir() for i, filename in enumerate(filenames): if filename == '.DS_Store': continue try:
# -*- coding: utf-8 -*- import constants from pickle_file_loader_for_original import PickleFileLoaderForOriginal from pickle_file_saver_for_original import PickleFileSaverForOriginal from path_mover import PathMover from page_data_inserter import PageDataInserter import pdb if __name__ == '__main__': queries = constants.QUERIES_4 pfl = PickleFileLoaderForOriginal() saver = PickleFileSaverForOriginal() pm = PathMover() di = PageDataInserter() for i, query in enumerate(queries): pages = pfl.load_fetched_pages_with_query(query) pm.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME) pm.go_or_create_and_go_to(query) for i, page in enumerate(pages): if '.pdf' in page.url: continue if di.has_body(page.query, page.url): print(str(i)) continue try: print('%i番目の%sのページをフェッチします' % (i, query)) page.fetch_html() print('%sのフェッチ完了!' % page.title) page.set_text_from_html_body() #page.set_sentences_from_text() #filename = '%s_%i.pkl' % (query, i)
from pickle_file_loader_for_original import PickleFileLoaderForOriginal from task_graph_zero_answerer import TaskGraphZeroAnswerer from answer_printer import AnswererPrinter from path_mover import PathMover import constants import pdb if __name__ == '__main__': queries = constants.QUERIES_4 for query in queries: pfl = PickleFileLoaderForOriginal() g = pfl.load_graph_with_query(query) noun, cmp, verb = query.split(' ') query_task = '_'.join([noun, cmp, verb]) pm = PathMover() print('zeroの結果です') answerer = TaskGraphZeroAnswerer(graph=g, query_task=query_task) print('zero_answererをinstance化しました') answerer.set_result_tasks() print('set_result_tasks') answerer.set_task_scores() answerer.remove_generalized_tasks() print('set_task_scores') answerer.set_united_results() simple_results = [] for united_result in answerer.united_results: tasks = united_result[0][0] result_tasks = []
# -*- coding: utf-8 -*- import constants from pickle_file_loader import PickleFileLoader from pickle_file_saver import PickleFileSaver from path_mover import PathMover import pdb if __name__ == '__main__': query = 'ネコ 預ける' dirname = constants.FETCHED_PAGES_O_DIR_NAME pfl = PickleFileLoader() saver = PickleFileSaver() pages = pfl.load_fetched_pages_with_query(query) pm = PathMover() pm.go_or_create_and_go_to(dirname) pm.go_or_create_and_go_to(query) # クエリ拡張するのならもう一度深くへ for i, page in enumerate(pages): if hasattr(page, 'sentences'): if page.sentences: print('%sはもうsentencesがあります' % page.title) continue try: page.fetch_html() print('%sのフェッチ完了!' % page.title) page.set_text_from_html_body() page.set_sentences_from_text() filename = '%s_%i.pkl' % (query, i) saver.save_file(obj=page, filename=filename) print('%sの保存完了!' % page.title) #pfs.save_pages_with_query_expansion() except (ValueError, IndexError):
# -*- coding: utf-8 -*- import constants from pickle_file_loader_for_ex import PickleFileLoaderForExpandedQuery from pickle_file_saver_for_ex import PickleFileSaverForEx from path_mover import PathMover import pdb import os import constants if __name__ == "__main__": pfl = PickleFileLoaderForExpandedQuery() pfs = PickleFileSaverForEx() pm = PathMover() original_queries = ["野球 が 上手くなる", "ビリヤード が 上手くなる", "サッカー が 上手くなる", "ハンドボール が 上手くなる"] pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME) for original_query in original_queries: pm.go_or_create_and_go_to(original_query) expanded_queries = os.listdir() for expanded_query in expanded_queries: if expanded_query == ".DS_Store": continue pm.go_or_create_and_go_to(expanded_query) filenames = os.listdir() for i, filename in enumerate(filenames): if filename == ".DS_Store": continue try: page = pfl.load_file(filename) except EOFError: