def load_answerer_with_query(self, query):
    """Load the pickled answerer stored for ``query``.

    Steps into ``constants.ANSWERER_DIR_NAME`` and then ``query`` through a
    ``PathMover`` (creating the directories if needed, going by the method
    name), loads ``<query>_answerer_first.pkl`` via ``self.load_file`` and
    steps back out again.

    Args:
        query: Query string; used as both directory name and file-name prefix.

    Returns:
        Whatever ``self.load_file`` returns for the answerer pickle.
    """
    # NOTE(review): save_answerer_with_query writes '<query>_answerer_zero.pkl'
    # while this loader reads '<query>_answerer_first.pkl' -- confirm that the
    # differing suffix is intentional.
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.ANSWERER_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        return self.load_file(query + '_answerer_first.pkl')
    finally:
        # Undo exactly the directory steps that succeeded, even when the
        # load raises, so the caller is never left in a nested directory.
        for _ in range(entered):
            pm.go_up()
def load_answerer_with_query(self, query):
    """Load the pickled answerer stored for ``query``.

    Steps into ``constants.ANSWERER_DIR_NAME`` and then ``query`` through a
    ``PathMover`` (creating the directories if needed, going by the method
    name), loads ``<query>_answerer_first.pkl`` via ``self.load_file`` and
    steps back out again.

    Args:
        query: Query string; used as both directory name and file-name prefix.

    Returns:
        Whatever ``self.load_file`` returns for the answerer pickle.
    """
    # NOTE(review): save_answerer_with_query writes '<query>_answerer_zero.pkl'
    # while this loader reads '<query>_answerer_first.pkl' -- confirm that the
    # differing suffix is intentional.
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.ANSWERER_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        return self.load_file(query + '_answerer_first.pkl')
    finally:
        # Undo exactly the directory steps that succeeded, even when the
        # load raises, so the caller is never left in a nested directory.
        for _ in range(entered):
            pm.go_up()
def save_answerer_with_query(self, answerer, query):
    """Pickle ``answerer`` to ``<query>_answerer_zero.pkl``.

    The file is written relative to the location reached by stepping into
    ``constants.ANSWERER_DIR_NAME`` and then ``query`` with a ``PathMover``.
    A completion message is printed after the file has been written.

    Args:
        answerer: Object to serialize with ``pickle``.
        query: Query string; used as both directory name and file-name prefix.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.ANSWERER_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        with open('%s_answerer_zero.pkl' % query, 'wb') as f:
            pickle.dump(answerer, f)
        print('%s_answerer_zero.pklの保存完了!' % query)
    finally:
        # Step back out of every directory that was entered, including when
        # opening the file or pickling fails.
        for _ in range(entered):
            pm.go_up()
def save_answerer_with_query(self, answerer, query):
    """Pickle ``answerer`` to ``<query>_answerer_zero.pkl``.

    The file is written relative to the location reached by stepping into
    ``constants.ANSWERER_DIR_NAME`` and then ``query`` with a ``PathMover``.
    A completion message is printed after the file has been written.

    Args:
        answerer: Object to serialize with ``pickle``.
        query: Query string; used as both directory name and file-name prefix.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.ANSWERER_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        with open('%s_answerer_zero.pkl' % query, 'wb') as f:
            pickle.dump(answerer, f)
        print('%s_answerer_zero.pklの保存完了!' % query)
    finally:
        # Step back out of every directory that was entered, including when
        # opening the file or pickling fails.
        for _ in range(entered):
            pm.go_up()
def save_ads_with_query(self, ads, query):
    """Pickle every ad in ``ads`` into the directory for ``query``.

    Each ad is written to ``<ad.title>_<index>.pkl`` relative to the location
    reached by stepping into ``constants.FETCHED_ADS_DIR_NAME`` and then
    ``query`` with a ``PathMover``. A message is printed per saved ad.

    Args:
        ads: Iterable of objects exposing a ``title`` attribute.
        query: Query string used as the sub-directory name.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.FETCHED_ADS_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        for i, ad in enumerate(ads):
            # NOTE(review): ad.title goes into the file name unescaped; a
            # title containing a path separator would fail here -- confirm
            # titles are safe file names.
            with open('%s_%i.pkl' % (ad.title, i), 'wb') as f:
                pickle.dump(obj=ad, file=f)
            print('%sの保存完了' % ad.title)
    finally:
        # Restore the starting location even if one of the writes fails.
        for _ in range(entered):
            pm.go_up()
def save_ads_with_query(self, ads, query):
    """Pickle every ad in ``ads`` into the directory for ``query``.

    Each ad is written to ``<ad.title>_<index>.pkl`` relative to the location
    reached by stepping into ``constants.FETCHED_ADS_DIR_NAME`` and then
    ``query`` with a ``PathMover``. A message is printed per saved ad.

    Args:
        ads: Iterable of objects exposing a ``title`` attribute.
        query: Query string used as the sub-directory name.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.FETCHED_ADS_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        for i, ad in enumerate(ads):
            # NOTE(review): ad.title goes into the file name unescaped; a
            # title containing a path separator would fail here -- confirm
            # titles are safe file names.
            with open('%s_%i.pkl' % (ad.title, i), 'wb') as f:
                pickle.dump(obj=ad, file=f)
            print('%sの保存完了' % ad.title)
    finally:
        # Restore the starting location even if one of the writes fails.
        for _ in range(entered):
            pm.go_up()
def can_find_page_with_query(self, query):
    """Report whether ``<query>_10.pkl`` exists for ``query``.

    The check runs after stepping into ``constants.FETCHED_PAGES_O_DIR_NAME``
    and then ``query`` with a ``PathMover``; both steps are undone before
    returning.

    Args:
        query: Query string; used as both directory name and file-name prefix.

    Returns:
        bool: ``True`` if the file exists, ``False`` otherwise.
    """
    mover = PathMover()
    mover.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME)
    mover.go_or_create_and_go_to(query)
    found = os.path.exists('%s_10.pkl' % query)
    # Single exit path: leave both directories, then report the result.
    mover.go_up()
    mover.go_up()
    return found
def can_find_graph_with_query(self, query):
    """Report whether ``<query>_graph_zero.pkl`` exists for ``query``.

    The check runs after stepping into ``constants.GRAPH_DIR_NAME`` and then
    ``query`` with a ``PathMover``; both steps are undone before returning.

    Args:
        query: Query string; used as both directory name and file-name prefix.

    Returns:
        bool: ``True`` if the file exists, ``False`` otherwise.
    """
    mover = PathMover()
    mover.go_or_create_and_go_to(constants.GRAPH_DIR_NAME)
    mover.go_or_create_and_go_to(query)
    found = os.path.exists(query + '_graph_zero.pkl')
    # Single exit path: leave both directories, then report the result.
    mover.go_up()
    mover.go_up()
    return found
def can_find_graph_with_query(self, query):
    """Report whether ``<query>_graph_zero.pkl`` exists for ``query``.

    The check runs after stepping into ``constants.GRAPH_DIR_NAME`` and then
    ``query`` with a ``PathMover``; both steps are undone before returning.

    Args:
        query: Query string; used as both directory name and file-name prefix.

    Returns:
        bool: ``True`` if the file exists, ``False`` otherwise.
    """
    mover = PathMover()
    mover.go_or_create_and_go_to(constants.GRAPH_DIR_NAME)
    mover.go_or_create_and_go_to(query)
    found = os.path.exists(query + '_graph_zero.pkl')
    # Single exit path: leave both directories, then report the result.
    mover.go_up()
    mover.go_up()
    return found
def can_find_page_with_query(self, query):
    """Report whether ``<query>_10.pkl`` exists for ``query``.

    The check runs after stepping into ``constants.FETCHED_PAGES_O_DIR_NAME``
    and then ``query`` with a ``PathMover``; both steps are undone before
    returning.

    Args:
        query: Query string; used as both directory name and file-name prefix.

    Returns:
        bool: ``True`` if the file exists, ``False`` otherwise.
    """
    mover = PathMover()
    mover.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME)
    mover.go_or_create_and_go_to(query)
    found = os.path.exists('%s_10.pkl' % query)
    # Single exit path: leave both directories, then report the result.
    mover.go_up()
    mover.go_up()
    return found
def load_queries_with_original_query(self, original_query):
    """Load every pickled query found in the queries directory.

    Steps into ``constants.QUERIES_DIR_NAME`` with a ``PathMover``, unpickles
    each entry returned by ``os.listdir()`` (skipping ``.DS_Store`` and
    sub-directories) and steps back out.

    Args:
        original_query: Currently unused; every file in the directory is
            loaded regardless of this value.

    Returns:
        list: The unpickled objects, in ``os.listdir()`` order.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.QUERIES_DIR_NAME)
    try:
        queries = []
        for filename in os.listdir():
            if filename == '.DS_Store':
                continue
            try:
                with open(filename, 'rb') as f:
                    queries.append(pickle.load(f))
            except IsADirectoryError:
                # Sub-directories are not query pickles; skip them instead of
                # dropping into an interactive debugger, which would hang any
                # non-interactive run.
                continue
        return queries
    finally:
        # Leave the queries directory even when a pickle fails to load.
        pm.go_up()
def load_queries_with_original_query(self, original_query):
    """Load every pickled query found in the queries directory.

    Steps into ``constants.QUERIES_DIR_NAME`` with a ``PathMover``, unpickles
    each entry returned by ``os.listdir()`` (skipping ``.DS_Store`` and
    sub-directories) and steps back out.

    Args:
        original_query: Currently unused; every file in the directory is
            loaded regardless of this value.

    Returns:
        list: The unpickled objects, in ``os.listdir()`` order.
    """
    pm = PathMover()
    pm.go_or_create_and_go_to(constants.QUERIES_DIR_NAME)
    try:
        queries = []
        for filename in os.listdir():
            if filename == '.DS_Store':
                continue
            try:
                with open(filename, 'rb') as f:
                    queries.append(pickle.load(f))
            except IsADirectoryError:
                # Sub-directories are not query pickles; skip them instead of
                # dropping into an interactive debugger, which would hang any
                # non-interactive run.
                continue
        return queries
    finally:
        # Leave the queries directory even when a pickle fails to load.
        pm.go_up()
def save_pages_with_query(self, pages_dict, original_query):
    """Pickle fetched pages, one sub-directory per expanded query.

    Under ``constants.FETCHED_PAGES_DIR_NAME/<original_query>/`` a directory
    is entered for every key of ``pages_dict``; inside it, up to
    ``constants.NUM_OF_FETCHED_PAGES`` pages are written as
    ``<expanded_query>_<index>.pkl``. Saving for a key stops at the first
    index whose page is missing or cannot be pickled (``IndexError`` /
    ``TypeError``), after printing a notice.

    Args:
        pages_dict: Mapping of expanded query -> indexable collection of pages.
        original_query: Query string used as the parent directory name.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.FETCHED_PAGES_DIR_NAME, original_query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        for expanded_query in pages_dict:
            pm.go_or_create_and_go_to(expanded_query)
            entered += 1
            for i in range(constants.NUM_OF_FETCHED_PAGES):
                # Serialize before opening the file: if the page is missing
                # or unpicklable, no empty/truncated .pkl is left on disk.
                try:
                    payload = pickle.dumps(pages_dict[expanded_query][i])
                except (TypeError, IndexError):
                    print('%sは%i個までしかありません!' % (expanded_query, i))
                    break
                with open('%s_%i.pkl' % (expanded_query, i), 'wb') as f:
                    f.write(payload)
                print('%s_%i.pklの保存完了!' % (expanded_query, i))
            pm.go_up()
            entered -= 1
    finally:
        # Undo whatever directory steps are still outstanding, including
        # after an unexpected error part-way through.
        for _ in range(entered):
            pm.go_up()
def save_pages_with_query(self, pages_dict, original_query):
    """Pickle fetched pages, one sub-directory per expanded query.

    Under ``constants.FETCHED_PAGES_DIR_NAME/<original_query>/`` a directory
    is entered for every key of ``pages_dict``; inside it, up to
    ``constants.NUM_OF_FETCHED_PAGES`` pages are written as
    ``<expanded_query>_<index>.pkl``. Saving for a key stops at the first
    index whose page is missing or cannot be pickled (``IndexError`` /
    ``TypeError``), after printing a notice.

    Args:
        pages_dict: Mapping of expanded query -> indexable collection of pages.
        original_query: Query string used as the parent directory name.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.FETCHED_PAGES_DIR_NAME, original_query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        for expanded_query in pages_dict:
            pm.go_or_create_and_go_to(expanded_query)
            entered += 1
            for i in range(constants.NUM_OF_FETCHED_PAGES):
                # Serialize before opening the file: if the page is missing
                # or unpicklable, no empty/truncated .pkl is left on disk.
                try:
                    payload = pickle.dumps(pages_dict[expanded_query][i])
                except (TypeError, IndexError):
                    print('%sは%i個までしかありません!' % (expanded_query, i))
                    break
                with open('%s_%i.pkl' % (expanded_query, i), 'wb') as f:
                    f.write(payload)
                print('%s_%i.pklの保存完了!' % (expanded_query, i))
            pm.go_up()
            entered -= 1
    finally:
        # Undo whatever directory steps are still outstanding, including
        # after an unexpected error part-way through.
        for _ in range(entered):
            pm.go_up()
# Script: for the hard-coded query, fetch the HTML of every previously loaded
# page that has no sentences yet, derive its text and sentences, and re-save
# the page as '<query>_<index>.pkl'.
query = 'ネコ 預ける'
dirname = constants.FETCHED_PAGES_O_DIR_NAME
pfl = PickleFileLoader()
saver = PickleFileSaver()
pages = pfl.load_fetched_pages_with_query(query)

pm = PathMover()
pm.go_or_create_and_go_to(dirname)
# When query expansion is used, one more level of nesting would be needed here.
pm.go_or_create_and_go_to(query)

for i, page in enumerate(pages):
    # Pages that already carry a non-empty `sentences` attribute are skipped.
    if hasattr(page, 'sentences') and page.sentences:
        print('%sはもうsentencesがあります' % page.title)
        continue
    try:
        page.fetch_html()
        print('%sのフェッチ完了!' % page.title)
        page.set_text_from_html_body()
        page.set_sentences_from_text()
        filename = '%s_%i.pkl' % (query, i)
        saver.save_file(obj=page, filename=filename)
        print('%sの保存完了!' % page.title)
    except (ValueError, IndexError):
        # A failure on one page is reported and the loop moves on.
        print('%sのフェッチに失敗しました' % page.title)

# Undo the two directory steps taken above.
pm.go_up()
pm.go_up()
def can_find_pages_with_query_dir(self, query, words):
    """Report whether pages for an expanded query have already been saved.

    After stepping into ``constants.FETCHED_PAGES_DIR_NAME/<query>/`` with a
    ``PathMover``, the first ``word`` for which a ``'<query> <word>'`` entry
    exists decides the result: ``True`` if ``'<query> <word>_1.pkl'`` is
    found after stepping into ``word``, ``False`` otherwise. If no word
    matches (or ``words`` is empty), ``False`` is returned. A short status
    message is printed in every case.

    Args:
        query: Original query string (directory name and file-name prefix).
        words: Iterable of expansion words to try, in order.

    Returns:
        bool: ``True`` if a saved page pickle was found, else ``False``.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.FETCHED_PAGES_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        for word in words:
            expanded_query = '%s %s' % (query, word)
            if not os.path.exists(expanded_query):
                continue
            # Found the expanded-query directory.
            # NOTE(review): the existence check above is for '<query> <word>'
            # but the directory entered below is just '<word>' (created if
            # absent) -- confirm which of the two names is intended.
            pm.go_or_create_and_go_to(word)
            entered += 1
            if os.path.exists('%s_1.pkl' % expanded_query):
                print('すでにある')
                return True
            print('ない1')
            return False
        print('ない2')
        return False
    finally:
        # One place unwinds every directory step that succeeded, replacing
        # the per-branch go_up() sequences and covering error paths too.
        for _ in range(entered):
            pm.go_up()
def can_find_pages_with_query_dir(self, query, words):
    """Report whether pages for an expanded query have already been saved.

    After stepping into ``constants.FETCHED_PAGES_DIR_NAME/<query>/`` with a
    ``PathMover``, the first ``word`` for which a ``'<query> <word>'`` entry
    exists decides the result: ``True`` if ``'<query> <word>_1.pkl'`` is
    found after stepping into ``word``, ``False`` otherwise. If no word
    matches (or ``words`` is empty), ``False`` is returned. A short status
    message is printed in every case.

    Args:
        query: Original query string (directory name and file-name prefix).
        words: Iterable of expansion words to try, in order.

    Returns:
        bool: ``True`` if a saved page pickle was found, else ``False``.
    """
    pm = PathMover()
    entered = 0
    try:
        for dirname in (constants.FETCHED_PAGES_DIR_NAME, query):
            pm.go_or_create_and_go_to(dirname)
            entered += 1
        for word in words:
            expanded_query = '%s %s' % (query, word)
            if not os.path.exists(expanded_query):
                continue
            # Found the expanded-query directory.
            # NOTE(review): the existence check above is for '<query> <word>'
            # but the directory entered below is just '<word>' (created if
            # absent) -- confirm which of the two names is intended.
            pm.go_or_create_and_go_to(word)
            entered += 1
            if os.path.exists('%s_1.pkl' % expanded_query):
                print('すでにある')
                return True
            print('ない1')
            return False
        print('ない2')
        return False
    finally:
        # One place unwinds every directory step that succeeded, replacing
        # the per-branch go_up() sequences and covering error paths too.
        for _ in range(entered):
            pm.go_up()