# -*- coding: utf-8 -*-
import constants
import pdb
from bing_searcher import BingSearcher
from pickle_file_saver_for_original import PickleFileSaverForOriginal

if __name__ == '__main__':
    # Crawl Bing for every configured query and pickle the result pages.
    # Queries that already have saved pages are skipped to avoid re-fetching.
    saver = PickleFileSaverForOriginal()
    for query in constants.QUERIES_4:
        if saver.can_find_page_with_query(query):
            # Already crawled — nothing to do for this query.
            print('%sはもうあります' % query)
            continue
        searcher = BingSearcher(query)
        # The search API may return fewer than 1000 pages.
        result_pages = searcher.result_pages(page_num=1000)
        saver.save_pages_with_query(pages=result_pages, query=query)
# -*- coding: utf-8 -*-
import pdb
from posinega_graph_mapper import PosinegaGraphMapper
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
import networkx as nx
import matplotlib.pyplot as plt
import constants

if __name__ == '__main__':
    # For each query: load its pickled pages, map the tasks of the
    # top-ranked pages into a posinega graph, and display the graph.
    # (Removed an unused PickleFileSaverForOriginal instance that was
    # constructed but never referenced.)
    original_queries = constants.QUERIES_4
    pfl = PickleFileLoaderForOriginal()
    for query in original_queries:
        pages = pfl.load_fetched_pages_with_query(query)
        # Fresh graph per query, so each plot shows one query's tasks.
        posinega_graph_mapper = PosinegaGraphMapper()
        for i, page in enumerate(pages):
            # Only pages ranked 0..4 that actually carry tasks contribute
            # edges (merged the original nested ifs into one condition).
            if -1 < page.rank < 5 and page.tasks:
                posinega_graph_mapper.add_edges_with_page(page)
                print('%i 番目のページのタスクをグラフに追加しました' % i)
        # NOTE(review): drawing per query matches the per-query mapper
        # construction; confirm this was the intended placement.
        nx.draw(posinega_graph_mapper.graph)
        plt.show()
    print('added all edges!')
# -*- coding: utf-8 -*- import pdb from graph_task_mapper import GraphTaskMapper from pickle_file_saver_for_original import PickleFileSaverForOriginal from pickle_file_loader_for_original import PickleFileLoaderForOriginal from task_data_selector import TaskDataSelector from task_subtype_data_loader import TaskSubtypeDataLoader import constants from task import Task if __name__ == '__main__': original_queries = constants.QUERIES_4 pfs = PickleFileSaverForOriginal() pfl = PickleFileLoaderForOriginal() gtm = GraphTaskMapper() with TaskDataSelector() as selector: for query in original_queries: task_ids = selector.task_ids_with_query(query) for task_id in task_ids: with TaskSubtypeDataLoader() as task_subtype_loader: distance_subtype_pairs = task_subtype_loader.distance_from_subtype_with_task_id(task_id) distance_between_subtypes = {} for pair in distance_subtype_pairs: distance_between_subtypes[pair[0]] = pair[1] task_data = selector.taskdata_with_task_id(task_id) try: task = Task(distance_between_subtypes=distance_between_subtypes, object_term=task_data[0], cmp=task_data[1],
# -*- coding: utf-8 -*-
import constants
import pdb
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
from web_page import WebPage

if __name__ == '__main__':
    # Rebuild WebPage objects from the page database and pickle them
    # per query.
    saver = PickleFileSaverForOriginal()
    with PageDataLoader() as page_loader:
        for query in constants.QUERIES_4:
            rebuilt_pages = []
            for page_id in page_loader.page_ids_with_query(query):
                record = page_loader.pagedata_with_id(page_id)
                # NOTE(review): the original comment described the row as
                # (id, url, snippet, body, rank), which does not match the
                # unpacking below (url, query, snippet, rank) — confirm the
                # actual column order against PageDataLoader.
                rebuilt_pages.append(WebPage(id=page_id,
                                             url=record[0],
                                             query=record[1],
                                             snippet=record[2],
                                             rank=record[3]))
            saver.save_pages_with_query(pages=rebuilt_pages, query=query)
# -*- coding: utf-8 -*-
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
import constants
from sentence import Sentence
import pdb

if __name__ == '__main__':
    # For each query: load its pickled pages, attach Sentence objects and
    # derived tasks to every page, then re-save the pages.
    #
    # Fixes hoisted out of the loops:
    #  - PageDataLoader was entered (opened/closed) once PER PAGE; it is
    #    now opened once for the whole run.
    #  - The pickle loader/saver were re-constructed per query; they are
    #    now created once.
    pfl = PickleFileLoaderForOriginal()
    pfs = PickleFileSaverForOriginal()
    with PageDataLoader() as page_loader:
        for query in constants.QUERIES_4:
            pages = pfl.load_fetched_pages_with_query(query)
            for i, page in enumerate(pages):
                raw_sentences = page_loader.sentences_with_id(page.id)
                # Wrap each raw sentence with the page's query context.
                page.sentences = [Sentence(s, page.query)
                                  for s in raw_sentences]
                page.set_tasks_from_sentences()
                print('%s の %i 番目のページにtasksをセットしました!'
                      % (page.query, i))
            pfs.save_pages_with_query(pages=pages, query=query)
# -*- coding: utf-8 -*-
import constants
import pdb
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from page_data_loader import PageDataLoader
from web_page import WebPage

if __name__ == '__main__':
    # Reconstruct WebPage instances for every stored page of every query
    # and persist them with the pickle saver.
    saver = PickleFileSaverForOriginal()
    with PageDataLoader() as page_loader:
        for query in constants.QUERIES_4:
            pages = []
            ids = page_loader.page_ids_with_query(query)
            for page_id in ids:
                row = page_loader.pagedata_with_id(page_id)
                # NOTE(review): original comment claimed the row layout is
                # (id, url, snippet, body, rank), but only four fields are
                # consumed here as (url, query, snippet, rank) — verify
                # against the loader's SELECT statement.
                page = WebPage(id=page_id,
                               url=row[0],
                               query=row[1],
                               snippet=row[2],
                               rank=row[3])
                pages.append(page)
            saver.save_pages_with_query(pages=pages, query=query)