Example #1
 def can_find_graph_with_query(self, query):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.GRAPH_DIR_NAME)
     pm.go_or_create_and_go_to(query)
     if os.path.exists(query + '_graph_zero.pkl'):
         pm.go_up()
         pm.go_up()
         return True
     pm.go_up()
     pm.go_up()
     return False
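Every example on this page walks the filesystem through a PathMover instance, whose implementation is not shown here. The sketch below is only an assumption of what the two methods used in these snippets (go_or_create_and_go_to and go_up) might look like if they simply wrap os.makedirs and os.chdir; the real class may differ.

import os


class PathMover:
    """Minimal sketch (assumption): enter a directory, creating it on demand, and step back up."""

    def go_or_create_and_go_to(self, dir_name):
        # Create the directory if it does not exist yet, then change into it.
        os.makedirs(dir_name, exist_ok=True)
        os.chdir(dir_name)

    def go_up(self):
        # Return to the parent directory.
        os.chdir('..')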
Example #2
 def can_find_page_with_query(self, query):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME)
     pm.go_or_create_and_go_to(query)
     if os.path.exists('%s_10.pkl' % query):
         pm.go_up()
         pm.go_up()
         return True
     pm.go_up()
     pm.go_up()
     return False
Example #5
 def load_answerer_with_query(self, query):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.ANSWERER_DIR_NAME)
     pm.go_or_create_and_go_to(query)
     graph = self.load_file(query + '_answerer_first.pkl')
     pm.go_up()
     pm.go_up()
     return graph
 def load_queries_with_original_query(self, original_query):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.QUERIES_DIR_NAME)
     filenames = os.listdir()
     queries = []
     for filename in filenames:
         if filename == '.DS_Store':
             continue
         try:
             with open(filename, 'rb') as f:
                 query = pickle.load(f)
                 queries.append(query)
         except IsADirectoryError:
             pdb.set_trace()  # a directory slipped into the listing; inspect it in the debugger
     pm.go_up()
     return queries
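load_answerer_with_query above calls a self.load_file helper that does not appear on this page. Here is a minimal sketch of such a helper, assuming it just unpickles one file from the current directory (the None fallback for a missing file is also an assumption):

import os
import pickle


def load_file(filename):
    # Sketch (assumption): unpickle a single file from the current directory.
    # In the examples this is a method on the loader class (self.load_file).
    if not os.path.exists(filename):
        return None
    with open(filename, 'rb') as f:
        return pickle.load(f)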
Example #7
 def save_answerer_with_query(self, answerer, query):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.ANSWERER_DIR_NAME)
     pm.go_or_create_and_go_to(query)
     with open('%s_answerer_zero.pkl' % query, 'wb') as f:
         pickle.dump(answerer, f)
         print('%s_answerer_zero.pklの保存完了!' % query)
     pm.go_up()
     pm.go_up()
Example #8
 def save_ads_with_query(self, ads, query):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.FETCHED_ADS_DIR_NAME)
     pm.go_or_create_and_go_to(query)
     for i, ad in enumerate(ads):
         with open('%s_%i.pkl' % (ad.title, i), 'wb') as f:
             pickle.dump(obj=ad, file=f)
             print('%sの保存完了' % ad.title)
     pm.go_up()
     pm.go_up()
Example #13
 def can_find_pages_with_query_dir(self, query, words):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME)
     pm.go_or_create_and_go_to(query)
     for word in words:
         if os.path.exists('%s %s' % (query, word)):  # found the expanded-query directory!
             pm.go_or_create_and_go_to(word)
             if os.path.exists('%s %s_1.pkl' % (query, word)):
                 pm.go_up()
                 pm.go_up()
                 pm.go_up()
                 print('すでにある')  # "already there"
                 return True
             pm.go_up()
             pm.go_up()
             pm.go_up()
             print('ない1')  # "not found (1)"
             return False
         # NOTE: this branch is inside the loop, so only the first word is ever examined.
         pm.go_up()
         pm.go_up()
         print('ない2')  # "not found (2)"
         return False
     pm.go_up()
     pm.go_up()
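Note that the loop above returns after examining only the first element of words, and that it descends into word even though the directory it just tested for is named '%s %s' % (query, word). If the intent is to check every expanded query before giving up, one possible reading is sketched below; this is an assumption, not the original code.

 def can_find_pages_with_query_dir(self, query, words):
     # Sketch (assumption): same directory layout, but every word is checked.
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME)
     pm.go_or_create_and_go_to(query)
     found = False
     for word in words:
         expanded_dir = '%s %s' % (query, word)
         if not os.path.exists(expanded_dir):
             continue  # no directory for this expanded query yet
         pm.go_or_create_and_go_to(expanded_dir)
         found = os.path.exists('%s %s_1.pkl' % (query, word))
         pm.go_up()
         if found:
             break
     pm.go_up()
     pm.go_up()
     return found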
Example #14
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from task_graph_zero_answerer import TaskGraphZeroAnswerer
from answer_printer import AnswererPrinter
from path_mover import PathMover
import constants
import pdb

if __name__ == "__main__":
    queries = constants.QUERIES_4
    for query in queries:
        pfl = PickleFileLoaderForOriginal()
        g = pfl.load_graph_with_query(query)
        noun, cmp, verb = query.split(" ")
        query_task = "_".join([noun, cmp, verb])

        pm = PathMover()

        print("zeroの結果です")

        answerer = TaskGraphZeroAnswerer(graph=g, query_task=query_task)
        print("zero_answererをinstance化しました")
        answerer.set_result_tasks()
        print("set_result_tasks")
        answerer.set_task_scores()
        answerer.remove_generalized_tasks()
        print("set_task_scores")
        answerer.set_united_results()
        simple_results = []
        for united_result in answerer.united_results:
            tasks = united_result[0][0]
            result_tasks = []
Example #15
 def save_pages_with_query(self, pages_dict, original_query):
     pm = PathMover()
     pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME)
     pm.go_or_create_and_go_to(original_query)
     for expanded_query in pages_dict:
         pm.go_or_create_and_go_to(expanded_query)
         for i in range(constants.NUM_OF_FETCHED_PAGES):
             with open('%s_%i.pkl' % (expanded_query, i), 'wb') as f:
                 try:
                     pickle.dump(pages_dict[expanded_query][i], f)
                     print('%s_%i.pklの保存完了!' % (expanded_query, i))
                 except (TypeError, IndexError):
                     print('%sは%i個までしかありません!' % (expanded_query, i))
                     break
         pm.go_up()
     pm.go_up()
     pm.go_up()
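save_pages_with_query expects pages_dict to map each expanded-query string to a list of already fetched page objects, with at most constants.NUM_OF_FETCHED_PAGES entries pickled per key. A usage sketch with stand-in values follows; PickleFileSaverForEx comes from the imports in the scripts further down, but treating it as the class that owns this method is an assumption, and the expanded query and page object shown are hypothetical.

from pickle_file_saver_for_ex import PickleFileSaverForEx
import types

# Hypothetical stand-in for a fetched page object; anything picklable will do.
page = types.SimpleNamespace(title='dummy', url='http://example.com')

pages_dict = {'野球 が 上手くなる 素振り': [page]}  # hypothetical expanded query
saver = PickleFileSaverForEx()  # assumption: the class these save methods belong to
saver.save_pages_with_query(pages_dict=pages_dict, original_query='野球 が 上手くなる')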
Example #17
        first_answerer.remove_generalized_tasks()
        first_answerer.set_united_results()
        simple_results = []
        for united_result in first_answerer.united_results:
            tasks = united_result[0][0]
            result_tasks = []
            for task in tasks:
                aspects = first_answerer.graph.node[task]['aspects']
                task_noun = task.split('_')[0]
                task_verb = task.split('_')[2]
                # Keep tasks that have more than two aspects and whose noun/verb parts
                # do not simply restate the query's noun or verb.
                if (len(aspects) > 2
                        and noun not in task_noun
                        and verb not in task_noun
                        and verb not in task_verb
                        and noun not in task_verb
                        and task_noun not in verb):
                    result_tasks.append(task)
            if result_tasks and result_tasks not in simple_results:
                simple_results.append(result_tasks)
        first_answerer.simple_results = simple_results
        printer = AnswererPrinter(answerer=first_answerer, query=query)

        pm = PathMover()
        pm.go_or_create_and_go_to('results')
        pm.go_or_create_and_go_to(query)

        printer.output(method_name='first')
        pm.go_up()
        pm.go_up()
Example #18
# -*- coding: utf-8 -*-
import constants
from pickle_file_loader_for_ex import PickleFileLoaderForExpandedQuery
from pickle_file_saver_for_ex import PickleFileSaverForEx
from path_mover import PathMover
import pdb
import os

if __name__ == '__main__':
    pfl = PickleFileLoaderForExpandedQuery()
    pfs = PickleFileSaverForEx()
    pm = PathMover()

    original_queries = [
        '野球 が 上手くなる', 'ビリヤード が 上手くなる', 'サッカー が 上手くなる', 'ハンドボール が 上手くなる'
    ]

    pm.go_or_create_and_go_to(constants.FETCHED_PAGES_DIR_NAME)
    for original_query in original_queries:
        pm.go_or_create_and_go_to(original_query)
        expanded_queries = os.listdir()
        for expanded_query in expanded_queries:
            if expanded_query == '.DS_Store':
                continue
            pm.go_or_create_and_go_to(expanded_query)
            filenames = os.listdir()
            for i, filename in enumerate(filenames):
                if filename == '.DS_Store':
                    continue
                try:
# -*- coding: utf-8 -*-
import constants
from pickle_file_loader_for_original import PickleFileLoaderForOriginal
from pickle_file_saver_for_original import PickleFileSaverForOriginal
from path_mover import PathMover
from page_data_inserter import PageDataInserter
import pdb

if __name__ == '__main__':
    queries = constants.QUERIES_4
    pfl = PickleFileLoaderForOriginal()
    saver = PickleFileSaverForOriginal()
    pm = PathMover()
    di = PageDataInserter()
    for i, query in enumerate(queries):
        pages = pfl.load_fetched_pages_with_query(query)
        pm.go_or_create_and_go_to(constants.FETCHED_PAGES_O_DIR_NAME)
        pm.go_or_create_and_go_to(query)
        for i, page in enumerate(pages):
            if '.pdf' in page.url:
                continue
            if di.has_body(page.query, page.url):
                print(str(i))
                continue
            try:
                print('%i番目の%sのページをフェッチします' % (i, query))
                page.fetch_html()
                print('%sのフェッチ完了!' % page.title)
                page.set_text_from_html_body()
                #page.set_sentences_from_text()
                #filename = '%s_%i.pkl' % (query, i)
Example #22
# -*- coding: utf-8 -*-
import constants
from pickle_file_loader import PickleFileLoader
from pickle_file_saver import PickleFileSaver
from path_mover import PathMover
import pdb

if __name__ == '__main__':
    query = 'ネコ 預ける'
    dirname = constants.FETCHED_PAGES_O_DIR_NAME
    pfl = PickleFileLoader()
    saver = PickleFileSaver()
    pages = pfl.load_fetched_pages_with_query(query)
    pm = PathMover()
    pm.go_or_create_and_go_to(dirname)
    pm.go_or_create_and_go_to(query)  # go one level deeper again when using query expansion
    for i, page in enumerate(pages):
        if hasattr(page, 'sentences'):
            if page.sentences:
                print('%sはもうsentencesがあります' % page.title)
                continue
        try:
            page.fetch_html()
            print('%sのフェッチ完了!' % page.title)
            page.set_text_from_html_body()
            page.set_sentences_from_text()
            filename = '%s_%i.pkl' % (query, i)
            saver.save_file(obj=page, filename=filename)
            print('%sの保存完了!' % page.title)
            #pfs.save_pages_with_query_expansion()
        except (ValueError, IndexError):