Пример #1
0
 def search(self, site):
     """Look up the threads of ``site`` and print every selected entry.

     The subjects are fetched with ``Subject.get_from_url(site)``. The
     attribute of ``self`` named by ``site.title`` is then called with those
     subjects and the site; its result is iterated and ``result[key]`` is
     printed for each key.

     Args:
         site: Object providing ``title``; it is also passed through to
             ``Subject.get_from_url`` and to the selected handler.
     """
     # Thread search
     fetched = Subject.get_from_url(site)
     handler = getattr(self, site.title)
     selected = handler(fetched, site)

     for name in selected:
         entry = selected[name]
         print(entry)
Пример #2
0
 def search(self, site):
     """Print each entry the site-specific handler selects for ``site``.

     The handler is resolved by name: ``getattr(self, site.title)``. It
     receives the subjects returned by ``Subject.get_from_url(site)`` plus
     the site itself, and whatever it returns is iterated key by key.

     Args:
         site: Object providing ``title``; also forwarded to the subject
             lookup and to the handler.
     """
     # Thread search
     candidates = Subject.get_from_url(site)
     by_key = getattr(self, site.title)(candidates, site)

     for k in by_key:
         print(by_key[k])
Пример #3
0
    def run(self, force=None):
        """Download every site's subject list and print the titles that pass filtering.

        Steps:
          1. Collect the sites from ``Site.get_all()``, de-duplicated by
             ``site.url``.
          2. Download each ``site.subjects_url`` and build one ``Subject``
             per line of the response; lines for which construction raises
             are skipped.
          3. Count how often each token surface occurs across all subject
             titles, ignoring tokens for which ``token_filter`` is truthy.
          4. For each subject in title order, sum the counts of its tokens
             and print ``title`` and ``count_res`` unless
             ``filter_title(point, subject)`` is truthy.

        Args:
            force: Not used by this method; kept so existing callers that
                pass it keep working.

        Raises:
            AssertionError: If a subject-list download does not return HTTP
                200. The message is the response body.
        """
        print('start')

        # Fetch all sites; keying by URL removes duplicates (the last one wins).
        sites = {site.url: site for site in Site.get_all()}

        # Build the subject list. Presumably Subject() raises for ignored or
        # unparseable lines -- TODO confirm against the Subject constructor.
        subjects = []
        for site in sites.values():
            response = requests.get(site.subjects_url)
            # Raised explicitly instead of via ``assert`` so the check is not
            # stripped when Python runs with -O; the exception type is unchanged.
            if response.status_code != 200:
                raise AssertionError(response.text)

            # parse
            for line in response.text.split('\n'):
                try:
                    subjects.append(Subject(site, line))
                except Exception:
                    # Best effort: skip this line, but let KeyboardInterrupt
                    # and SystemExit propagate (a bare ``except`` would not).
                    continue

        print(subjects)

        # Count token surfaces across every title.
        tokenizer = Tokenizer()
        surface_counts = defaultdict(int)
        for subject in subjects:
            try:
                for token in tokenizer.tokenize(subject.title):
                    if not token_filter(token):
                        surface_counts[token.surface] += 1
            except Exception:
                # Best effort: counts gathered before the failure are kept.
                continue

        # Output in title order.
        for subject in sorted(subjects, key=lambda s: s.title):
            try:
                # A title's score is the sum of the global counts of its tokens.
                point = sum(
                    surface_counts[token.surface]
                    for token in tokenizer.tokenize(subject.title)
                    if not token_filter(token)
                )
                if not filter_title(point, subject):
                    print(subject.title, subject.count_res)
            except Exception:
                # Best effort: a subject that cannot be scored is not printed.
                continue
Пример #4
0
    def search_and_scraping(self, site, force=None):
        """Select the threads for ``site``, scrape each one, and return them.

        The attribute of ``self`` named by ``site.title`` is called with the
        subjects from ``Subject.get_from_url(site)`` and the site. Each value
        of its result then has ``execute_matome(force=force)`` called on it.

        Args:
            site: Object providing ``title``; also forwarded to the subject
                lookup and to the selected handler.
            force: Passed unchanged to every ``execute_matome`` call.

        Returns:
            The object returned by the site-specific handler.
        """
        # Thread search
        fetched = Subject.get_from_url(site)
        handler = getattr(self, site.title)
        selected = handler(fetched, site)

        # Scraping
        for name in selected:
            selected[name].execute_matome(force=force)

        # Drop the local reference to the handler before returning.
        del handler
        return selected
Пример #5
0
    def search_and_scraping(self, site, force=None):
        """Run the site-specific thread search, then scrape every hit.

        ``Subject.get_from_url(site)`` supplies the candidates. They are
        handed, together with the site, to ``getattr(self, site.title)``.
        The result is iterated key by key and ``execute_matome(force=force)``
        is invoked on each ``result[key]``.

        Args:
            site: Object providing ``title``; also forwarded to the subject
                lookup and to the handler.
            force: Forwarded as the ``force`` keyword of ``execute_matome``.

        Returns:
            The handler's result, after every entry has been scraped.
        """
        # Thread search
        candidates = Subject.get_from_url(site)
        dispatch = getattr(self, site.title)
        by_key = dispatch(candidates, site)

        # Scraping
        for k in by_key:
            hit = by_key[k]
            hit.execute_matome(force=force)

        # Release the reference to the bound handler.
        del dispatch
        return by_key