示例#1
0
def main(subject):
    """Ingest a dat thread, analyze posts and keywords, and persist ranked pages.

    :param subject: thread subject object exposing `dat_url` and `site.id`.
    Side effects: bulk-inserts keyword and page records, prints high-priority
    posts to stdout, and schedules publication dates.
    """
    # Load raw posts keyed by post number.
    posts = {post.num: post for post in dat_reader(subject.dat_url)}

    # Parse, drop errors, and weight posts by replies.
    posts = analyze_post(posts)

    # Keyword analysis.
    r_indexes = analyze_keyword(posts)

    # Reflect the keyword analysis into posts, then bulk-register keyword rows.
    insert_keyword(posts, r_indexes, subject.site.id)

    # Emit high-priority posts (priority > 300).
    pages = []
    for post in posts.values():
        output = PageRepository(_type=PageType.POST_RANK.value)
        if post.priority > 300:
            print("++++++++++++++++++++")
            print(post.priority)
            print("++++++++++++++++++++")
            post.printer(posts=posts, output=output)
            # Keep for DB output.
            pages.append(output)

    # Emit posts whose keyword score is high.
    for r_index in r_indexes:
        pages.append(printer_res(r_index, posts))

    # Build the records to store in the DB.
    pages = filter_overlap(pages)
    keyword_record_dict = {r.keyword: r.keyword_record for r in r_indexes}
    bulk_pages = [
        page.output_for_page(subject, keyword_record_dict)
        for page in pages
        if page.is_enable
    ]

    # Bulk insert, then register page/keyword relations.
    pages = Page.bulk_insert(bulk_pages)
    PageKeywordRelation.register(pages)

    # Pick the optimal publication dates.
    set_start_at(pages)
示例#2
0
def keyword(site, keyword_id, start_keyword_id):
    """Render the keyword page: lead article, previous article, and related links.

    :param site: site object passed straight through to the template.
    :param keyword_id: keyword primary key (coerced to int).
    :param start_keyword_id: pagination cursor; 100000000 is the sentinel
        meaning "start from the newest" — presumably set by the caller when
        no cursor exists (TODO confirm against the routes).
    """
    page_limit = 25

    # Parameter checks / coercion.
    keyword_id = int(keyword_id)
    start_keyword_id = int(start_keyword_id)
    kw = Keyword.get(keyword_id)
    if start_keyword_id == 100000000:
        rows = PageKeywordRelation.get_from_new_keyword(keyword_id, _limit=page_limit)
    else:
        rows = PageKeywordRelation.get_from_keyword(keyword_id, start_keyword_id, _limit=page_limit)

    # Keep only pages that are currently publishable.
    now = datetime.datetime.now(tz=pytz.utc)
    visible = [row for row in rows if row.page.is_enable(now=now)]

    # Defaults for the empty case; overwritten below when rows exist.
    is_end = False
    contents = None
    prev_contents = None
    is_next = None
    relations = None

    if visible:
        page_list = [row.page for row in visible]
        contents = page_list[0]
        relations = visible[2:]  # empty list when only one row is visible
        if len(visible) >= 2:
            prev_contents = page_list[1]
            is_next = visible[1].id
        else:
            is_end = True

    return render_template('dat/keyword.html',
                           site=site,
                           keyword=kw,
                           contents=contents,
                           prev_contents=prev_contents,
                           relations=relations,
                           is_next=is_next,
                           is_end=is_end)
示例#3
0
def main(subject):
    """Run the full pipeline for one thread: read, analyze, rank, and store pages.

    :param subject: thread subject exposing `dat_url` and `site.id`.
    Side effects: writes keyword/page rows in bulk, prints high-priority posts,
    and sets publication dates.
    """
    # Read the dat file and index posts by their number.
    posts = {}
    for entry in dat_reader(subject.dat_url):
        posts[entry.num] = entry

    # Parse, discard errors, and weight by reply counts.
    posts = analyze_post(posts)

    # Run keyword analysis over the parsed posts.
    r_indexes = analyze_keyword(posts)

    # Apply keyword results to posts, then bulk-register keywords in the DB.
    insert_keyword(posts, r_indexes, subject.site.id)

    # Collect posts whose priority exceeds the threshold.
    pages = []
    for num, post in posts.items():
        output = PageRepository(_type=PageType.POST_RANK.value)
        if post.priority > 300:
            print("++++++++++++++++++++")
            print(post.priority)
            print("++++++++++++++++++++")
            post.printer(posts=posts, output=output)
            # Recorded for DB output.
            pages.append(output)

    # Collect posts with high keyword scores.
    pages.extend(printer_res(r_index, posts) for r_index in r_indexes)

    # Build the DB records.
    pages = filter_overlap(pages)
    keyword_record_dict = dict(
        (r_index.keyword, r_index.keyword_record) for r_index in r_indexes
    )
    bulk_pages = [
        page.output_for_page(subject, keyword_record_dict)
        for page in pages if page.is_enable
    ]

    # Bulk insert and link pages to keywords.
    pages = Page.bulk_insert(bulk_pages)
    PageKeywordRelation.register(pages)

    # Choose optimal publication dates.
    set_start_at(pages)
    def run(self):
        """Bulk-register a PageKeywordRelation row set for every existing Page.

        One-shot backfill: refuses to run if any relation rows already exist.

        Raises:
            AssertionError: if the PageKeywordRelation table is non-empty.
        """
        # Guard: never run twice over a populated table.
        if PageKeywordRelation.objects().filter().count() > 0:
            raise AssertionError("PageKeywordRelation data is exist")

        # Fetch every page, ordered by id for deterministic batching.
        page_all = sorted(Page.get_all(), key=lambda x: x.id)
        total = len(page_all)

        # Register in batches of 10.
        # BUGFIX: the original used `len(pages) > 10`, which flushed batches
        # of 11 despite the "10 at a time" intent.
        batch = []
        for count, page in enumerate(page_all, start=1):
            batch.append(page)
            if len(batch) >= 10:
                PageKeywordRelation.register(batch)
                batch = []
                print("{}/{}".format(count, total))
        # Flush the final partial batch; skip the call entirely when empty
        # (the original passed an empty list when total was a batch multiple).
        if batch:
            PageKeywordRelation.register(batch)
示例#5
0
def sitemap():
    """
    sitemap.xml for the Google crawler.
    """
    all_sites = Site.get_all()
    new_pages = Page.gets_new(10000)
    new_keywords = PageKeywordRelation.gets_new(10000)

    # Visibility cutoff: one hour in the past, UTC-aware.
    cutoff = datetime.datetime.now(pytz.utc) - datetime.timedelta(seconds=3600)
    new_keyword_pages = [
        kw for kw in new_keywords if kw.page and kw.page.is_enable(cutoff)
    ]

    newest_site_date = max(site.created_at for site in all_sites)
    # NOTE(review): one_days_ago / three_days_ago are naive datetimes while
    # the cutoff above is UTC-aware — looks intentional per-template, but
    # worth confirming.
    return render_template(
        'sitemap/sitemap.html',
        url_base='http://www.niku.tokyo/',
        new_site_date=newest_site_date,
        all_sites=all_sites,
        new_pages=new_pages,
        new_keyword_pages=new_keyword_pages,
        one_days_ago=datetime.datetime.now() - datetime.timedelta(days=1),
        three_days_ago=datetime.datetime.now() - datetime.timedelta(days=3),
    )
示例#6
0
 def get_count(cls, keyword_id):
     """Return the number of PageKeywordRelation rows for *keyword_id*."""
     # Local import — presumably avoids a circular dependency at module
     # load time; TODO confirm.
     from module.site.page_keyword import PageKeywordRelation
     count = PageKeywordRelation.get_count(keyword_id)
     return count
示例#7
0
 def get_count(cls, keyword_id):
     """Count PageKeywordRelation rows associated with the given keyword id."""
     # Deferred import, presumably to dodge an import cycle — confirm.
     from module.site.page_keyword import PageKeywordRelation
     return PageKeywordRelation.get_count(keyword_id)