Пример #1
0
def update_news():
    """Fetch the next Hacker News page and store the stories not yet saved.

    Reads and increments the module-level ``pages`` counter, so every call
    requests the following ``news?p=<pages>`` page. A story is identified by
    its ``(title, author)`` pair. Ends by calling ``redirect("/news")``.
    """
    s = session()
    # 1. Fetch the data from the news site.
    global pages
    newsdict = get_news(
        'https://news.ycombinator.com/' + "news?p=" + str(pages), 1)
    pages = pages + 1
    # 2. Find out which stories are not in the DB yet. A story is considered
    #    uniquely identified by the combination of its title and author.
    ex_news = s.query(News).options(load_only("title", "author")).all()
    # A set gives constant-time membership tests, and it is extended below so
    # that a story repeated inside the same fetched batch is inserted once.
    known = {(news.title, news.author) for news in ex_news}
    for news in newsdict:
        key = (news['title'], news['author'])
        if key in known:
            continue
        known.add(key)
        news_add = News(title=news['title'],
                        author=news['author'],
                        url=news['url'],
                        comments=news['comments'],
                        points=news['points'])
        print(news_add)
        # The original printed the literal text "/n"; a newline was intended.
        print('\n')
        s.add(news_add)
    # 3. Save the stories that were missing.
    s.commit()
    print('end')
    redirect("/news")
Пример #2
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    Steps:
      1. Fetch the data from the news site.
      2. Find out which stories are not in the DB yet. A story is considered
         uniquely identified by the combination of its title and author.
      3. Save the stories that were missing.

    Ends by calling ``redirect('/news')``.
    """
    news_ar = get_news('https://news.ycombinator.com/newest', n_pages=1)
    # ``session`` is used as-is here (it is not called), as in the original.
    s = session
    # One pass over the stored rows builds a lookup set, replacing the full
    # scan of the table that used to run for every fetched story.
    known = {(row.title, row.author) for row in s.query(News).all()}
    for new in news_ar:
        key = (new['title'], new['author'])
        if key in known:
            continue
        # Remember the key so a repeat inside this batch is not added twice.
        known.add(key)
        s.add(News(title=new['title'],
                   author=new['author'],
                   url=new['url'],
                   comments=new['comments'],
                   points=new['points']))
    # A single commit for the whole batch instead of one per inserted row.
    s.commit()

    redirect('/news')
Пример #3
0
def update_news():
    """Store newly fetched stories until the first already-known one is met.

    Steps:
      1. Fetch the data from the news site.
      2. Find out which stories are not in the DB yet. A story is considered
         uniquely identified by the combination of its title and author.
      3. Save the stories that were missing.

    Ends by calling ``redirect("/news")``.
    """
    news = get_news("https://news.ycombinator.com/newest", 1)
    s = session()
    for new in news:
        # The two conditions are chained with .where(); joining them with
        # Python's ``and`` evaluates the first expression's truth value and
        # hands only ONE condition to SQLAlchemy, so the author was ignored.
        duplicate = exists().where(News.title == new['title']).where(
            News.author == new['author'])
        record_exists = s.query(duplicate).scalar()
        if record_exists:
            # NOTE(review): processing stops at the first known story instead
            # of skipping it; presumably relies on the feed being ordered
            # newest-first -- confirm this is intended.
            break
        print('Adding record...')
        record = News(title=new['title'],
                      author=new['author'],
                      url=new['url'],
                      comments=new['comments'],
                      points=new['points'],
                      cleaned=new['cleaned'])
        s.add(record)
    s.commit()
    redirect("/news")
Пример #4
0
def update_news():
    """Store up to 50 not-yet-saved stories from the "newest" listing.

    Stories are compared with the stored rows by their ``(title, author)``
    pair. The batch is committed and ``redirect("/news")`` is called whether
    or not the 50-row cap was reached; previously both happened only when
    exactly 50 rows had been added, so smaller batches were never saved.
    """
    pages_parsed = 10
    max_new_rows = 50
    s = session()
    known = {(old.title, old.author) for old in s.query(News).all()}
    new_rows = 0

    probable_news = get_news("https://news.ycombinator.com/newest",
                             pages_parsed)
    for news in probable_news:
        key = (news['title'], news['author'])
        if key in known:
            continue
        # Track the key so a repeat inside this batch is not added twice.
        known.add(key)
        s.add(News(title=news['title'],
                   author=news['author'],
                   url=news['url'],
                   points=news['points']))
        new_rows += 1
        if new_rows == max_new_rows:
            break

    s.commit()
    print("Commited ", new_rows, " news")
    redirect("/news")
Пример #5
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    Steps:
      1. Fetch the data from the news site.
      2. Find out which stories are not in the DB yet. A story is considered
         uniquely identified by the combination of its title and author.
      3. Save the stories that were missing.

    Ends by calling ``redirect('/news')``.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    # Titles and authors are compared as PAIRS. Testing them against two
    # independent lists skipped any new story whose author (or title) already
    # appeared on some other stored row.
    known = {(title, author)
             for title, author in s.query(News.title, News.author).all()}
    for item in news:
        key = (item.get('title'), item.get('author'))
        if key in known:
            continue
        known.add(key)
        s.add(
            News(author=item['author'],
                 title=item['title'],
                 url=item['url'],
                 points=item['points'],
                 comments=item['comments']))
    # NOTE(review): the purpose of this pause before the commit is not evident
    # from this function; it is kept unchanged.
    time.sleep(0.333333334)
    s.commit()
    redirect('/news')
Пример #6
0
def update_news():
    """Store stories from the "newest" and front-page listings if missing.

    Both listings are fetched and concatenated. Each story is inserted and
    committed unless a row with the same title AND author already exists.
    Ends by calling ``redirect("/news")``.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/newest", 10)
    top_news = get_news("https://news.ycombinator.com/", 11)
    news.extend(top_news)
    for item in news:
        # Conditions are passed as separate filter() arguments. Joining them
        # with Python's ``and`` gives SQLAlchemy only one of the two
        # expressions, so the author was never part of the query.
        already_stored = s.query(News).filter(
            News.title == item["title"],
            News.author == item["author"]).first()
        if already_stored:
            continue
        s.add(News(title=item["title"],
                   author=item["author"],
                   url=item["url"],
                   comments=item["comments"],
                   points=item["points"]))
        s.commit()
    redirect("/news")
Пример #7
0
def transfer():
    """Insert every story returned for the Hacker News front page.

    ``get_news`` is called with ``3`` as its second argument (presumably a
    page count -- confirm against its definition). Each story is added to the
    module-level session ``s`` and committed immediately; no duplicate check
    is performed.
    """
    field_names = ('title', 'author', 'url', 'comments', 'points')
    fetched = get_news('https://news.ycombinator.com/', 3)
    for story in fetched:
        fields = {name: story[name] for name in field_names}
        s.add(News(**fields))
        s.commit()
Пример #8
0
def update_news():
    """Pass every fetched story that is not stored yet to ``fill``.

    A story is matched against stored rows by its ``(title, author)`` pair.
    Uses the module-level session ``s`` and ends by calling
    ``redirect("/news")``.
    """
    recent_news = get_news()
    authors = [news['author'] for news in recent_news]
    # Only rows by one of the fetched authors can match a fetched pair.
    existing_news = s.query(News).filter(News.author.in_(authors)).all()
    # The original tested ``news not in existing_news``, i.e. a dict against
    # ORM rows, which is not a (title, author) comparison; compare the
    # identifying pair explicitly instead.
    known = {(row.title, row.author) for row in existing_news}
    for news in recent_news:
        key = (news['title'], news['author'])
        if key in known:
            continue
        # Remember the key so a repeat inside this batch is handled once.
        known.add(key)
        fill(news)
    redirect("/news")
Пример #9
0
def db_News(url, n_pages=34):
    """Insert every story fetched from ``url`` and commit them together.

    Args:
        url: Address handed to ``get_news``.
        n_pages: Forwarded to ``get_news`` as ``n_pages`` (default 34).

    No duplicate check is performed.
    """
    s = session()
    fetched = get_news(url, n_pages=n_pages)
    rows = [
        News(title=story['title'],
             author=story['author'],
             url=story['url'],
             comments=story['comments'],
             points=story['points'])
        for story in fetched
    ]
    s.add_all(rows)
    s.commit()
Пример #10
0
def fill_db(n_pages=35):
    """Insert every story fetched from the "newest" listing in one commit.

    Args:
        n_pages: Passed to ``get_news`` as its second argument (default 35).

    No duplicate check is performed.
    """
    s = Session()
    for entry in get_news("https://news.ycombinator.com/newest", n_pages):
        record = News(
            title=entry['title'],
            author=entry['author'],
            url=entry['url'],
            comments=entry['comments'],
            points=entry['points'],
        )
        s.add(record)
    s.commit()
Пример #11
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    Uses the module-level session ``s``. A story is matched against stored
    rows by its ``(title, author)`` pair, so calling this repeatedly no
    longer re-inserts the same page. Ends by calling ``redirect("/news")``.
    """
    news_list = get_news('https://news.ycombinator.com/newest', 1)
    known = {(title, author)
             for title, author in s.query(News.title, News.author).all()}
    for news in news_list:
        key = (news['title'], news['author'])
        if key in known:
            continue
        # Remember the key so a repeat inside this batch is not added twice.
        known.add(key)
        s.add(
            News(title=news['title'],
                 author=news['author'],
                 url=news['url'],
                 comments=news['comments'],
                 points=news['points']))
    s.commit()
    redirect("/news")
Пример #12
0
def update_news():
    """Add stories from the first "newest" page that have no stored match.

    A stored row matches when both its title and its author equal the
    fetched story's. New rows are built directly from the fetched dict,
    committed together, and then ``redirect("/news")`` is called.
    """
    news = get_news('https://news.ycombinator.com/newest', 1)
    s = session()
    for story in news:
        same_story = s.query(News).filter(
            News.title == story['title'],
            News.author == story['author'],
        )
        stored = same_story.first()
        if not stored:
            s.add(News(**story))

    s.commit()
    redirect("/news")
Пример #13
0
def update_news():
    """Add fetched stories for which no row with the same author and title exists.

    The existence test is a ``count()`` query per story. All additions are
    committed together before ``redirect("/news")`` is called.
    """
    s = session()
    last_news = get_news()
    for story in last_news:
        matching = s.query(News).filter(
            News.author == story['author'],
            News.title == story['title'],
        )
        if matching.count() != 0:
            # Already stored, nothing to add for this story.
            continue
        s.add(
            News(
                title=story['title'],
                author=story['author'],
                url=story['url'],
                comments=story['comments'],
                points=story['points'],
            ))
    s.commit()
    redirect("/news")
Пример #14
0
def fill_database(n_pages=10):
    """Insert every story fetched from the "newest" listing in one commit.

    Args:
        n_pages: Passed to ``get_news`` as its second argument (default 10).

    No duplicate check is performed.
    """
    s = session()
    fetched = get_news('https://news.ycombinator.com/newest', n_pages)
    records = [
        News(title=story['title'],
             author=story['author'],
             url=story['url'],
             comments=story['comments'],
             points=story['points'])
        for story in fetched
    ]
    for record in records:
        s.add(record)
    s.commit()
Пример #15
0
def update_news():
    """Pass every fetched story that is not stored yet to ``fill``.

    A story is matched against stored rows by its ``(title, author)`` pair.
    Uses the module-level session ``s`` and ends by calling
    ``redirect("/news")``.
    """
    recent_news = get_news()
    authors = [news['author'] for news in recent_news]
    # Only rows by one of the fetched authors can match a fetched pair.
    existing_news = s.query(News).filter(News.author.in_(authors)).all()
    # Title and author are compared as a PAIR. Checking them against two
    # independent lists skipped a new story whenever its author (or title)
    # appeared on any other stored row.
    known = {(row.title, row.author) for row in existing_news}
    for news in recent_news:
        key = (news['title'], news["author"])
        if key in known:
            continue
        # Remember the key so a repeat inside this batch is handled once.
        known.add(key)
        fill(news)
    redirect("/news")
Пример #16
0
def update_news():
    """Store up to the first ten fetched "newest" stories that are not saved.

    A story is skipped when a row with the same title and author already
    exists. Each new row is committed immediately. Ends by calling
    ``redirect("/news")``.
    """
    s = session()
    news = get_news('https://news.ycombinator.com/newest')
    # Slicing tolerates fewer than ten fetched stories; indexing 0..9
    # unconditionally raised IndexError in that case.
    for entry in news[:10]:
        already_stored = s.query(News).filter(
            News.title == entry['title'],
            News.author == entry['author']).first()
        if already_stored:
            continue
        # The fetched dict exposes the score under 'score' in this snippet.
        s.add(News(title=entry['title'],
                   author=entry['author'],
                   url=entry['url'],
                   comments=entry['comments'],
                   points=entry['score']))
        s.commit()
    redirect("/news")
Пример #17
0
def update_news():
    """Collect fetched stories that are not stored and hand them over once.

    A story counts as stored when a row with the same title and author
    exists. The collected list is passed to ``make_table_news`` a single
    time after the loop; it used to be called on every iteration with the
    growing list. Ends by calling ``redirect("/news")``.
    """
    news = get_news(url="https://news.ycombinator.com/newest")
    make_news = []
    seen = set()
    for i in news:
        key = (i["title"], i["author"])
        if key in seen:
            # Same story repeated within this batch.
            continue
        seen.add(key)
        stored = session.query(News).filter(News.title == key[0],
                                            News.author == key[1]).first()
        if not stored:
            make_news.append(i)
    # NOTE(review): assumes make_table_news stores each item of the list it
    # receives -- confirm against its definition.
    make_table_news(session, make_news)
    redirect("/news")
Пример #18
0
def update_news():
    """Add stories from the first "newest" page whose id is not stored yet.

    The fetched ``'id'`` value is converted with ``int()`` before it is
    compared with the stored ``News.id`` values. New rows are built directly
    from the fetched dict, committed together, and then
    ``redirect("/news")`` is called.
    """
    s = session()

    current_news = get_news('https://news.ycombinator.com/newest', 1)
    stored_rows = s.query(News).options(load_only('id')).all()
    stored_ids = {row.id for row in stored_rows}

    for headline in current_news:
        if int(headline['id']) in stored_ids:
            continue
        s.add(News(**headline))

    s.commit()
    redirect("/news")
Пример #19
0
def add_db():
    """Insert every story fetched from itnews.com.au, committing one by one.

    The original author's note describes this as "adding 1000 news items";
    the actual number depends on what ``scraputils.get_news`` returns for the
    arguments used here. Missing keys are stored as ``None`` because values
    are read with ``dict.get``. A progress line is printed after each commit.
    """
    news_list_ = scraputils.get_news('https://www.itnews.com.au', 50)
    # One session serves the whole run instead of a new one per story.
    s = db.session()
    for q, k in enumerate(news_list_, start=1):
        news_ = db.News(title=k.get('title'),
                        author=k.get('author'),
                        url=k.get('url'),
                        comments=k.get('comments'))
        s.add(news_)
        s.commit()
        print("added news ", q)
Пример #20
0
def save_data(pages=1):
    """Insert every story fetched from the "newest" listing, one commit each.

    Args:
        pages: Passed to ``scraputils.get_news`` as its second argument
            (default 1).

    No duplicate check is performed.
    """
    fetched = scraputils.get_news("https://news.ycombinator.com/newest", pages)
    s = db.session()
    for story in fetched:
        s.add(db.News(title=story["title"],
                      author=story["author"],
                      url=story["url"],
                      comments=story["comments"],
                      points=story["points"]))
        s.commit()
Пример #21
0
def update_news():
    """Pass fetched stories whose title matches no stored row to ``add_to_db``.

    Only the title is compared. Stored rows are loaded once through the
    module-level session ``s``. Ends by calling ``redirect("/news")``.
    """
    fetched = get_news()
    stored_rows = s.query(News).all()
    for story in fetched:
        title_known = any(story['title'] == stored.title
                          for stored in stored_rows)
        if not title_known:
            add_to_db(story)

    redirect("/news")
Пример #22
0
def update_news():
    """Add "newest" stories for which no row with the same title and author exists.

    One query per fetched story decides whether it is already stored. All
    additions are committed together before ``redirect("/news")`` is called.
    """
    news_lst = get_news('https://news.ycombinator.com/newest', 5)
    s = session()
    for story in news_lst:
        matches = s.query(News).filter(
            News.title == story['title'],
            News.author == story['author'],
        ).all()
        if matches:
            continue
        s.add(
            News(
                title=story['title'],
                author=story.get('author'),
                points=story['points'],
                comments=story['comments'],
                url=story['url'],
            ))
    s.commit()
    redirect("/news")
Пример #23
0
def update_news():
    """Store the stories from the first "newest" page that are not saved yet.

    Candidate rows are narrowed in SQL to those whose author and title both
    occur among the fetched stories; the exact ``(title, author)`` pair is
    then matched in Python. Ends by calling ``redirect("/news")``.
    """
    s = session()
    latest_news = get_news("https://news.ycombinator.com/newest", n_pages=1)
    titles = [new['title'] for new in latest_news]
    authors = [new['author'] for new in latest_news]
    existing_news = s.query(News).filter(
        and_((News.author.in_(authors)), (News.title.in_(titles)))).all()
    # Built once and compared as PAIRS. The original rebuilt two separate
    # lists per story and skipped a new story whenever only its title or only
    # its author appeared on some stored row.
    known = {(n.title, n.author) for n in existing_news}
    for item in latest_news:
        key = (item['title'], item['author'])
        if key in known:
            continue
        # Remember the key so a repeat inside this batch is not added twice.
        known.add(key)
        s.add(News(**item))
    s.commit()
    redirect("/news")
Пример #24
0
def recommendations():
    """Label the first "newest" page with the module-level classifier.

    When ``classifier`` is falsy the result of ``redirect('/classify')`` is
    returned. Otherwise every fetched title is passed through ``normalize``,
    the classifier predicts one label per title, each label is stored under
    the story's ``'label'`` key, and the ``news_recommendations`` template is
    rendered with the stories as ``rows``.
    """
    global classifier
    if not classifier:
        return redirect('/classify')

    news_list = get_news('https://news.ycombinator.com/newest', 1)
    titles = [story['title'] for story in news_list]
    normalized_titles = [normalize(title) for title in titles]
    labels = classifier.predict(normalized_titles)
    for position, story in enumerate(news_list):
        story['label'] = labels[position]
    return template('news_recommendations', rows=news_list)
Пример #25
0
def update_news():
    """Add "newest" stories that match no stored row on author and title.

    Stored rows are loaded once and scanned for each fetched story. New rows
    are built directly from the fetched dict, committed together, and then
    ``redirect("/news")`` is called.
    """
    s = session()
    news_list = get_news("https://news.ycombinator.com/newest", n_pages=5)
    news_list_db = s.query(News).all()
    for candidate in news_list:
        already_stored = any(
            candidate["author"] == stored.author
            and candidate["title"] == stored.title
            for stored in news_list_db)
        if already_stored:
            continue
        s.add(News(**candidate))
    s.commit()
    redirect("/news")
Пример #26
0
def update_news():
    """Add the fetched front-page stories for which ``has`` returns a truthy value.

    All additions are committed together before ``redirect("/news")`` is
    called.
    """
    fetched = get_news("https://news.ycombinator.com/", 35)
    s = session()
    for story in fetched:
        # NOTE(review): a story is added when has(...) is truthy. If `has`
        # reports that the story is already stored, this condition is
        # inverted -- confirm against the helper's definition.
        if not has(s, story["author"], story["title"]):
            continue
        record = News(
            title=story["title"],
            author=story["author"],
            url=story["url"],
            points=story["points"],
            comments=story["comments"],
        )
        s.add(record)
    s.commit()
    redirect("/news")
Пример #27
0
def update_news():
    """Add "newest" stories for which no row with the same author and title exists.

    The session is committed after every fetched story, whether or not a row
    was added for it. Ends by calling ``redirect("/news")``.
    """
    fetched = get_news('https://news.ycombinator.com/newest')
    s = session()
    for story in fetched:
        matches = s.query(News).filter(
            News.author == story['author'],
            News.title == story['title'],
        ).all()
        if not matches:
            record = News(author=story['author'],
                          title=story['title'],
                          points=story['points'],
                          comments=story['comments'],
                          url=story['url'])
            s.add(record)
        s.commit()
    redirect("/news")
Пример #28
0
def update_news():
    """Add fetched stories whose (title, author) pair is not stored yet.

    Stored pairs are read once, before the loop, loading only the ``title``
    and ``author`` columns. All additions are committed together before
    ``redirect("/news")`` is called.
    """
    s = session()
    current_news = get_news()
    stored_rows = s.query(News).options(load_only("title", "author")).all()
    stored_pairs = {(row.title, row.author) for row in stored_rows}
    for story in current_news:
        if (story['title'], story['author']) in stored_pairs:
            continue
        s.add(
            News(
                title=story['title'],
                author=story['author'],
                url=story['url'],
                comments=story['comments'],
                points=story['points'],
            ))
    s.commit()
    redirect("/news")
Пример #29
0
def update_news():
    """Add "newest" stories for which no row with the same title and author exists.

    Steps:
      1. Fetch the data from the news site.
      2. Find out which stories are not in the DB yet. A story is considered
         uniquely identified by the combination of its title and author.
      3. Save the stories that were missing.

    Ends by calling ``redirect("/news")``.
    """
    updates = get_news('https://news.ycombinator.com/newest', n_pages=3)
    s = session()
    for story in updates:
        same_story = s.query(News).filter(
            News.title == story['title'],
            News.author == story['author'],
        )
        if same_story.all():
            continue
        s.add(News(**story))
    s.commit()

    redirect("/news")
Пример #30
0
def update_news():
    """Store front-page stories that are not saved yet, one commit per row.

    A story is skipped when a row with the same title AND author already
    exists. Ends by calling ``redirect("/news")``.
    """
    s = session()
    news = get_news("https://news.ycombinator.com/", 5)
    for n in news:
        # Conditions are passed as separate filter() arguments. Joining them
        # with Python's ``and`` gives SQLAlchemy only one of the two
        # expressions, so the author was never part of the query.
        already_stored = s.query(News).filter(
            News.title == n["title"],
            News.author == n["author"]).first()
        if already_stored:
            continue
        s.add(News(title=n["title"],
                   author=n["author"],
                   url=n["url"],
                   comments=n["comments"],
                   points=n["points"]))
        s.commit()
    redirect("/news")