Пример #1
0
def add_new_records(cur_date):
    """Insert every temporarily stored record into the database at DB_PATH.

    For each row yielded by ``temp_read()`` two inserts are performed:

    * ``sitedata`` receives ``row[0]`` as url, ``str(cur_date)`` as date,
      ``row[2]`` as content, and -1 for each of ``rank_d1`` .. ``rank_d30``.
    * ``siteinfo`` receives ``row[0]`` as url, ``row[1]`` as embedding, and
      -1 for both cluster and rank.

    Both inserts of a record are committed together, so a failure between
    them cannot leave a url present in one table but not the other.

    Args:
        cur_date: Value stored (after ``str()``) in the ``date`` column.

    Returns:
        None. ``sqlite3.Error`` is caught and reported on stdout instead of
        being raised; records committed before the error stay in the
        database.
    """

    print('Adding new records...')

    rank_days = 30
    columns = ['url', 'date', 'content']
    columns.extend('rank_d' + str(day) for day in range(1, rank_days + 1))
    # One placeholder per column, so the two can never drift apart.
    placeholders = ', '.join(['?'] * len(columns))
    query = ('INSERT INTO sitedata (' + ', '.join(columns) +
             ') VALUES (' + placeholders + ')')
    unranked = (-1,) * rank_days

    # Bound before the try so that the finally clause is safe even when
    # sqlite3.connect() itself raises.
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cur = conn.cursor()

        for row in temp_read():
            cur.execute(query, (row[0], str(cur_date), row[2]) + unranked)
            cur.execute(
                'INSERT INTO siteinfo (url, embedding, cluster, rank) VALUES (?,?,?,?)',
                (row[0], row[1], -1, -1))
            # Single commit per record keeps sitedata and siteinfo in sync.
            conn.commit()

        print("Successfully added new records")
    except sqlite3.Error as error:
        print("Error while adding new records ", error)
    finally:
        if conn is not None:
            conn.close()
Пример #2
0
def update(cur_date, urls):
    """Run one full update cycle from the temporarily stored scrape results.

    Steps, in order:

    1. Read the temporary records; ``record[0]`` is collected as a new url
       and ``record[1]`` is decoded with ``np.frombuffer`` as its embedding.
    2. Ask ``newdomains.get_adjusted_ranks`` for the adjusted ranks.
    3. Apply the changes through ``globaldata`` (add, delete, update rank,
       update date).
    4. Refresh trends, then the visited-domain list.
    5. Clear the temporary data.

    Args:
        cur_date: Date of this update run; passed on as-is or as ``str()``
            depending on the callee.
        urls: Forwarded unchanged to ``newdomains.get_adjusted_ranks``.
    """

    # Materialise once so both lists are built from the same records.
    records = list(newdomains.temp_read())
    new_urls = [record[0] for record in records]
    embedding = [np.frombuffer(record[1]) for record in records]

    print('\n', len(new_urls), 'new domains scrapped')

    print('\nAdjusting ranks...')
    ranks = newdomains.get_adjusted_ranks(cur_date, new_urls, urls)

    print('performing updates on global_data.....')
    date_text = str(cur_date)
    globaldata.add_new_records(cur_date)
    globaldata.delete_records(cur_date)
    rank_pairs = list(ranks.items())
    globaldata.update_rank(rank_pairs)
    # Each callee gets its own fresh key list, as before.
    globaldata.update_date(list(ranks.keys()), date_text)

    print('updating trends.......')
    trends.update_trends(new_urls, embedding, date_text)

    print('updating visited domains...')
    newdomains.update_visited_domains(list(ranks.keys()), new_urls, cur_date)

    print('clearing temporary data...')
    newdomains.temp_clear()

    print('SUCCESS')