def add_new_records(cur_date):
    """ Append new records to global_data """
    print('Adding new records...')
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        cur = conn.cursor()

        # Build the column list: url, date, content plus 30 daily rank columns.
        col = ['url', 'date', 'content']
        for r in range(30):
            col.append('rank_d' + str(r + 1))

        # One '?' placeholder per column (33 in total).
        ques = ['?'] * 33
        query = ('INSERT INTO sitedata (' + ', '.join(col) +
                 ') VALUES (' + ', '.join(ques) + ')')

        for row in temp_read():
            # Ranks are initialised to -1 until they are computed.
            values = [row[0], str(cur_date), row[2]]
            values.extend([-1] * 30)
            cur.execute(query, tuple(values))
            cur.execute(
                'INSERT INTO siteinfo (url, embedding, cluster, rank) VALUES (?, ?, ?, ?)',
                (row[0], row[1], -1, -1))
        conn.commit()
        print('Successfully added new records')
    except sqlite3.Error as error:
        print('Error while adding new records ', error)
    finally:
        if conn:
            conn.close()
def update(cur_date, urls):
    """ Update kmeans and web.db """
    # Collect the newly scraped domains and their embeddings from temporary storage.
    new_urls = []
    embedding = []
    for data in newdomains.temp_read():
        new_urls.append(data[0])
        embedding.append(np.frombuffer(data[1]))
    print('\n', len(new_urls), 'new domains scraped')

    print('\nAdjusting ranks...')
    ranks = newdomains.get_adjusted_ranks(cur_date, new_urls, urls)

    print('Performing updates on global_data...')
    globaldata.add_new_records(cur_date)
    globaldata.delete_records(cur_date)
    globaldata.update_rank(list(ranks.items()))
    globaldata.update_date(list(ranks.keys()), str(cur_date))

    print('Updating trends...')
    trends.update_trends(new_urls, embedding, str(cur_date))

    print('Updating visited domains...')
    newdomains.update_visited_domains(list(ranks.keys()), new_urls, cur_date)

    print('Clearing temporary data...')
    newdomains.temp_clear()
    print('SUCCESS')
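# A minimal, hypothetical driver sketching how update() might be invoked as a
# daily job. It assumes the module is run as a script, that today's date is the
# update date, and that the list of already-tracked URLs is loaded elsewhere;
# none of these assumptions come from the functions above.
if __name__ == '__main__':
    import datetime

    today = datetime.date.today()
    tracked_urls = []  # hypothetical: load the currently tracked URLs here
    update(today, tracked_urls)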