def get_country_population_density(file_path):
    """Read population and density figures for known countries from a CSV file.

    The first row of the file is treated as a header and skipped.  In every
    following row, column 0 is read as the country name, column 1 as the
    population and column 4 as the density.  A row is kept only when its
    country name equals the second element of one of the entries returned by
    ``DataSelector().get_countries()``; the first element of the first such
    entry is used as the country id.

    Args:
        file_path: Path of the comma-delimited CSV file to read.

    Returns:
        A list of ``(population, density, country_id)`` tuples, with
        population and density converted by ``int``.

    Raises:
        IndexError: If a non-empty data row has fewer than five columns.
        ValueError: If the population or density of a matched row cannot be
            converted by ``int``.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation recommends (matters for quoted fields with line breaks).
    with open(file_path, newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))

    data_selector = DataSelector()
    parsed_data = []

    # Build the name -> id lookup once instead of scanning every country for
    # every CSV row.  setdefault keeps the first id seen for a repeated name,
    # which is the entry the original first-match scan would have picked.
    country_ids = {}
    for country in data_selector.get_countries():
        country_ids.setdefault(country[1], country[0])

    for row in rows[1:]:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to parse.
            continue
        country_name, population, density = row[0], row[1], row[4]
        if country_name in country_ids:
            parsed_data.append(
                (int(population), int(density), country_ids[country_name]))
    return parsed_data
def __init__(self, select_fun, insert_fun, worker_class,
             lock=threading.Lock(), db_m=DBManager()):
    """Initialise the thread, its work queues and its database helpers.

    Args:
        select_fun: Object whose ``__name__`` is stored in ``select_fun``.
        insert_fun: Object whose ``__name__`` is stored in ``insert_fun``.
        worker_class: Stored unchanged in ``worker_class``.
        lock: Stored in ``_db_access``.
        db_m: Stored in ``db_m`` and handed to both ``DataInserter`` and
            ``DataSelector``.

    Note:
        The ``lock`` and ``db_m`` defaults are evaluated once, when this
        function is defined, so every call that relies on the defaults
        receives the same lock object and the same ``DBManager`` instance.
    """
    threading.Thread.__init__(self)

    # Fixed tuning values and the stop flag.
    self.quota_exceeded = False
    self.batch_size = 10
    self.max_threads = 5  # 5 worked..
    self.num_loc_threads = 2

    # Work queues.
    self.done_data = queue.Queue()
    self.new_data = queue.Queue()

    # Collaborators passed in by the caller.
    self.worker_class = worker_class
    self._db_access = lock
    self.db_m = db_m

    # Database helpers built on top of the manager.
    self.db_insert = DataInserter(self.db_m)
    self.db_select = DataSelector(self.db_m)

    # Only the names are kept, not the objects themselves.
    self.select_fun = select_fun.__name__
    self.insert_fun = insert_fun.__name__
from db_utils.data_selector import DataSelector


def hashtag_cloud(db, from_date=None, to_date=None, month_name=None):
    """Render a hashtag word cloud, save it as a PNG and display it.

    Args:
        db: Object providing ``get_hashtags_cloud(from_date, to_date)``; its
            result is passed to ``WordCloud.generate_from_frequencies``.
        from_date: Forwarded unchanged to ``db.get_hashtags_cloud``.
        to_date: Forwarded unchanged to ``db.get_hashtags_cloud``.
        month_name: Label appended to the plot title and to the output file
            name (``wordcloud_<month_name>.png``).  When ``None`` the title
            is ``"Hashtags"`` and the file is ``wordcloud.png``; previously
            the default value raised ``TypeError`` during string
            concatenation.
    """
    most_popular_hashtags = db.get_hashtags_cloud(from_date, to_date)
    wordcloud = WordCloud(
        background_color="white",
        width=1600,
        height=800).generate_from_frequencies(most_popular_hashtags)

    if month_name is None:
        title = "Hashtags"
        file_name = 'wordcloud.png'
    else:
        title = "Hashtags in " + month_name
        file_name = 'wordcloud_' + month_name + '.png'

    plt.figure(figsize=(20, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()


if __name__ == '__main__':
    db = DataSelector()
    # Each tuple is (from_date, to_date, month_name), unpacked into the call.
    march = datetime.strptime("2020-03-01", "%Y-%m-%d").date(), datetime.strptime(
        "2020-03-31", "%Y-%m-%d").date(), "march"
    april = datetime.strptime("2020-04-01", "%Y-%m-%d").date(), datetime.strptime(
        "2020-04-30", "%Y-%m-%d").date(), "april"
    hashtag_cloud(db, *march)
    hashtag_cloud(db, *april)
name="N"), secondary_y=False ) fig.update_xaxes(title_text="date") fig.update_yaxes(title_text="Number of tweets", secondary_y=True) fig.update_layout( title_text="S_PpNPm_R: " + country_name, ) fig.show() if __name__ == "__main__": db = DataSelector() # ------ COVID19 ------ # c19_infected_in_POL_fig = affected_in(["Poland", "Italy"], ["deaths", 'confirmed'], "COVID19", db) # c19_infected_in_POL_fig.show() # SIR_predictction_fig = SIR_predicted_in("Italy", "COVID19", 100, db, SIR) # SIR_predictction_fig.show() # SEIR_predictction_fig = SEIR_predicted_in("Italy", "COVID19", 100, db, SEIR) # SEIR_predictction_fig.show() # country = COUNTRY_NAME, STATE_NAME=None country = "Poland", None # country = "Italy", None
class Pipe(threading.Thread):
    """Thread that feeds selected rows to worker threads and stores results.

    Data is fetched by calling the method named ``select_fun`` on a
    ``DataSelector``, queued in ``new_data``, handed in batches to instances
    of ``worker_class`` (constructed as ``worker_class(id, batch, pipe)``),
    and whatever ends up in ``done_data`` is written by calling the method
    named ``insert_fun`` on a ``DataInserter``.  Database calls are made
    while holding ``_db_access``.
    """

    def __init__(self, select_fun, insert_fun, worker_class,
                 lock=threading.Lock(), db_m=DBManager()):
        """Initialise the thread, its work queues and its database helpers.

        Args:
            select_fun: Object whose ``__name__`` names the method called on
                the ``DataSelector`` to fetch new data.
            insert_fun: Object whose ``__name__`` names the method called on
                the ``DataInserter`` to store finished data.
            worker_class: Class instantiated as
                ``worker_class(thread_id, batch, pipe)``; instances must
                provide ``start()``, ``join()`` and ``is_alive()``.
            lock: Lock held around database calls.
            db_m: Manager handed to ``DataInserter`` and ``DataSelector``.

        Note:
            The ``lock`` and ``db_m`` defaults are evaluated once, when the
            class body runs, so pipes created without explicit values share
            one lock and one ``DBManager`` instance.
        """
        threading.Thread.__init__(self)
        self.new_data = queue.Queue()
        self.done_data = queue.Queue()
        self.db_m = db_m
        self._db_access = lock
        self.db_insert = DataInserter(self.db_m)
        self.db_select = DataSelector(self.db_m)
        self.num_loc_threads = 2
        self.max_threads = 5  # 5 worked..
        self.batch_size = 10
        self.quota_exceeded = False
        self.select_fun = select_fun.__name__
        self.insert_fun = insert_fun.__name__
        self.worker_class = worker_class

    def _drain_batch(self, source):
        """Pop up to ``batch_size`` items from *source* without blocking."""
        batch = []
        while len(batch) < self.batch_size:
            try:
                # get_nowait() instead of empty()+get(): if another consumer
                # takes the last item in between, get() would block forever.
                batch.append(source.get_nowait())
            except queue.Empty:
                break
        return batch

    def _select_batch(self, limit):
        """Call the configured select method with *limit* under the DB lock."""
        with self._db_access:
            return getattr(self.db_select, self.select_fun)(limit)

    def _insert_done_batches(self):
        """Insert everything queued in ``done_data``, batch by batch."""
        if self.done_data.empty():
            return
        with self._db_access:
            insert = getattr(self.db_insert, self.insert_fun)
            while not self.done_data.empty():
                insert(self.get_done_data())

    def get_new_data(self):
        """Return a list of at most ``batch_size`` items from ``new_data``."""
        return self._drain_batch(self.new_data)

    def get_done_data(self):
        """Return a list of at most ``batch_size`` items from ``done_data``."""
        return self._drain_batch(self.done_data)

    def put_new_data(self, data):
        """Queue every element of *data* in ``new_data``."""
        for d in data:
            self.new_data.put(d)

    def put_done_data(self, data):
        """Queue every element of *data* in ``done_data``."""
        for d in data:
            self.done_data.put(d)

    def stop(self):
        """Set ``quota_exceeded`` so the processing loops exit at their next check."""
        self.quota_exceeded = True

    def run(self):
        """Process data in epochs using several worker threads.

        Each epoch starts ``num_loc_threads`` workers, optionally waits for
        older workers when more than ``max_threads`` are tracked, inserts
        all finished data and selects the next chunk.  The loop ends when
        ``new_data`` is empty or ``quota_exceeded`` is set; the remaining
        workers are then joined and the last results inserted.
        """
        worker_threads = []
        epoch_count = 0
        select_scale = 5

        new_data = self._select_batch(self.batch_size * select_scale)
        print('Data selected')
        self.put_new_data(new_data)

        thread_id = 0
        while not self.new_data.empty() and not self.quota_exceeded:
            print("----- Beginning " + str(epoch_count) + " epoch -----")
            worker_threads = [t for t in worker_threads if t.is_alive()]
            print("Active threads: " + str(len(worker_threads)))
            print("Data to process: " + str(self.new_data.qsize()))

            for _ in range(self.num_loc_threads):
                thread = self.worker_class(thread_id, self.get_new_data(), self)
                thread.start()
                worker_threads.append(thread)
                thread_id += 1
            print("Processing started")

            if len(worker_threads) > self.max_threads:
                print('Too many to process, waiting..')
                # Unary minus binds tighter than //, so the bound is
                # (-max_threads) // 2, i.e. -3 for max_threads == 5: every
                # thread except the last three is joined.
                # NOTE(review): -(max_threads // 2) may have been intended.
                for t in worker_threads[:-self.max_threads // 2]:
                    t.join()
                print('Resuming...')

            print("Inserting started")
            print("Data to insert: " + str(self.done_data.qsize()))
            self._insert_done_batches()

            new_data = self._select_batch(self.batch_size * select_scale)
            print('New data selected')
            self.put_new_data(new_data)
            epoch_count += 1

        print('--- No more data ---')
        print('Joining threads')
        for t in worker_threads:
            t.join()
        print("All thread finished, inserting last..")
        self._insert_done_batches()
        print('--- Everything added ---')

    def run_one(self):
        """Process data in epochs with a single worker at a time.

        Each epoch runs one worker to completion on one batch, inserts one
        batch of finished data and selects the next ``batch_size`` items.
        Anything still in ``done_data`` after the loop is inserted at the
        end.
        """
        epoch_count = 0
        new_data = self._select_batch(self.batch_size)
        print('Data selected')
        self.put_new_data(new_data)

        while not self.new_data.empty() and not self.quota_exceeded:
            print("----- Beginning " + str(epoch_count) + " epoch -----")
            print(self.quota_exceeded)
            worker = self.worker_class(1, self.get_new_data(), self)
            worker.start()
            print("Processing started")
            worker.join()
            print("Data to insert: " + str(self.done_data.qsize()))
            # Insert and the following select share one lock acquisition.
            # The insert runs even when the batch is empty, as before.
            with self._db_access:
                getattr(self.db_insert, self.insert_fun)(self.get_done_data())
                new_data = getattr(self.db_select, self.select_fun)(
                    self.batch_size)
            print('New data selected')
            self.put_new_data(new_data)
            epoch_count += 1

        self._insert_done_batches()
        print('--- Everything added ---')