def get_country_population_density(file_path):
    """Load population and density figures for countries known to the database.

    Reads the CSV file at ``file_path``, skips its first row (treated as a
    header) and, for every remaining non-empty row, takes the country name
    from column 0, the population from column 1 and the density from
    column 4. Rows whose country name does not match any entry returned by
    ``DataSelector().get_countries()`` are dropped.

    Args:
        file_path: Path of the comma-separated file to read.

    Returns:
        A list of ``(population, density, country_id)`` tuples in file order,
        with population and density converted to ``int``.

    Raises:
        IndexError: If a non-empty data row has fewer than five columns.
        ValueError: If the population or density of a matched country is not
            an integer literal.
    """
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(file_path, newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=','))

    # Build the name -> id lookup once instead of scanning the whole country
    # list for every CSV row. Each entry is indexed as (id, name, ...);
    # setdefault keeps the first id when a name occurs more than once.
    # NOTE(review): assumes country names are hashable (strings) - confirm.
    country_ids = {}
    for country in DataSelector().get_countries():
        country_ids.setdefault(country[1], country[0])

    parsed_data = []
    for row in rows[1:]:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline).
            continue

        country_name, population, density = row[0], row[1], row[4]

        if country_name in country_ids:
            parsed_data.append(
                (int(population), int(density), country_ids[country_name]))

    return parsed_data
示例#2
0
 def __init__(self, select_fun, insert_fun, worker_class,
              lock=threading.Lock(), db_m=DBManager()):
     """Set up the queues, database helpers and tuning knobs of the pipe.

     Args:
         select_fun: Callable whose ``__name__`` is stored as the name of
             the selector method to use.
         insert_fun: Callable whose ``__name__`` is stored as the name of
             the inserter method to use.
         worker_class: Class stored for creating workers later on.
         lock: Lock kept as ``_db_access``.
         db_m: Database manager handed to ``DataInserter`` and
             ``DataSelector``.

     NOTE: the ``lock`` and ``db_m`` defaults are evaluated once, when this
     function is defined, so every instance created without explicit values
     shares the same lock and the same manager.
     """
     threading.Thread.__init__(self)

     # Database access: one manager shared by the insert and select helpers.
     self._db_access = lock
     self.db_m = db_m
     self.db_insert = DataInserter(self.db_m)
     self.db_select = DataSelector(self.db_m)

     # Work queues: items waiting to be processed / items already processed.
     self.new_data = queue.Queue()
     self.done_data = queue.Queue()

     # Only the method names are kept, not the callables themselves.
     self.select_fun = select_fun.__name__
     self.insert_fun = insert_fun.__name__
     self.worker_class = worker_class

     # Tuning knobs and stop flag.
     self.num_loc_threads = 2
     self.max_threads = 5  # 5 worked..
     self.batch_size = 10
     self.quota_exceeded = False
from db_utils.data_selector import DataSelector


def hashtag_cloud(db, from_date=None, to_date=None, month_name=None):
    """Render, save and display a word cloud of hashtag frequencies.

    The frequencies come from ``db.get_hashtags_cloud(from_date, to_date)``
    and are passed to ``WordCloud.generate_from_frequencies``. The figure is
    written to a PNG file in the current working directory and then shown.

    Args:
        db: Object providing ``get_hashtags_cloud(from_date, to_date)``.
        from_date: Forwarded unchanged to ``db.get_hashtags_cloud``.
        to_date: Forwarded unchanged to ``db.get_hashtags_cloud``.
        month_name: Label appended to the plot title and to the output file
            name (``wordcloud_<month_name>.png``). When ``None``, the title
            is just ``"Hashtags"`` and the file is ``wordcloud.png``.
    """
    hashtag_frequencies = db.get_hashtags_cloud(from_date, to_date)

    wordcloud = WordCloud(
        background_color="white", width=1600,
        height=800).generate_from_frequencies(hashtag_frequencies)

    # month_name defaults to None; concatenating None to a str raised a
    # TypeError, so fall back to an unlabelled title and file name instead.
    if month_name is None:
        title = "Hashtags"
        file_name = 'wordcloud.png'
    else:
        title = "Hashtags in " + month_name
        file_name = 'wordcloud_' + month_name + '.png'

    plt.figure(figsize=(20, 10))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.savefig(file_name, bbox_inches='tight')
    plt.show()


if __name__ == '__main__':
    # Draw one hashtag cloud per month: (first day, last day, label).
    db = DataSelector()
    date_format = "%Y-%m-%d"

    march = (
        datetime.strptime("2020-03-01", date_format).date(),
        datetime.strptime("2020-03-31", date_format).date(),
        "march",
    )
    april = (
        datetime.strptime("2020-04-01", date_format).date(),
        datetime.strptime("2020-04-30", date_format).date(),
        "april",
    )

    for period in (march, april):
        hashtag_cloud(db, *period)
示例#4
0
            name="N"),
        secondary_y=False
    )

    fig.update_xaxes(title_text="date")

    fig.update_yaxes(title_text="Number of tweets", secondary_y=True)

    fig.update_layout(
        title_text="S_PpNPm_R: " + country_name,
    )
    fig.show()


if __name__ == "__main__":
    # Script entry point: create the selector that the example calls below
    # receive as their ``db`` argument.
    db = DataSelector()

    # ------ COVID19 ------
    # The calls in this section are disabled examples kept for reference.

    # c19_infected_in_POL_fig = affected_in(["Poland", "Italy"], ["deaths", 'confirmed'], "COVID19", db)
    # c19_infected_in_POL_fig.show()

    # SIR_predictction_fig = SIR_predicted_in("Italy", "COVID19", 100, db, SIR)
    # SIR_predictction_fig.show()

    # SEIR_predictction_fig = SEIR_predicted_in("Italy", "COVID19", 100, db, SEIR)
    # SEIR_predictction_fig.show()

    # ``country`` is a (COUNTRY_NAME, STATE_NAME) tuple; STATE_NAME may be None.
    country = "Poland", None
    # Alternative selection:
    # country = "Italy", None
示例#5
0
class Pipe(threading.Thread):
    """Thread that moves data from a select query, through workers, to an insert query.

    Items fetched with the ``DataSelector`` method named after ``select_fun``
    are queued in ``new_data``. Workers are created as
    ``worker_class(thread_id, batch, pipe)`` and must offer ``start()``,
    ``join()`` and ``is_alive()``. Whatever is queued in ``done_data``
    (presumably by the workers via ``put_done_data`` - confirm against the
    worker classes) is written with the ``DataInserter`` method named after
    ``insert_fun``. All database calls are made while holding ``_db_access``.
    """

    def __init__(self,
                 select_fun,
                 insert_fun,
                 worker_class,
                 lock=threading.Lock(),
                 db_m=DBManager()):
        """Set up the queues, database helpers and tuning knobs.

        Args:
            select_fun: Callable whose ``__name__`` names the
                ``DataSelector`` method used to fetch data; that method is
                called with a single integer derived from ``batch_size``.
            insert_fun: Callable whose ``__name__`` names the
                ``DataInserter`` method used to store a batch (a list).
            worker_class: Class instantiated as
                ``worker_class(thread_id, batch, pipe)``.
            lock: Lock guarding every database call made by this pipe.
            db_m: Database manager handed to ``DataInserter`` and
                ``DataSelector``.

        NOTE: the ``lock`` and ``db_m`` defaults are evaluated once, when the
        class body is executed, so all pipes created without explicit values
        share one lock and one manager. Kept as-is because callers may rely
        on that shared lock.
        """
        super().__init__()
        self.new_data = queue.Queue()
        self.done_data = queue.Queue()
        self.db_m = db_m
        self._db_access = lock
        self.db_insert = DataInserter(self.db_m)
        self.db_select = DataSelector(self.db_m)
        self.num_loc_threads = 2
        self.max_threads = 5  # 5 worked..
        self.batch_size = 10
        self.quota_exceeded = False
        # Only the method names are stored; they are resolved on the
        # selector / inserter objects at call time.
        self.select_fun = select_fun.__name__
        self.insert_fun = insert_fun.__name__
        self.worker_class = worker_class

    @staticmethod
    def _drain(source, limit):
        """Pop at most ``limit`` items from ``source``, stopping early once it reports empty."""
        batch = []
        while len(batch) < limit and not source.empty():
            batch.append(source.get())
        return batch

    def _run_select(self, amount):
        """Call the configured selector method with ``amount``; the caller holds the lock."""
        # getattr (unlike a direct __getattribute__ call) also honours a
        # __getattr__ fallback on the selector object.
        return getattr(self.db_select, self.select_fun)(amount)

    def _run_insert(self, batch):
        """Call the configured inserter method with ``batch``; the caller holds the lock."""
        getattr(self.db_insert, self.insert_fun)(batch)

    def _flush_done_data(self):
        """Insert everything currently in ``done_data``, batch by batch, under the lock."""
        if self.done_data.empty():
            # Nothing to write: do not take the database lock at all.
            return
        with self._db_access:
            while not self.done_data.empty():
                self._run_insert(self.get_done_data())

    def get_new_data(self):
        """Return up to ``batch_size`` items taken from the ``new_data`` queue."""
        return self._drain(self.new_data, self.batch_size)

    def get_done_data(self):
        """Return up to ``batch_size`` items taken from the ``done_data`` queue."""
        return self._drain(self.done_data, self.batch_size)

    def put_new_data(self, data):
        """Queue every item of ``data`` for processing."""
        for d in data:
            self.new_data.put(d)

    def put_done_data(self, data):
        """Queue every item of ``data`` for insertion."""
        for d in data:
            self.done_data.put(d)

    def stop(self):
        """Ask the processing loops to exit before starting another epoch."""
        self.quota_exceeded = True

    def run(self):
        """Thread body: process data with several concurrent workers per epoch.

        Each epoch starts ``num_loc_threads`` workers, throttles when more
        than ``max_threads`` are tracked, inserts finished data and selects
        the next chunk. The loop ends when ``new_data`` is empty or
        ``stop()`` was called; remaining workers are then joined and the
        last results inserted.
        """
        worker_threads = []
        epoch_count = 0
        select_scale = 5  # select several batches' worth of data per query
        thread_id = 0

        with self._db_access:
            new_data = self._run_select(self.batch_size * select_scale)
        print('Data selected')
        self.put_new_data(new_data)

        while not self.new_data.empty() and not self.quota_exceeded:
            print("----- Beginning " + str(epoch_count) + " epoch -----")
            # Forget workers that have already finished.
            worker_threads = [t for t in worker_threads if t.is_alive()]
            print("Active threads: " + str(len(worker_threads)))
            print("Data to process: " + str(self.new_data.qsize()))
            for _ in range(self.num_loc_threads):
                thread = self.worker_class(thread_id, self.get_new_data(),
                                           self)
                thread.start()
                worker_threads.append(thread)
                thread_id += 1

            print("Processing started")

            if len(worker_threads) > self.max_threads:
                print('Too many to process, waiting..')
                # Unary minus binds tighter than //, so the bound is
                # (-max_threads) // 2, i.e. -3 for max_threads == 5: every
                # worker except the newest three is joined.
                for t in worker_threads[:-self.max_threads // 2]:
                    t.join()
                print('Resuming...')

            print("Inserting started")
            print("Data to insert: " + str(self.done_data.qsize()))
            self._flush_done_data()

            with self._db_access:
                new_data = self._run_select(self.batch_size * select_scale)
            print('New data selected')
            self.put_new_data(new_data)
            epoch_count += 1

        print('--- No more data ---')
        print('Joining threads')
        for t in worker_threads:
            t.join()
        print("All thread finished, inserting last..")

        self._flush_done_data()
        print('--- Everything added ---')

    def run_one(self):
        """Process data with a single worker at a time.

        Sequential variant of ``run``: each epoch starts one worker on one
        batch, waits for it, then inserts one batch of finished data and
        selects the next batch within a single lock acquisition. Anything
        still queued in ``done_data`` after the loop is inserted at the end.
        """
        epoch_count = 0

        with self._db_access:
            new_data = self._run_select(self.batch_size)
        print('Data selected')
        self.put_new_data(new_data)

        while not self.new_data.empty() and not self.quota_exceeded:
            print("----- Beginning " + str(epoch_count) + " epoch -----")
            print(self.quota_exceeded)

            worker = self.worker_class(1, self.get_new_data(), self)
            worker.start()
            print("Processing started")
            worker.join()
            print("Data to insert: " + str(self.done_data.qsize()))
            # Insert and re-select under one lock acquisition; the insert is
            # issued even when the batch is empty.
            with self._db_access:
                self._run_insert(self.get_done_data())
                new_data = self._run_select(self.batch_size)
            print('New data selected')
            self.put_new_data(new_data)
            epoch_count += 1

        self._flush_done_data()
        print('--- Everything added ---')