Пример #1
0
    def test_counter(self):
        """Store the series fixture and verify the ``count()`` results of the DAOs.

        Every record read from ``./tests/series.ndjson`` is stored as a
        station, a series and a measurement. The test then checks the
        unfiltered counts (15 measurements, 4 stations, 6 series) and the
        counts filtered by ``series_id`` / ``station_id``, including ids
        for which a count of 0 is expected.
        """
        with open('./tests/series.ndjson', 'rb') as stream:
            records = list(
                get_jsons_from_stream(stream=stream,
                                      object_name='series.ndjson'))
        self.assertEqual(15, len(records))

        for record in records:
            _station, measurement, _rest = split_record(record)
            station_id = self.smdao.store_from_json(record)

            series_fields = {
                'station_id': station_id,
                'parameter': measurement['parameter'],
                'unit': measurement['unit'],
                'averagingPeriod': f"{measurement['averagingPeriod']}",
            }
            series_id = self.dao.store(**series_fields)

            # The id returned for the stored measurement is not needed here.
            self.mdao.store(series_id=series_id,
                            value=measurement['value'],
                            date=measurement['date']['utc'])

        all_measurements = self.mdao.get_all()
        self.assertEqual(15, len(all_measurements))
        self.assertEqual(self.mdao.count(), 15)

        # Unfiltered totals for stations and series.
        self.assertEqual(4, self.smdao.count())
        self.assertEqual(6, self.dao.count())

        # Filtered counts: one id with stored rows, then ids expected to
        # match nothing.
        self.assertEqual(4, self.mdao.count(series_id=1))
        self.assertEqual(0, self.mdao.count(series_id=838232))

        self.assertEqual(0, self.dao.count(station_id=1212))
Пример #2
0
def update_last(**kwargs):
    """Load measurements from the files selected for the current run window.

    The target directory is the ``target_dir`` Variable joined with the
    prefix returned by ``get_prefix(**kwargs)``. Its listing is narrowed
    by ``filter_file_list`` using the previous and next run timestamps,
    and every record of every remaining file is passed to ``add_to_db``.

    Args:
        **kwargs: Must contain ``prev_execution_date`` and
            ``next_execution_date``; the whole mapping is also forwarded
            to ``get_prefix``.
            NOTE(review): presumably the Airflow task context - confirm.

    Returns:
        Always ``True``.

    Raises:
        KeyError: If either execution-date key is missing from ``kwargs``.
    """
    prefix = get_prefix(**kwargs)
    base_dir = Variable.get('target_dir')
    target_dir = os.path.join(base_dir, prefix)
    logging.info(f'Will be processing [{ target_dir }]')

    flist = list_directory(target_dir)
    logging.info(f'Files detected: { len(flist)}')

    previous_run = kwargs['prev_execution_date']
    next_run = kwargs['next_execution_date']
    filtered_list = filter_file_list(flist=flist,
                                     previous_run=previous_run,
                                     next_run=next_run)
    logging.info(f'Previous run was @{previous_run}, next will be @{next_run}. File list reduced to: {len(filtered_list)}')

    station_dao, series_dao, mes_dao = setup_daos()

    # Counts every record handed to add_to_db across all files.
    processed = 0
    for fname in filtered_list:
        logging.info(f'Analyzing { fname}')

        with open(fname, 'rb') as stream:
            records = get_jsons_from_stream(stream=stream, object_name=fname)
            for record in records:
                station, measurement, _rest = split_record(record)
                add_to_db(station_dao,
                          series_dao,
                          mes_dao,
                          station=station,
                          measurement=measurement)
                processed += 1

    logging.info(f'Number of measurements added to DB: {processed}')
    print_db_stats(station_dao, series_dao, mes_dao)
    return True
Пример #3
0
    def test_get_forstation(self):
        """Store the series fixture and verify the per-station series lookup.

        After storing all 15 fixture records, the test expects
        ``get_all_for_station`` to return one series for "Nisekh" and
        three for "Sankt Eriksgatan", and expects the series with id 1 to
        carry 'pm10' at index 2.
        """
        with open('./tests/series.ndjson', 'rb') as stream:
            records = list(
                get_jsons_from_stream(stream=stream,
                                      object_name='series.ndjson'))
        self.assertEqual(15, len(records))

        for record in records:
            _station, measurement, _rest = split_record(record)
            station_id = self.smdao.store_from_json(record)

            series_fields = {
                'station_id': station_id,
                'parameter': measurement['parameter'],
                'unit': measurement['unit'],
                'averagingPeriod': f"{measurement['averagingPeriod']}",
            }
            series_id = self.dao.store(**series_fields)

            # The id returned for the stored measurement is not needed here.
            self.mdao.store(series_id=series_id,
                            value=measurement['value'],
                            date=measurement['date']['utc'])

        all_measurements = self.mdao.get_all()
        self.assertEqual(15, len(all_measurements))

        # NOTE(review): the keyword is ``station_id`` but the values passed
        # look like station names - confirm against the DAO.
        single = self.dao.get_all_for_station(station_id="Nisekh")
        self.assertEqual(1, len(single))

        several = self.dao.get_all_for_station(station_id="Sankt Eriksgatan")
        self.assertEqual(3, len(several))

        # Index 2 of the returned series is presumably the parameter
        # column - verify against the DAO schema.
        first_series = self.dao.get_for_id(series_id=1)
        self.assertEqual(first_series[2], 'pm10')
Пример #4
0
    def test_multiple_inserts(self):
        """Store every fixture record and verify the resulting row totals.

        The 15 records of ``./tests/series.ndjson`` are each stored as a
        station, a series and a measurement; afterwards the test expects
        ``get_all()`` to yield 4 stations, 6 series and 15 measurements.
        """
        with open('./tests/series.ndjson', 'rb') as stream:
            records = list(
                get_jsons_from_stream(stream=stream,
                                      object_name='series.ndjson'))
        self.assertEqual(len(records), 15)

        for record in records:
            _station, measurement, _rest = split_record(record)
            station_id = self.smdao.store_from_json(record)

            series_fields = {
                'station_id': station_id,
                'parameter': measurement['parameter'],
                'unit': measurement['unit'],
                'averagingPeriod': f"{measurement['averagingPeriod']}",
            }
            series_id = self.dao.store(**series_fields)

            # The id returned for the stored measurement is not needed here.
            self.mdao.store(series_id=series_id,
                            value=measurement['value'],
                            date=measurement['date']['utc'])

        # Check the totals in the same order as the original assertions:
        # stations, then series, then measurements.
        all_stations = self.smdao.get_all()
        self.assertEqual(len(all_stations), 4)

        all_series = self.dao.get_all()
        self.assertEqual(len(all_series), 6)

        all_measurements = self.mdao.get_all()
        self.assertEqual(len(all_measurements), 15)
Пример #5
0
    def test_get_jsons_from_gzipped_stream(self):
        """Expect 64 records from the ``.ndjson.gz`` fixture.

        The file is opened in binary mode and its path is also passed as
        ``object_name``.
        """
        fixture = './tests/2014-03-30.ndjson.gz'
        with open(fixture, 'rb') as stream:
            records = list(
                get_jsons_from_stream(stream=stream, object_name=fixture))

        self.assertEqual(64, len(records))
Пример #6
0
    def test_get_jsons_from_stream(self):
        """Expect 7444 records from the plain ``exobj.ndjson`` fixture.

        The file is opened in binary mode and its path is also passed as
        ``object_name``.
        """
        fixture = './tests/exobj.ndjson'
        with open(fixture, 'rb') as stream:
            records = list(
                get_jsons_from_stream(stream=stream, object_name=fixture))

        self.assertEqual(7444, len(records))
Пример #7
0
def go_through(**kwargs):
    """Load measurements from every entry directly under the target directory.

    The target directory is the ``target_dir`` Variable joined with the
    prefix returned by ``get_prefix(**kwargs)``. Each path matched by
    ``<target_dir>/*`` is opened in binary mode and every record it yields
    is passed to ``add_to_db``; DB statistics are printed at the end.

    Args:
        **kwargs: Forwarded unchanged to ``get_prefix``.
            NOTE(review): presumably the Airflow task context - confirm.

    Returns:
        Always ``True``.
    """
    prefix = get_prefix(**kwargs)
    base_dir = Variable.get('target_dir')
    target_dir = os.path.join(base_dir, prefix)
    logging.info(f'Will be processing [{ target_dir }]')

    pattern = os.path.join(target_dir, '*')
    flist = glob.glob(pattern)
    logging.info(f'Files detected: { len(flist)}')

    station_dao, series_dao, mes_dao = setup_daos()

    for fname in flist:
        logging.info(f'Processing { fname}')
        with open(fname, 'rb') as stream:
            records = get_jsons_from_stream(stream=stream, object_name=fname)
            for record in records:
                station, measurement, _rest = split_record(record)
                add_to_db(station_dao, series_dao, mes_dao,
                          station=station, measurement=measurement)

    print_db_stats(station_dao, series_dao, mes_dao)
    return True