示例#1
0
    def insert_data(self, timeseries, tms_id, fgt, upsert=False):
        """
        Insert a forecast timeseries into the `data` table.

        :param timeseries: list of [time, value] lists
        :param tms_id: hash value identifying the timeseries (event id)
        :param fgt: forecast generated time
        :param boolean upsert: If True, upsert existing values ON DUPLICATE KEY. Default is False.
        Ref: 1). https://stackoverflow.com/a/14383794/1461060
             2). https://chartio.com/resources/tutorials/how-to-insert-if-row-does-not-exist-upsert-in-mysql/
        :return: row count if insertion was successful, else re-raises the
            underlying exception after rolling back the transaction
        """

        # Build [tms_id, time, fgt, value] rows. Unlike the original, this
        # does NOT mutate the caller's inner lists (the old code insert()ed
        # into them in place via a shallow copy).
        new_timeseries = []
        for item in timeseries:
            if len(item) > 1:
                new_timeseries.append([tms_id, item[0], fgt] + list(item[1:]))
            else:
                logger.warning('Invalid timeseries data:: %s', item)

        row_count = 0

        connection = self.pool.connection()
        try:
            with connection.cursor() as cursor:
                if upsert:
                    sql_statement = "INSERT INTO `data` (`id`, `time`, `fgt`, `value`) VALUES (%s, %s, %s, %s) " \
                                    "ON DUPLICATE KEY UPDATE `value`=VALUES(`value`)"
                else:
                    sql_statement = "INSERT INTO `data` (`id`, `time`, `fgt`, `value`) VALUES (%s, %s, %s, %s)"
                row_count = cursor.executemany(sql_statement, new_timeseries)
            connection.commit()
            return row_count
        except Exception as exception:
            connection.rollback()
            # BUG FIX: use tms_id directly; the original read timeseries[0][0],
            # which raises IndexError when timeseries is empty and only worked
            # at all because the loop above used to mutate the inner lists.
            error_message = "Data insertion to data table for tms id {}, upsert={} failed.".format(
                tms_id, upsert)
            logger.error(error_message)
            traceback.print_exc()
            raise exception

        finally:
            if connection is not None:
                connection.close()
示例#2
0
def extract_distrubuted_hechms_outputs(output_file_name, output_dir, run_date,
                                       run_time):
    """
    Extract a HEC-HMS discharge output csv and push it to the curw_fcst
    database as a forecast timeseries.

    :param output_file_name: model output csv name, e.g. "DailyDischarge.csv"
    :param output_dir: directory that contains the output csv
    :param run_date: model run date ("YYYY-MM-DD")
    :param run_time: model run time ("HH:MM:SS")

    Expected Config.json:
    {
      "output_file_name": "DailyDischarge.csv",
      "output_dir": "",

      "run_date": "2019-05-24",
      "run_time": "00:00:00",
      "utc_offset": "",

      "sim_tag": "hourly_run",

      "model": "HECHMS",
      "version": "single",

      "unit": "m3/s",
      "unit_type": "Instantaneous",

      "variable": "Discharge",

      "station_name": "Hanwella"
    }
    """
    try:
        # BUG FIX: close the config file handle (the original leaked it);
        # also removed the no-op self-assignments of the four parameters.
        with open('config.json') as config_file:
            config = json.loads(config_file.read())

        utc_offset = read_attribute_from_config_file('utc_offset', config,
                                                     False)
        if utc_offset is None:
            utc_offset = ''

        # sim tag
        sim_tag = read_attribute_from_config_file('sim_tag', config, True)

        # source details
        model = read_attribute_from_config_file('model', config, True)
        version = read_attribute_from_config_file('version', config, True)

        # unit details
        unit = read_attribute_from_config_file('unit', config, True)
        unit_type = UnitType.getType(
            read_attribute_from_config_file('unit_type', config, True))

        # variable details
        variable = read_attribute_from_config_file('variable', config, True)

        # station details
        station_name = read_attribute_from_config_file('station_name', config,
                                                       True)

        out_file_path = os.path.join(output_dir, output_file_name)

        if not os.path.exists(out_file_path):
            msg = 'no file :: {}'.format(out_file_path)
            logger.warning(msg)
            print(msg)
            exit(1)

        # forecast generated time := output file's last-modified time
        fgt = get_file_last_modified_time(out_file_path)
        print("fgt, ", fgt)

        timeseries = read_csv(out_file_path)

        pool = get_Pool(host=CURW_FCST_HOST,
                        port=CURW_FCST_PORT,
                        db=CURW_FCST_DATABASE,
                        user=CURW_FCST_USERNAME,
                        password=CURW_FCST_PASSWORD)

        hechms_stations = get_hechms_stations(pool=pool)

        # single lookup (the original called .get(station_name) three times)
        station = hechms_stations.get(station_name)
        station_id = station[0]
        lat = str(station[1])
        lon = str(station[2])

        source_id = get_source_id(pool=pool, model=model, version=version)

        variable_id = get_variable_id(pool=pool, variable=variable)

        unit_id = get_unit_id(pool=pool, unit=unit, unit_type=unit_type)

        tms_meta = {
            'sim_tag': sim_tag,
            'model': model,
            'version': version,
            'variable': variable,
            'unit': unit,
            'unit_type': unit_type.value,
            'latitude': lat,
            'longitude': lon,
            'station_id': station_id,
            'source_id': source_id,
            'variable_id': variable_id,
            'unit_id': unit_id
        }

        utcOffset = getUTCOffset(utc_offset, default=True)

        # only record a non-zero offset
        if utcOffset != timedelta():
            tms_meta['utcOffset'] = utcOffset

        # Push timeseries to database
        save_forecast_timeseries_to_db(pool=pool,
                                       timeseries=timeseries,
                                       run_date=run_date,
                                       run_time=run_time,
                                       tms_meta=tms_meta,
                                       fgt=fgt)

    except Exception:
        logger.error('JSON config data loading error.')
        print('JSON config data loading error.')
        traceback.print_exc()
    finally:
        # BUG FIX: the original finally block contained a duplicated copy of
        # most of the try body (config reads, file-existence exit(1), a
        # second DB pool), which re-ran unconditionally after every call —
        # including after a failure. Cleanup must only report completion.
        logger.info("Process finished.")
        print("Process finished.")
示例#4
0
def extract_distrubuted_hechms_outputs(target_model, db_user, db_pwd, db_host,
                                       db_name, out_file_path, run_date,
                                       run_time):
    """
    Extract a distributed HEC-HMS discharge output csv and push it to the
    forecast database.

    :param target_model: key into the config's "version_config" mapping
    :param db_user: database user name
    :param db_pwd: database password
    :param db_host: database host
    :param db_name: database schema name
    :param out_file_path: absolute path of the model output csv
    :param run_date: model run date ("YYYY-MM-DD")
    :param run_time: model run time ("HH:MM:SS")
    :return: {'Result': 'Success'} on success, {'Result': 'Fail'} on any error

    Expected Config.json:
    {
      "output_file_name": "DailyDischarge.csv",
      "output_dir": "",
      "run_date": "2019-05-24",
      "run_time": "00:00:00",
      "utc_offset": "",
      "sim_tag": "hourly_run",
      "model": "HECHMS",
      "version": "single",
      "unit": "m3/s",
      "unit_type": "Instantaneous",
      "variable": "Discharge",
      "station_name": "Hanwella"
    }
    """
    try:
        # BUG FIX: close the config file handle (the original leaked it);
        # also removed the no-op self-assignments of run_date/run_time.
        with open('/home/curw/git/distributed_hechms/uploads/config.json'
                  ) as config_file:
            config = json.loads(config_file.read())

        utc_offset = read_attribute_from_config_file('utc_offset', config,
                                                     False)
        if utc_offset is None:
            utc_offset = ''

        # sim tag
        sim_tag = read_attribute_from_config_file('sim_tag', config, True)
        print("extract_distrubuted_hechms_outputs|sim_tag : ", sim_tag)
        # source details
        model = read_attribute_from_config_file('model', config, True)
        print("extract_distrubuted_hechms_outputs|model : ", model)

        version_config = read_attribute_from_config_file(
            'version_config', config, True)
        print("extract_distrubuted_hechms_outputs|version_config : ",
              version_config)

        version = version_config[target_model]
        print("extract_distrubuted_hechms_outputs|version : ", version)

        # unit details
        unit = read_attribute_from_config_file('unit', config, True)
        print("extract_distrubuted_hechms_outputs|unit : ", unit)
        unit_type = UnitType.getType(
            read_attribute_from_config_file('unit_type', config, True))
        print("extract_distrubuted_hechms_outputs|unit_type : ", unit_type)

        # variable details
        variable = read_attribute_from_config_file('variable', config, True)
        print("extract_distrubuted_hechms_outputs|variable : ", variable)

        # station details
        station_name = read_attribute_from_config_file('station_name', config,
                                                       True)
        print("extract_distrubuted_hechms_outputs|station_name : ",
              station_name)

        if not os.path.exists(out_file_path):
            msg = 'no file :: {}'.format(out_file_path)
            logger.warning(msg)
            print(msg)
            exit(1)

        # forecast generated time := output file's last-modified time
        fgt = get_file_last_modified_time(out_file_path)
        print("extract_distrubuted_hechms_outputs|fgt : ", fgt)

        timeseries = read_csv(out_file_path)

        pool = get_Pool(host=db_host,
                        port=3306,
                        db=db_name,
                        user=db_user,
                        password=db_pwd)

        hechms_stations = get_hechms_stations(pool=pool)
        print("extract_distrubuted_hechms_outputs|hechms_stations : ",
              hechms_stations)

        # single lookup (the original called .get(station_name) three times)
        station = hechms_stations.get(station_name)
        station_id = station[0]
        lat = str(station[1])
        lon = str(station[2])
        print("extract_distrubuted_hechms_outputs|[station_id, lat, lon] : ",
              [station_id, lat, lon])

        source_id = get_source_id(pool=pool, model=model, version=version)
        print("extract_distrubuted_hechms_outputs|source_id : ", source_id)

        variable_id = get_variable_id(pool=pool, variable=variable)
        print("extract_distrubuted_hechms_outputs|variable_id : ", variable_id)

        unit_id = get_unit_id(pool=pool, unit=unit, unit_type=unit_type)
        print("extract_distrubuted_hechms_outputs|unit_id : ", unit_id)

        tms_meta = {
            'sim_tag': sim_tag,
            'model': model,
            'version': version,
            'variable': variable,
            'unit': unit,
            'unit_type': unit_type.value,
            'latitude': lat,
            'longitude': lon,
            'station_id': station_id,
            'source_id': source_id,
            'variable_id': variable_id,
            'unit_id': unit_id
        }

        print("extract_distrubuted_hechms_outputs|tms_meta : ", tms_meta)
        utcOffset = getUTCOffset(utc_offset, default=True)

        # only record a non-zero offset
        if utcOffset != timedelta():
            tms_meta['utcOffset'] = utcOffset

        # Push timeseries to database
        save_forecast_timeseries_to_db(pool=pool,
                                       timeseries=timeseries,
                                       run_date=run_date,
                                       run_time=run_time,
                                       tms_meta=tms_meta,
                                       fgt=fgt)
        return {'Result': 'Success'}
    except Exception as e:
        # BUG FIX: the original reported 'JSON config data loading error.'
        # for every failure (station lookup, DB access, ...); surface the
        # actual exception instead.
        error_message = 'extract_distrubuted_hechms_outputs failed: {}'.format(e)
        logger.error(error_message)
        print(error_message)
        traceback.print_exc()
        return {'Result': 'Fail'}
示例#5
0
    CURW_SIM_DATABASE = read_attribute_from_config_file('CURW_SIM_DATABASE', config)


def read_attribute_from_config_file(attribute, config):
    """
    Look up a key in the loaded config dict.

    :param attribute: key name of the config json file
    :param config: loaded json file
    :return: the attribute's value when present and non-empty, else ``None``
        (after logging an error)
    """
    # Guard-clause form: return the value as soon as it is usable.
    if attribute in config and config[attribute] != "":
        return config[attribute]
    logger.error("{} not specified in config file.".format(attribute))


def set_db_config_file_path(db_config_file_path):
    """Point the adapter at a different db config file and reload settings.

    Stores *db_config_file_path* in the module-level ``DB_CONFIG_FILE_PATH``
    global, then re-runs :func:`set_variables` so every derived setting is
    refreshed from the new file.
    """
    global DB_CONFIG_FILE_PATH
    DB_CONFIG_FILE_PATH = db_config_file_path
    set_variables()


# Load DB configuration at import time. A missing config file is tolerated
# (warning only) so the module can still be imported and configured later
# via set_db_config_file_path().
try:
    set_variables()

except FileNotFoundError:
    logger.warning("db_adapter_config.json file does not exists in the calling directory path !!!")


def read_netcdf_file(pool, rainnc_net_cdf_file_path, tms_meta):
    """
    Extract per-grid-cell rainfall timeseries from a WRF RAINNC netcdf file
    and push them to the database.

    :param pool: database connection pool
    :param rainnc_net_cdf_file_path: path to the RAINNC netcdf file
    :param tms_meta: timeseries metadata dict (sim_tag, source_id, unit_id,
        variable_id, ...); its 'latitude'/'longitude' entries are overwritten
        per grid cell inside the loop below
    :return: True when the whole file was processed, False when the file is
        missing or reading fails

    rainc_unit_info:  mm
    lat_unit_info:  degree_north
    time_unit_info:  minutes since 2019-04-02T18:00:00
    """
    if not os.path.exists(rainnc_net_cdf_file_path):
        msg = 'no rainnc netcdf :: {}'.format(rainnc_net_cdf_file_path)
        logger.warning(msg)
        # record the failure for the notification email, keyed by timestamp
        email_content[datetime.now().strftime(COMMON_DATE_TIME_FORMAT)] = msg
        return False
    else:

        try:
            """
            RAINNC netcdf data extraction
    
            """
            # forecast generated time := netcdf file's last-modified time
            fgt = get_file_last_modified_time(rainnc_net_cdf_file_path)

            nnc_fid = Dataset(rainnc_net_cdf_file_path, mode='r')

            # e.g. "minutes since 2019-04-02T18:00:00"; split(' ')[2] below
            # recovers the time origin string
            time_unit_info = nnc_fid.variables['XTIME'].units

            time_unit_info_list = time_unit_info.split(' ')

            lats = nnc_fid.variables['XLAT'][0, :, 0]
            lons = nnc_fid.variables['XLONG'][0, 0, :]

            lon_min = lons[0].item()
            lat_min = lats[0].item()
            lon_max = lons[-1].item()
            lat_max = lats[-1].item()

            # NOTE(review): min/max come from the grid's own first/last
            # entries, so these masks presumably select every index —
            # looks like a leftover from a version that clipped to a
            # sub-region; confirm before simplifying.
            lat_inds = np.where((lats >= lat_min) & (lats <= lat_max))
            lon_inds = np.where((lons >= lon_min) & (lons <= lon_max))

            rainnc = nnc_fid.variables['RAINNC'][:, lat_inds[0], lon_inds[0]]

            times = nnc_fid.variables['XTIME'][:]

            # both run dates equal fgt for this extraction
            start_date = fgt
            end_date = fgt

            nnc_fid.close()

            # per-time-slot increments derived from the RAINNC values
            diff = get_per_time_slot_values(rainnc)

            width = len(lons)
            height = len(lats)

            ts = Timeseries(pool)

            # iterate every grid cell and push one timeseries per cell
            for y in range(height):
                for x in range(width):

                    # round coordinates to 6 decimal places for station naming
                    lat = float('%.6f' % lats[y])
                    lon = float('%.6f' % lons[x])

                    tms_meta['latitude'] = str(lat)
                    tms_meta['longitude'] = str(lon)

                    station_prefix = 'wrf_{}_{}'.format(lat, lon)

                    # cached station lookup; create the station row on a miss
                    station_id = wrf_v3_stations.get(station_prefix)

                    if station_id is None:
                        add_station(pool=pool,
                                    name=station_prefix,
                                    latitude=lat,
                                    longitude=lon,
                                    description="WRF point",
                                    station_type=StationEnum.WRF)
                        station_id = get_station_id(
                            pool=pool,
                            latitude=lat,
                            longitude=lon,
                            station_type=StationEnum.WRF)

                    tms_id = ts.get_timeseries_id_if_exists(tms_meta)

                    if tms_id is None:
                        # first time this (meta, station) combination is seen:
                        # generate an id and register the run entry
                        tms_id = ts.generate_timeseries_id(tms_meta)

                        run_meta = {
                            'tms_id': tms_id,
                            'sim_tag': tms_meta['sim_tag'],
                            'start_date': start_date,
                            'end_date': end_date,
                            'station_id': station_id,
                            'source_id': tms_meta['source_id'],
                            'unit_id': tms_meta['unit_id'],
                            'variable_id': tms_meta['variable_id']
                        }
                        try:
                            # best-effort: a failed run insert is logged and
                            # the cell's data is still pushed below
                            ts.insert_run(run_meta)
                        except Exception:
                            logger.error(
                                "Exception occurred while inserting run entry {}"
                                .format(run_meta))
                            traceback.print_exc()

                    data_list = []
                    # generate timeseries for each station
                    for i in range(len(diff)):
                        # timestamp = time origin (from the units string) plus
                        # the XTIME offset; times[i + 1] skips the first step,
                        # presumably because diff drops the initial cumulative
                        # sample — TODO confirm against get_per_time_slot_values
                        ts_time = datetime.strptime(
                            time_unit_info_list[2],
                            '%Y-%m-%dT%H:%M:%S') + timedelta(
                                minutes=times[i + 1].item())
                        # convert UTC to Sri Lanka local time
                        t = datetime_utc_to_lk(ts_time, shift_mins=0)
                        data_list.append([
                            tms_id,
                            t.strftime('%Y-%m-%d %H:%M:%S'), fgt,
                            float(diff[i, y, x])
                        ])

                    push_rainfall_to_db(ts=ts,
                                        ts_data=data_list,
                                        tms_id=tms_id,
                                        fgt=fgt)
            return True
        except Exception as e:
            msg = "netcdf file at {} reading error.".format(
                rainnc_net_cdf_file_path)
            logger.error(msg)
            traceback.print_exc()
            # record the failure for the notification email
            email_content[datetime.now().strftime(
                COMMON_DATE_TIME_FORMAT)] = msg
            return False