def update_indicator_table(self, csvfile): """ Insert water quality measurement data from a csv file into the appropriate indicator database table. :param csvfile: CSV file containing measurement data :return: None """ date_cols = ("ActivityStartDate", "ActivityEndDate") indicator = csvfile.replace('_Result.csv', '') if not table_exists(indicator): self.create_indicator_table(indicator) with open(os.path.join(self.tmp_dir, csvfile), 'r') as csvin: csvreader = csv.reader(csvin) headers = None for row in csvreader: if not headers: headers = ['"{}"'.format(x.replace('/', '_')) for x in row] else: insert_sql = 'INSERT INTO "{}" ({}) SELECT \n'.format( indicator, ','.join(headers) ) query_format = [] for i, val in enumerate(row): attribute = headers[i].strip('"') id_idx = headers.index('"ActivityIdentifier"') query_format.append("%s") if attribute in date_cols and val: time_idx = headers.index( '"{}_Time"'.format( attribute.replace("Date", "Time"))) zone_idx = headers.index( '"{}_TimeZoneCode"'.format( attribute.replace("Date", "Time"))) time_str = "{} {} {}".format( val, row[time_idx], row[zone_idx]) row[i] = time_str else: if not val or val == '.' or val == 'None': row[i] = None insert_sql += '{}'.format( ','.join('{}'.format(x) for x in query_format)) + \ ' WHERE NOT EXISTS (SELECT 1 from ' + \ '{} WHERE "ActivityIdentifier" = \'{}\');'.format( indicator, re.sub('\'{1}', '\'\'', row[id_idx])) postgres_query(insert_sql, params=tuple(row), commit=True) purge_old_data(indicator, date_cols[0], self.days_to_keep) if not table_exists(indicator + self.suffix): view_sql = 'CREATE OR REPLACE VIEW ' + indicator + self.suffix + \ ' AS SELECT i.*, g.wkb_geometry from ' + indicator + ' i ' + \ ' INNER JOIN ' + self.station_table + ' g on ' + \ ' i."MonitoringLocationIdentifier" = ' + \ ' g.monitoringlocationidentifier;' postgres_query(view_sql, commit=True) self.post_geoserver_vector(indicator + self.suffix)
def __init__(self, table, cities): self.cities = cities self.prefix = table self.archive = self.prefix + "_archive" self.max_wait = 5 if not table_exists(self.archive): postgres_query(AQICN_TABLE.format(table=self.archive), commit=True)
def process(self): if not table_exists(self.prefix): postgres_query(AQICN_TABLE.format(table=self.prefix), commit=True) logger.debug("Start %s" % datetime.datetime.now()) if not self.cities: self.getCities() logger.debug("There are %s cities" % str(len(self.cities))) pool = ThreadPool(self.pool_size) for citylist in self.split_list(self.pool_size): pool.apply_async(thread_parse, args=(self.prefix, citylist)) pool.close() pool.join()
def run(self): if not table_exists(self.prefix): postgres_query(WHISP_TABLE.format(table=self.prefix), commit=True) self.import_archive() self.scrape() if not layer_exists(self.prefix, ogc_server_settings.server.get('DATASTORE'), DEFAULT_WORKSPACE): self.post_geoserver_vector(self.prefix) if not style_exists(self.prefix): with open(os.path.join(script_dir, 'resources/whisp.sld')) as sld: self.set_default_style(self.prefix, self.prefix, sld.read()) self.update_geonode(self.prefix, title=self.title, description=self.description) self.truncate_gs_cache(self.prefix) self.cleanup()
def update_station_table(self, csvfile): """ Insert data on water quality monitoring stations from a csv file into the database :param csvfile: CSV file containing station data :return: None """ vrt_content = ("""<OGRVRTDataSource> <OGRVRTLayer name="{name}"> <SrcDataSource>{csv}</SrcDataSource> <GeometryType>wkbPoint</GeometryType> <LayerSRS>WGS84</LayerSRS> <GeometryField encoding="PointFromColumns" x="LongitudeMeasure" y="LatitudeMeasure"/> </OGRVRTLayer> </OGRVRTDataSource> """) station_table = self.station_table needs_index = not table_exists(station_table) db = ogc_server_settings.datastore_db vrt_file = os.path.join(self.tmp_dir, csvfile.replace('.csv', '.vrt')) csv_name = os.path.basename(csvfile).replace(".csv", "") if not os.path.exists(vrt_file): with open(vrt_file, 'w') as vrt: vrt.write( vrt_content.format(name=csv_name, csv=os.path.join(self.tmp_dir, csvfile))) ogr2ogr_exec("-append -skipfailures -f PostgreSQL \ \"PG:host={db_host} user={db_user} password={db_pass} \ dbname={db_name}\" {vrt} -nln {table}".format( db_host=db["HOST"], db_user=db["USER"], db_pass=db["PASSWORD"], db_name=db["NAME"], vrt="{}".format(vrt_file), table=station_table)) if needs_index: sql = 'ALTER TABLE {} '.format(station_table) + \ 'ADD CONSTRAINT monitoringlocationidentifier_key ' + \ 'UNIQUE (monitoringlocationidentifier)' logger.debug(sql) postgres_query(sql, commit=True)
def update_station_table(self, csvfile): """ Insert data on water quality monitoring stations from a csv file into the database :param csvfile: CSV file containing station data :return: None """ vrt_content = ( """<OGRVRTDataSource> <OGRVRTLayer name="{name}"> <SrcDataSource>{csv}</SrcDataSource> <GeometryType>wkbPoint</GeometryType> <LayerSRS>WGS84</LayerSRS> <GeometryField encoding="PointFromColumns" x="LongitudeMeasure" y="LatitudeMeasure"/> </OGRVRTLayer> </OGRVRTDataSource> """) station_table = self.station_table needs_index = not table_exists(station_table) db = ogc_server_settings.datastore_db vrt_file = os.path.join(self.tmp_dir, csvfile.replace('.csv', '.vrt')) csv_name = os.path.basename(csvfile).replace(".csv", "") if not os.path.exists(vrt_file): with open(vrt_file, 'w') as vrt: vrt.write(vrt_content.format( name=csv_name, csv=os.path.join(self.tmp_dir, csvfile))) ogr2ogr_exec("-append -skipfailures -f PostgreSQL \ \"PG:host={db_host} user={db_user} password={db_pass} \ dbname={db_name}\" {vrt} -nln {table}".format( db_host=db["HOST"], db_user=db["USER"], db_pass=db["PASSWORD"], db_name=db["NAME"], vrt="{}".format(vrt_file), table=station_table)) if needs_index: sql = 'ALTER TABLE {} '.format(station_table) + \ 'ADD CONSTRAINT monitoringlocationidentifier_key ' + \ 'UNIQUE (monitoringlocationidentifier)' logger.debug(sql) postgres_query(sql, commit=True)
def update_indicator_table(self, csvfile): """ Insert water quality measurement data from a csv file into the appropriate indicator database table. :param csvfile: CSV file containing measurement data :return: None """ date_cols = ("ActivityStartDate", "ActivityEndDate") indicator = csvfile.replace('_Result.csv', '') if not table_exists(indicator): self.create_indicator_table(indicator) with open(os.path.join(self.tmp_dir, csvfile), 'r') as csvin: csvreader = csv.reader(csvin) headers = None for row in csvreader: if not headers: headers = ['"{}"'.format(x.replace('/', '_')) for x in row] else: insert_sql = 'INSERT INTO "{}" ({}) SELECT \n'.format( indicator, ','.join(headers) ) query_format = [] for i, val in enumerate(row): attribute = headers[i].strip('"') id_idx = headers.index('"ActivityIdentifier"') query_format.append("%s") if attribute in date_cols and val: time_idx = headers.index( '"{}_Time"'.format( attribute.replace("Date", "Time"))) zone_idx = headers.index( '"{}_TimeZoneCode"'.format( attribute.replace("Date", "Time"))) time_str = "{} {} {}".format( val, row[time_idx], row[zone_idx]) row[i] = time_str else: if not val or val == '.' or val == 'None': row[i] = None insert_sql += '{}'.format( ','.join('{}'.format(x) for x in query_format)) + \ ' WHERE NOT EXISTS (SELECT 1 from ' + \ '{} WHERE "ActivityIdentifier" = \'{}\');'.format( indicator, re.sub('\'{1}', '\'\'', row[id_idx])) try: postgres_query( insert_sql, params=tuple(row), commit=True) except Exception as e: logger.error("The query failed: {} with parameters: {}". format(insert_sql, row)) logger.error(traceback.format_exc()) if not self.skip_errors: raise e purge_old_data(indicator, date_cols[0], self.days_to_keep) if not table_exists(indicator + self.suffix): view_sql = 'CREATE OR REPLACE VIEW ' + indicator + self.suffix + \ ' AS SELECT i.*, g.wkb_geometry from ' + indicator + ' i ' + \ ' INNER JOIN ' + self.station_table + ' g on ' + \ ' i."MonitoringLocationIdentifier" = ' + \ ' g.monitoringlocationidentifier;' postgres_query(view_sql, commit=True) self.post_geoserver_vector(indicator + self.suffix)