def main(args):
    """Create a local and remote sensor for each station row in the metadata.

    Reads ``filename``, ``restfilenames``, ``metadata`` and ``measurand``
    (default ``"temperature"``) from ``args``. The metadata text is split on
    the literal two-character sequence backslash + ``n`` (not on a real
    newline), each piece is passed through ``clean_str`` and rows that have
    fewer than seven fields or whose first field does not start with a digit
    are skipped.

    Returns a dict with ``message``, ``filename`` and ``restfilenames`` on
    success. On any exception the sequence is completed via
    ``secretmanager.complete_sequence`` and a dict with an ``error`` key is
    returned instead.
    """
    filename = args.get("filename")
    rest_names = args.get("restfilenames")
    content = args.get("metadata")
    measurand = args.get("measurand", "temperature")

    try:
        for raw_row in content.split("\\n"):
            # presumably clean_str yields a list of string fields -- confirm
            fields = clean_str(raw_row)
            is_station_row = len(fields) >= 7 and fields[0].isdigit()
            if not is_station_row:
                continue

            # Only the first six fields are consumed; the height is unused.
            station_id, _height_above_nn, lat, lon, valid_from, valid_to, *_ = fields
            valid_from = to_osn_date(valid_from)
            # An empty end date is forwarded untouched.
            valid_to = to_osn_date(valid_to) if valid_to else valid_to
            createLocalAndRemoteSensor(
                station_id, measurand, valid_from, valid_to,
                float(lat), float(lon))

        return {
            "message": "finished given metadata",
            "filename": filename,
            "restfilenames": rest_names
        }
    except Exception as err:
        secretmanager.complete_sequence(rest_names)
        failure = {
            "error": "failed metadata because of unkown error - jump to next file"
        }
        print(failure, err)
        return failure
def main(args):
    """Download a station zip archive and return its ``produkt*`` CSV as text.

    Args:
        args: mapping with
            ``filename``      -- archive name appended to the FTP directory,
            ``restfilenames`` -- passed through to the result / to
                                 ``secretmanager.complete_sequence`` on error,
            ``ftp_url``       -- optional directory below
                                 ``ftp://ftp-cdc.dwd.de/`` (defaults to the
                                 hourly air temperature "recent" directory).

    Returns:
        ``{"error": ...}`` without touching the network when ``filename`` is
        missing; ``{"csv": ..., "restfilenames": ...}`` on success; an
        ``{"error": ...}`` dict after calling
        ``secretmanager.complete_sequence`` when anything fails.

    The CSV is returned as ``str(bytes)[2:-1]``, i.e. the ``repr`` of the raw
    bytes with the leading ``b'`` and trailing quote removed, so line breaks
    arrive as escaped backslash sequences rather than real newlines.
    """
    file_name = args.get("filename")
    rest_names = args.get("restfilenames")
    if file_name is None:
        return {"error": "seuquence should be stopped"}

    try:
        ftp_url = "ftp://ftp-cdc.dwd.de/" + args.get(
            "ftp_url",
            "climate_environment/CDC/observations_germany/climate/hourly/air_temperature/recent/")
        # Placeholder that makes ZipFile.open fail (and thereby triggers the
        # error path below) when the archive has no "produkt*" member.
        inner_file_name = "COULD NOT GET FILENAME"

        # Close the FTP response as soon as the archive is buffered in memory.
        with urlopen(ftp_url + file_name) as sensorzip:
            memfile = io.BytesIO(sensorzip.read())

        with ZipFile(memfile, 'r') as myzip:
            # No break: the last matching member wins, as before.
            for z_info in myzip.filelist:
                if z_info.filename.startswith("produkt"):
                    inner_file_name = z_info.filename
            with myzip.open(inner_file_name) as csv_file_value_data:
                csv_text = str(csv_file_value_data.read())[2:-1]

        result = {"csv": csv_text, "restfilenames": rest_names}
        print("send in get csv")
        return result
    except Exception as e:
        secretmanager.complete_sequence(rest_names)
        result = {"error": "failed metadata because of unkown error - jump to next file"}
        print(result, e)
        return result
def main(args):
    """Log in to the API and hand the CSV header and rows to the handler.

    Returns an ``error`` dict immediately when ``args`` carries no ``csv``.
    Otherwise the text is split with ``str.splitlines``; the first line is
    passed as ``first_line`` and the remainder as ``lines`` to
    ``handle_content_data``. Exceptions are printed, not raised, and
    ``secretmanager.complete_sequence`` is always called before the
    ``finished`` message is returned.
    """
    csv = args.get("csv")
    rest_names = args.get("restfilenames")
    # Read for parity with the other actions; not used further in this block.
    measurand = args.get("measurand", 'temperature')

    if csv is None:
        return {"error": "seuquence should be stopped"}

    try:
        api.login(username=secretmanager.__OSNUSERNAME__,
                  password=secretmanager.__OSNPASSWORD__)
        rows = csv.splitlines()
        header = rows[0]
        body = rows[1:]
        handle_content_data(first_line=header, lines=body)
    except Exception as err:
        print("Exception {}".format(err))
    finally:
        secretmanager.complete_sequence(rest_names)

    return {"message": "finished"}
def main(args):
    """Run ``parse_metadata`` on the given metadata and report the outcome.

    Reads ``filename``, ``restfilenames``, ``metadata`` and ``measurand``
    (default ``'temperature'``) from ``args``. On success the filename and
    remaining file names are echoed back together with a message. If
    ``parse_metadata`` raises, the sequence is completed via
    ``secretmanager.complete_sequence`` and an ``error`` dict is returned.
    """
    filename = args.get('filename')
    rest_names = args.get('restfilenames')
    content = args.get('metadata')
    measurand = args.get('measurand', 'temperature')

    try:
        parse_metadata(content, measurand)
    except Exception as err:
        secretmanager.complete_sequence(rest_names)
        failure = {
            'error': 'failed metadata because of unkown error - jump to next file'
        }
        print(failure, err)
        return failure

    return {
        'message': 'finished given metadata',
        'filename': filename,
        'restfilenames': rest_names
    }
def main(args):
    """Download a station zip archive and return its metadata file as text.

    Args:
        args: mapping with
            ``filename``      -- archive name appended to the FTP directory,
            ``restfilenames`` -- passed through to the result / to
                                 ``secretmanager.complete_sequence`` on error,
            ``ftp_url``       -- optional directory below
                                 ``ftp://ftp-cdc.dwd.de/`` (defaults to the
                                 hourly air temperature "recent" directory).

    Returns:
        ``{"metadata": ..., "filename": ..., "restfilenames": ...}`` on
        success, or a ``{"message": ...}`` dict after calling
        ``secretmanager.complete_sequence`` when anything fails (including a
        missing ``filename``).

    The metadata member is the last archive entry whose name, split on
    ``_``, starts with ``Stationsmetadaten`` or with ``Metadaten`` followed
    by ``Geographie``. Its content is returned as ``str(bytes)[2:-1]``, i.e.
    the ``repr`` of the raw bytes without the ``b'`` prefix and closing
    quote, so line breaks arrive as escaped backslash sequences.
    """
    # Placeholder that makes ZipFile.open fail (and thereby triggers the
    # error path below) when no metadata member is found.
    inner_file_name = "COULD NOT GET FILENAME"
    file_name = args.get("filename")
    rest_names = args.get("restfilenames")
    try:
        ftp_url = "ftp://ftp-cdc.dwd.de/" + args.get(
            "ftp_url",
            "climate_environment/CDC/observations_germany/climate/hourly/air_temperature/recent/"
        )
        # Close the FTP response as soon as the archive is buffered in memory.
        with urlopen(ftp_url + file_name) as sensorzip:
            memfile = io.BytesIO(sensorzip.read())

        with ZipFile(memfile, 'r') as myzip:
            for z_info in myzip.filelist:
                substrings = z_info.filename.split("_")
                # Slice comparison instead of substrings[1]: a name without
                # an underscore can no longer raise and cut the scan short.
                if substrings[0] == "Stationsmetadaten" or (
                        substrings[:2] == ["Metadaten", "Geographie"]):
                    inner_file_name = z_info.filename
            with myzip.open(inner_file_name) as meta_data:
                metadata_text = str(meta_data.read())[2:-1]

        result = {
            "metadata": metadata_text,
            "filename": file_name,
            "restfilenames": rest_names
        }
        print("send in get metadata", result)
        return result
    except Exception as e:
        secretmanager.complete_sequence(rest_names)
        result = {
            "message": "failed metadata because of unkown error - jump to next file"
        }
        # Log the cause instead of dropping it silently.
        print(result, e)
        return result
def main(args):
    """Parse a station CSV payload and push its values to OSN in batches.

    Args:
        args: mapping with
            ``csv``           -- CSV text whose rows are separated by the
                                 literal escaped sequence backslash-r
                                 backslash-n (not real line breaks),
            ``restfilenames`` -- forwarded to
                                 ``secretmanager.complete_sequence``,
            ``measurand``     -- one of ``temperature`` (default),
                                 ``cloudiness``, ``air_pressure`` or
                                 ``wind_speed``; selects which columns are
                                 pushed.

    Returns:
        ``{"error": ...}`` immediately when ``csv`` is missing, otherwise
        ``{"message": "finished"}``. Exceptions raised during processing are
        printed, not propagated, and ``secretmanager.complete_sequence`` is
        always called.
    """
    csv = args.get("csv")
    rest_names = args.get("restfilenames")
    measurand = args.get("measurand", 'temperature')
    if csv is None:
        return {"error": "seuquence should be stopped"}

    try:
        logged_action = False

        lines = csv.split("\\r\\n")
        first_line = lines.pop(0)
        # The row directly after the header is discarded as well.
        lines.pop(0)
        first_line = clean_str(first_line)
        len_first_line = len(first_line)
        field_defs = get_indices(first_line)
        print("fielddefs", field_defs)
        if len_first_line < 5:
            raise Exception(
                f'Nr of fields is lower than expected: {first_line}')

        # Unpacking also enforces that get_indices returned 15 entries.
        (dwd_id_idx, date_idx, quality_idx, structure_version_idx,
         air_temperature_idx, humidity_idx, cloudiness_idx,
         precipitation_yes_no_idx, precipitation_amount_idx,
         precipitation_type_idx, air_pressure_nn_idx, air_pressure_idx,
         sunshine_mins_per_hour_idx, wind_speed_idx,
         wind_direction_idx) = field_defs

        if dwd_id_idx is None:
            raise Exception(
                f'File does not contain a dwd_id index: {field_defs}')
        if date_idx is None:
            raise Exception(
                f'File does not contain a Timestamp index: {field_defs}')
        if quality_idx is None:
            pass  # Not Implemented yet and not essential
        if structure_version_idx is None:
            pass  # Not Implemented yet and not essential

        print("len lines", len(lines))
        lines = [clean_str(x) for x in lines if not (x is None or x == '')]

        dwd_id = lines[0][dwd_id_idx]
        # Test for None before calling a str method on the value.
        if dwd_id is None or not dwd_id.isdigit():
            raise Exception(f'Not a valid dwd_id: {dwd_id}')
        print("dwdid", dwd_id)

        # Drop malformed rows first so that the sort key cannot fail on a
        # truncated row; the result is the same as sorting first because
        # the sort is stable.
        lines = [
            line for line in lines
            if len(line) > 5 and len(line) == len_first_line
        ]
        lines = sorted(lines, key=lambda x: x[date_idx])
        print("lenlines", len(lines))
        lines = list(zip(*lines))  # transpose: one tuple per column

        dates = []
        iso_dates = []
        for ts in lines[date_idx]:
            # Timestamp layout: YYYYMMDD...HH (hour taken from the last two
            # characters).
            year, month, day, hour = ts[:4], ts[4:6], ts[6:8], ts[-2:]
            dates.append(f'{year}-{month}-{day}')
            iso_dates.append(
                datetime(int(year), int(month), int(day),
                         int(hour)).isoformat())  # replace with strptime?

        print('-' * 50)
        valuebulk = {'collapsedMessages': []}
        messages = valuebulk['collapsedMessages']

        def _add_float_values(_idx: int, _i: int, _j: int, _osn_id: int):
            """Queue rows ``_i.._j`` of column ``_idx`` as float messages."""
            for _iso_date, _value in zip(iso_dates[_i:_j],
                                         lines[_idx][_i:_j]):
                _value = to_float_or_none(_value)
                # -999.0 is skipped (presumably the missing-value marker
                # of the source data -- confirm).
                if _value is not None and _value != -999.0:
                    messages.append({
                        'sensorId': _osn_id,
                        'timestamp': _iso_date,
                        'numberValue': _value
                    })

        def _add_cloudiness_values(_idx: int, _i: int, _j: int,
                                   _osn_id: int):
            """Queue rows ``_i.._j`` of column ``_idx`` scaled by 1/8."""
            for _iso_date, _value in zip(iso_dates[_i:_j],
                                         lines[_idx][_i:_j]):
                _value = to_int_or_none(_value)
                # NOTE(review): the strict bounds drop the values 0 and 8
                # as well -- confirm this is intended.
                if _value is not None and 0 < _value < 8:
                    _value *= 1 / 8  # map to float between 0 and 1
                    messages.append({
                        'sensorId': _osn_id,
                        'timestamp': _iso_date,
                        'numberValue': _value
                    })

        def _process_chunks(_idx: int, _chunks: dict, _sensors: dict,
                            _measurand: str, _add_values: Callable):
            """Push column ``_idx`` sensor by sensor in batches of <= 3000."""
            nonlocal valuebulk, messages, logged_action
            print("chunks ", _chunks)
            _local_id = f'{dwd_id}-{_measurand}'
            with mongo_conn(mongo_db_url) as collection:
                # Count this invocation once, even if several measurands
                # are processed.
                if not logged_action:
                    collection.update({"_id": 5},
                                      {"$inc": {"actionCount": 1}})
                    logged_action = True

                for _sensor_id in _chunks:
                    _sensor = _sensors[_sensor_id]
                    _osn_id = _sensor['osn_id']
                    _sensor_idx = _sensor['idx']
                    _chunk = _chunks[_sensor_id]

                    if _sensor['earliest_sent_value'] == '':
                        collection.update_one(
                            filter={'local_id': _local_id},
                            update={
                                '$set': {
                                    f'sensors.{_sensor_idx}.earliest_sent_value':
                                    dates[_chunk[0]]
                                }
                            })

                    for i, j in batchify(*_chunk, max_batch_size=3000):
                        _add_values(_idx, i, j, _osn_id)
                        t0 = time.time()
                        collection.update(
                            {"_id": 2},
                            {"$inc": {"aimedValueCount": len(messages)}})

                        if osn_push_valuebulk(valuebulk):
                            print(
                                f'Pushed {len(messages)} values to OSN. took: {round(time.time() - t0, 5)} sec'
                            )
                            collection.update_one(
                                filter={'local_id': _local_id},
                                update={
                                    '$set': {
                                        f'sensors.{_sensor_idx}.latest_sent_value':
                                        dates[j - 1]
                                    }
                                })
                            # count pushed values in whole process
                            collection.update(
                                {"_id": 2},
                                {"$inc": {"valueCount": len(messages)}})

                        # Start a fresh bulk whether or not the push worked;
                        # a failed batch is dropped (this should probably be
                        # an exception...).
                        valuebulk['collapsedMessages'] = []
                        messages = valuebulk['collapsedMessages']

        def _update(_idx: int, _measurand: str, _func: Callable):
            """Look up the sensors of ``_measurand`` and push column ``_idx``."""
            _sensors = mongo_sensors_by_local_id(dwd_id, _measurand)
            _chunks = seperate_by_sensor(dates, _sensors, dwd_id)
            # Forward the column of THIS measurand; previously the air
            # temperature column was used for every measurand.
            _process_chunks(_idx, _chunks, _sensors, _measurand, _func)

        if measurand == 'temperature':
            if air_temperature_idx is not None:
                _update(air_temperature_idx, 'temperature',
                        _add_float_values)
            if humidity_idx is not None:
                _update(humidity_idx, 'humidity', _add_float_values)
        elif measurand == 'cloudiness':
            if cloudiness_idx is not None:
                _update(cloudiness_idx, 'cloudiness',
                        _add_cloudiness_values)
        elif measurand == 'air_pressure':
            if air_pressure_idx is not None:
                _update(air_pressure_idx, 'air_pressure',
                        _add_float_values)
            if air_pressure_nn_idx is not None:
                _update(air_pressure_nn_idx, 'air_pressure_nn',
                        _add_float_values)
        elif measurand == 'wind_speed':
            if wind_speed_idx is not None:
                _update(wind_speed_idx, 'wind_speed', _add_float_values)
            if wind_direction_idx is not None:
                _update(wind_direction_idx, 'wind_direction',
                        _add_float_values)
    except Exception as e:
        print("Exception {}".format(e))
    finally:
        secretmanager.complete_sequence(rest_names)

    return {"message": "finished"}