def put_24hr_observations(session):
    """Fetch yesterday's observations for every NOAA station and store them.

    Args:
        session (Session): database session

    Returns:
        int: total number of measurements handed to the repository
    """
    repository = Repository(session)
    noaa_stations = repository.get_all_stations(source='NOAA')

    # Step back 24 hours from the current local time, then drop the time
    # fields so the request timestamp is midnight at the start of that day.
    one_day_ago = dt.datetime.now() - dt.timedelta(hours=24)
    yesterday = dt.datetime(one_day_ago.year, one_day_ago.month, one_day_ago.day)

    def fetch_observations(station):
        # One API request per station row.
        return make_station_observation_request(station, yesterday.isoformat())

    # presumably `noaa_stations` is a pandas DataFrame (row-wise apply) -- confirm
    per_station_batches = noaa_stations.apply(fetch_observations, axis=1).values

    # Persist each station's batch and keep a running count.
    added = 0
    for batch in per_station_batches:
        repository.put_measurements_from_list(batch)
        added += len(batch)
    return added
def upload_data_from_file(csv_file, from_csv=False):
    """Insert all records contained in a file into the database.

    When ``from_csv`` is false the file is parsed here: each non-blank line
    must have the form ``site_id,param_code,date_time,value``. Blank lines
    are skipped.

    Args:
        csv_file (str): full path of CSV file containing records
        from_csv (bool): whether to insert into database using CSV or ORM
            (CSV scales better)

    Returns:
        The result of the repository call that performed the insert
        (previously documented as ``bool: success/exception``).

    Raises:
        ValueError: if a non-blank line does not contain exactly four
            comma-separated fields, or its value field cannot be converted
            with ``float()``.
    """
    # NOTE(review): Repository is constructed without a session here, while
    # other callers in this file pass one -- confirm this is intended.
    r = Repository()

    if from_csv:
        return r.put_measurements_from_csv(csv_file=csv_file)

    measurements = []
    with open(csv_file, "r") as f:
        for line_number, line in enumerate(f, start=1):
            record = line.strip()
            if not record:
                # A blank line (e.g. an extra newline at the end of the
                # file) carries no record; skip it instead of failing on
                # the unpack below.
                continue

            fields = record.split(",")
            if len(fields) != 4:
                raise ValueError(
                    f"{csv_file}:{line_number}: expected 4 comma-separated "
                    f"fields, got {len(fields)}"
                )

            site_id, param_code, date_time, value = fields
            measurements.append(
                Measurement(
                    station_id=site_id,
                    metric_id=param_code,
                    date_time=dateutil.parser.parse(date_time),
                    value=float(value),
                )
            )

    return r.put_measurements_from_list(measurements=measurements)
def fill_noaa_gaps(start_date, end_date, db=settings.DATABASE):
    """Use as needed to fill gaps in weather measurements.

    Requests observations for every NOAA station for each day from
    ``start_date`` through ``end_date`` and stores them. A station whose
    batch fails with a database error is rolled back and skipped; the
    remaining stations and days are still processed.

    Args:
        start_date: the start day, included in API calls. Must support
            ``<=`` comparison with ``end_date``, ``isoformat()`` and
            addition of a ``timedelta``.
        end_date: the end day, inclusive
        db: database setting passed to ``Context``; defaults to
            ``settings.DATABASE``

    Returns:
        int: total number of measurements stored across all days
    """
    context = Context(db)
    session = context.Session()
    try:
        repo = Repository(session)
        stations = repo.get_all_stations(source='NOAA')

        total = 0
        current_day = start_date
        # loop through each day retrieving observations
        while current_day <= end_date:
            day_label = current_day.isoformat()
            content = stations.apply(
                lambda station: make_station_observation_request(station, day_label),
                axis=1
            ).values

            # put them all in the db, one station at a time
            for station_measurements in content:
                try:
                    repo.put_measurements_from_list(station_measurements)
                except SQLAlchemyError:
                    # Undo the failed batch so the session stays usable,
                    # then move on to the next station.
                    session.rollback()
                    continue

                count = len(station_measurements)
                if count == 0:
                    # No observations for this station on this day;
                    # indexing [0] below would raise IndexError.
                    continue

                total += count
                # Measurement objects are built with a station_id field
                # elsewhere in this file; report that id rather than the
                # whole list of measurements.
                station_id = station_measurements[0].station_id
                print(f'added {count} measurements for station_id {station_id} - {day_label}')

            current_day += dt.timedelta(days=1)

        return total
    finally:
        # The session is created here, so it is released here as well.
        session.close()