def Fill_Year_DEFRA_Data(year):
    ## This will be a module to fill up the db with the past values
    ## Likely/hopefully only need this the once.
    all_sites_query = site_info.objects.all()
    ## Need to prioritise input as this takes an absolute age.
    for site in all_sites_query:
        site_name = site.site_name
        site_code = site.site_code
        site_open = site.site_open
        date_open = site.date_open
        date_closed = site.date_closed
        # This skips sites that already have data for this year in the database
        ### THIS IS NOT A SMART WAY OF DOING THIS BUT IS A TEMP BODGE
        if measurement_data.objects.filter(
                date_and_time__year=year).filter(site_id=site).exists():
            continue
        # Don't include sites that are just a quick PM10 site.
        # This only catches Brighton Roadside PM10 & Northampton PM10.
        if 'PM10' in site_name:
            continue
        if date_open.year > year:
            continue
        # Load in dataframe - could be a memory issue here with the
        # site open the longest
        if site_open:
            date_closed = dt.now()
        # For the time being only pull data for sites that are still open
        if site_open:
            print('Getting data for %s: %d - %d (%s)' % (site_name,
                  date_open.year, date_closed.year, site_code))
            df = LoadData.Get_AURN_data(site_name, [year, year],
                                        drop_status_and_units=False)
            DEFRA_AURN_data_to_db(df, site_code)
            print('Submitted to database')
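## A minimal sketch (not part of the existing module) of how
## Fill_Year_DEFRA_Data could be driven for a range of years.
## backfill_years is a hypothetical helper name and the years in the example
## call are purely illustrative.
def backfill_years(start_year, end_year):
    # Work backwards so the most recent (and most useful) years land first
    for year in range(end_year, start_year - 1, -1):
        print('Backfilling %d' % year)
        Fill_Year_DEFRA_Data(year)

# Example usage, e.g. from a shell or a management command:
# backfill_years(2015, dt.now().year)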
def Get_Latest_AURN_Data(site_name, year):
    # Just add the latest data to the database. This relies on all variables
    # in a site being updated at the same time, which I think is correct.
    df = LoadData.Get_AURN_data(site_name, [year, year],
                                drop_status_and_units=False)
    # Get the site code - I need a cleaner way of doing this...
    filename = 'dataplot/InfoFiles/DEFRA_AURN_sites_info.csv'
    sites = pd.read_csv(filename)
    site_code = sites['Site Code'].loc[sites['Site Name'] == site_name]
    site_code = site_code.values[0]
    # Query the site info based on the site code
    site_id = site_info.objects.get(site_code=site_code)
    # Get the latest date and time in the database for the given site
    site_measurements = measurement_data.objects.filter(site_id=site_id)
    most_recent_date = site_measurements.latest('date_and_time').date_and_time
    # Only submit measurements newer than the most recent one already stored
    trimmed_df = df.loc[df.index > most_recent_date]
    DEFRA_AURN_data_to_db(trimmed_df, site_code)
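## A minimal sketch (not part of the existing module) of how
## Get_Latest_AURN_Data might be called for every open site to keep the
## database current. update_all_open_sites is a hypothetical wrapper and it
## assumes site_info.site_open is a boolean flag, as used above.
def update_all_open_sites():
    current_year = dt.now().year
    for site in site_info.objects.filter(site_open=True):
        try:
            Get_Latest_AURN_Data(site.site_name, current_year)
        except Exception as e:
            # Carry on if a single site fails (e.g. nothing new on the server)
            print('Could not update %s: %s' % (site.site_name, e))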
        # (Fragment: this continues the body of a loop over site_info records
        # in an enclosing backfill function whose opening lines are missing.)
            continue
        site_year_open = site.date_open.year
        site_year_closed = site.date_closed
        if site_year_closed:
            site_year_closed = site_year_closed.year
        else:
            site_year_closed = dt.now().year
        # Temporary hard-coded override so only recent years are fetched
        site_year_open = 2020
        for year in range(site_year_open, site_year_closed + 1):
            print('Processing {} data for site {}'.format(year, site.site_name))
            try:
                df = LoadData.Get_AURN_data(site.site_name, [year, year],
                                            drop_status_and_units=False)
            except (HTTPError, URLError):
                print('No web data for {} {}'.format(site.site_name, year))
                continue
            # Split the returned columns into pollutant, status and unit columns
            pollutant_cols = []
            status_cols = []
            unit_cols = []
            for c in df.columns:
                if c.split('.')[0].lower() == 'status':
                    status_cols.append(c)
                elif c.split('.')[0].lower() == 'unit':
                    unit_cols.append(c)
                else:
                    pollutant_cols.append(c)
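## The pollutant/status/unit column split above is repeated in
## Update_DEFRA_Data below. A minimal sketch of a shared helper both call
## sites could use instead; split_aurn_columns is a hypothetical name and it
## assumes the DEFRA dataframes keep the 'status.*' / 'unit.*' column naming
## relied on above.
def split_aurn_columns(df):
    pollutant_cols, status_cols, unit_cols = [], [], []
    for c in df.columns:
        prefix = c.split('.')[0].lower()
        if prefix == 'status':
            status_cols.append(c)
        elif prefix == 'unit':
            unit_cols.append(c)
        else:
            pollutant_cols.append(c)
    return pollutant_cols, status_cols, unit_cols

# Example usage:
# pollutant_cols, status_cols, unit_cols = split_aurn_columns(df)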
def Update_DEFRA_Data(site_name):
    ## Find where the database still has unverified data and see
    ## if the DEFRA site has been updated.
    # Query the site info record for this site
    site_id = site_info.objects.get(site_name=site_name)
    site_data = measurement_data.objects.filter(site_id=site_id)
    # Only get data that hasn't been verified but isn't unknown
    queried_data = site_data.exclude(verified='V').exclude(verified='U')
    # Into a dataframe for ease of use
    current_data = pd.DataFrame.from_records(queried_data.values(
        'date_and_time', 'value', 'verified'), index='date_and_time')
    # Get the years that still have unverified data, oldest first
    years = sorted(current_data.index.year.unique().values)
    # If there are more than two years of unverified data then the older
    # years are unlikely to ever be verified, so only keep the two most recent
    if len(years) > 2:
        years = years[-2:]
    if years[-1] < dt.now().year - 1:
        # If the most recent unverified year is more than a year ago then
        # don't bother doing anything
        return
    if years[0] < dt.now().year - 1:
        # If the oldest unverified year is more than a year ago then only
        # use the most recent year
        years = [years[-1]]
    # Load in the data again
    new_df = LoadData.Get_AURN_data(site_name, [years[0], years[-1]],
                                    drop_status_and_units=False)
    # Split the returned columns into pollutant, status and unit columns
    pollutant_cols = []
    status_cols = []
    unit_cols = []
    for c in new_df.columns:
        if c.split('.')[0].lower() == 'status':
            status_cols.append(c)
        elif c.split('.')[0].lower() == 'unit':
            unit_cols.append(c)
        else:
            pollutant_cols.append(c)
    # Need to now update the database
    for i, col in enumerate(pollutant_cols):
        # Get the relevant status for the measurement and map the 'R' status
        # onto 'V' so it matches the database convention
        status_col = new_df[status_cols[i]].replace('R', 'V')
        # Fill the nan values with 'U' for unknown - although this will
        # rarely be a problem as all nan statuses have a matching nan
        # measurement
        status_col = status_col.fillna('U')
        chemical_formula = Get_Chemical_Formula(col)
        measurement_name = 'DEFRA_AURN_%s' % chemical_formula
        temp_col = new_df[col].dropna()
        for x in range(len(temp_col)):
            # Filter the data by measurement_id, site and time.
            # There should only be one data entry for each of these.
            data_entry = measurement_data.objects.filter(
                measurement_id=measurement_name).filter(
                site_id=site_id).filter(
                date_and_time=temp_col.index[x])[0]
            new_status = status_col.loc[temp_col.index[x]]
            if data_entry.verified == 'V':
                continue
            elif new_status in ['U', 'N']:
                continue
            elif new_status == data_entry.verified:
                continue
            else:
                data_entry.verified = new_status
                data_entry.value = temp_col.iloc[x]
                data_entry.save()
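## A minimal sketch (not part of the existing module) of a periodic sweep that
## calls Update_DEFRA_Data for every site still holding provisional data.
## verify_provisional_sites is a hypothetical wrapper and it assumes the
## verified field uses 'V' for verified and 'U' for unknown, as above.
def verify_provisional_sites():
    for site in site_info.objects.all():
        has_provisional = measurement_data.objects.filter(
            site_id=site).exclude(verified='V').exclude(verified='U').exists()
        if has_provisional:
            print('Checking DEFRA for verified data at %s' % site.site_name)
            Update_DEFRA_Data(site.site_name)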