Exemplos de normalize_numeric em Python, exemplos de util.normalize_numeric em Python

Exemplo n.º 1

0

Exibir arquivo

def get_data(test, sql=False, query='throughput_gas_monthly.sql'):
    """Load the result set for ``query`` from SQL, test fixtures, or the local CSV.

    The CSV file name is the part of ``query`` before its first ``"."`` plus
    ``".csv"``.

    Args:
        test: When truthy (and ``sql`` is falsy), read the CSV from
            ``raw_data/test_data/``.
        sql: When truthy, run the query through ``execute_sql`` against the
            ``EnergyData`` database and overwrite the CSV in ``raw_data/``
            with the result. Takes precedence over ``test``.
        query: File name of the SQL query.

    Returns:
        The loaded DataFrame. For ``'key_points.sql'`` the frame is
        additionally extended with five hard-coded key points and passed
        through the key point / corporate entity clean-up helpers.
    """
    query_stem = query.split(".")[0]
    csv_name = query_stem + '.csv'
    if sql:
        print('reading sql ' + query_stem)
        # The joined segment must be relative: os.path.join discards every
        # earlier component when a later one is absolute ("/queries").
        df = execute_sql(path=os.path.join(script_dir, "queries"), query_name=query, db='EnergyData')
        df.to_csv('raw_data/' + csv_name, index=False)
    elif test:
        print('reading test ' + query_stem)
        df = pd.read_csv('raw_data/test_data/' + csv_name)
    else:
        print('reading local ' + query_stem)
        df = pd.read_csv('raw_data/' + csv_name, encoding='latin-1')

    # initial processing for key points
    if query == 'key_points.sql':
        # add extra key points that don't appear in the database
        extra_points = ["Calgary", "Edmonton", "Saturn", "OSDA Kirby", "OSDA Liege"]
        others = pd.DataFrame.from_dict({"Key Point": extra_points,
                                         "Corporate Entity": ["NOVA Gas Transmission Ltd."] * len(extra_points),
                                         "Latitude": [51.22022, 51.80478, 55.99558, 53.31907, 56.9473],
                                         "Longitude": [-114.4925, -113.59329, -121.1104, -111.35386, -111.80979]})
        df = fixKeyPoint(df)
        # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
        df = pd.concat([df, others], ignore_index=True)
        df = normalize_text(df, ['Key Point', 'Corporate Entity'])
        df = normalize_numeric(df, ['Latitude', 'Longitude'], 3)
        df = fixCorporateEntity(df)
        df = df[df['Key Point'] != "FortisBC Lower Mainland"]

    return df

Exemplo n.º 2

0

Exibir arquivo

Arquivo: apportionment.py Projeto: mbradds/pipeline-profiles

def process_apportionment(save=False, sql=False, companies=False):
    """Clean the apportionment data set and run ``process_company`` per company.

    Args:
        save: Passed through unchanged to ``process_company``.
        sql: When truthy, load the data through ``get_data``; otherwise read
            ``./raw_data/apportionment.csv``. Also passed to
            ``get_enbridge_points``.
        companies: Optional iterable of company names to process. When falsy,
            the list returned by ``get_company_list("all")`` is used.

    Returns:
        Whatever ``process_company`` returned for the last company processed,
        or ``None`` when there were no companies to process.

    Raises:
        Any exception raised by ``process_company`` is re-raised after the
        failing company's name has been printed.
    """
    if sql:
        df = get_data(os.getcwd(), "apportionment.sql", "PipelineInformation",
                      sql)
    else:
        print('reading local apportionment csv...')
        df = pd.read_csv("./raw_data/apportionment.csv")

    df = normalize_dates(df, ['Date'])
    df = normalize_text(df, ['Pipeline Name'])
    # enbridge processing
    df = df.drop(df[(df['Pipeline Name'] == 'EnbridgeMainline')
                    & (df['KeyPointID'].isin(['KP0016', 'KP0021']))].index)
    df = df.drop(df[(df['Pipeline Name'] == 'EnbridgeMainline')
                    & (df['Date'].dt.year < 2016)].index)
    # cochin processing
    df = df.drop(df[(df['Pipeline Name'] == 'Cochin')
                    & (df['KeyPointID'] != 'KP0018')].index)
    df = df[~df['Pipeline Name'].
            isin(["SouthernLights", "Westpur", "TransNorthern"])].reset_index(
                drop=True)

    # Strip any "(...)" suffix from the column names.
    df = df.rename(columns={x: x.split("(")[0].strip() for x in df.columns})
    num_cols = [
        'Available Capacity', 'Original Nominations', 'Accepted Nominations',
        'Apportionment Percentage'
    ]
    df = normalize_numeric(df, num_cols, 2)
    # The percentage column is excluded from the conversion and only rounded.
    df = conversion(df, "oil", num_cols[:-1], 2, False)
    df['Apportionment Percentage'] = df['Apportionment Percentage'].round(2)

    # Only look up the full company list when no explicit subset was given.
    company_files = companies if companies else get_company_list("all")

    enbridge_points = get_enbridge_points(sql)
    df = sort_by_points(df)

    # Initialised up front so an empty company list returns None instead of
    # raising UnboundLocalError at the return statement.
    this_company_data = None
    for company in company_files:
        try:
            this_company_data = process_company(df, company, enbridge_points,
                                                save)
        except Exception:
            print("apportionment error: " + company)
            raise
        else:
            print("completed: " + company)

    return this_company_data

Exemplo n.º 3

0

Exibir arquivo

def get_traffic_data(sql=False,
                     query='throughput_gas_monthly.sql',
                     db="PipelineInformation"):
    """Return the data for ``query`` from the database or the local CSV.

    With ``sql`` truthy the query is run through ``execute_sql`` against
    ``db`` and the result is written to ``raw_data/<name>.csv``; otherwise
    that CSV is read back. ``<name>`` is the part of ``query`` before its
    first ``"."``. Results of ``'key_points.sql'`` additionally go through
    ``normalize_text`` and ``normalize_numeric``.
    """
    stem = query.partition(".")[0]
    local_csv = 'raw_data/' + stem + '.csv'

    if not sql:
        print('reading local ' + stem)
        frame = pd.read_csv(local_csv, encoding='utf-8')
    else:
        print('reading sql ' + stem)
        queries_dir = os.path.join(os.getcwd(), "queries")
        frame = execute_sql(path=queries_dir, query_name=query, db=db)
        # Save the fresh result to the CSV that the local path reads.
        frame.to_csv(local_csv, index=False)

    # Only the key points query needs extra clean-up.
    if query != 'key_points.sql':
        return frame

    frame = normalize_text(frame, ['Key Point', 'Pipeline Name'])
    return normalize_numeric(frame, ['Latitude', 'Longitude'], 3)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: apportionment.py Projeto: jrodioukova/pipeline-profiles

def _build_apportionment_series(df_c, company):
    """Build the chart series list for one company's apportionment rows.

    Args:
        df_c: Non-empty rows for a single company, sorted by ``Date``.
        company: Company name, used only for the error raised below.

    Returns:
        A list of dicts: a ``"date"`` entry holding the minimum date, a line
        series, an area series, and (when ``hasNotNull`` reports data) the
        apportionment percentage series.

    Raises:
        ApportionSeriesCombinationError: When the data has neither
            capacity + original nominations nor original + accepted
            nominations.
    """
    has_cap = hasData(df_c, "Available Capacity")
    has_orig_nom = hasData(df_c, "Original Nominations")
    has_accep_nom = hasData(df_c, "Accepted Nominations")
    has_pct = hasNotNull(df_c, "Apportionment Percentage")

    # The combination depends only on the column-level flags, so it is
    # decided once instead of once per row. The area series is always the
    # original nominations ("on").
    if has_cap and has_orig_nom:
        line_column, line_id = "Available Capacity", "ac"
    elif has_orig_nom and has_accep_nom:
        line_column, line_id = "Accepted Nominations", "an"
    else:
        raise ApportionSeriesCombinationError(company)

    min_date = df_c['Date'].min() - dateutil.relativedelta.relativedelta(months=1)
    series = [
        # month is written zero-based — presumably for a JavaScript Date on
        # the front end; confirm against the consumer.
        {"name": "date",
         "min": [min_date.year, min_date.month-1, min_date.day]},
        {"id": line_id,
         "data": list(df_c[line_column]),
         "yAxis": 0,
         "type": "line"},
        {"id": "on",  # Original Nominations
         "data": list(df_c['Original Nominations']),
         "yAxis": 0,
         "type": "area"},
    ]
    if has_pct:
        series.append({"id": "ap",  # Apportionment Percent
                       "data": list(df_c['Apportionment Percentage']),
                       "yAxis": 1,
                       "type": "line",
                       "visible": False})
    return series


def process_apportionment(test=False, sql=False, companies=False):
    """Clean the apportionment data and write one JSON file per company.

    Args:
        test: When truthy, no JSON files are written. With ``sql`` falsy
            there is no test data set, so the function prints a notice and
            returns ``None`` without processing anything.
        sql: When truthy, load the data through ``get_data``; otherwise read
            ``./raw_data/apportionment.csv``. Takes precedence over ``test``
            for choosing the data source.
        companies: Currently unused; kept for interface compatibility.

    Returns:
        The cleaned DataFrame, or ``None`` on the test-without-sql path.

    Raises:
        ApportionSeriesCombinationError: When a company's data has no usable
            combination of capacity / nomination columns.
    """
    if sql:
        df = get_data(False, True, "apportionment.sql")
    elif test:
        print('no tests for apportionment data!')
        # No frame was loaded on this path; return instead of failing on an
        # unbound ``df`` below.
        return None
    else:
        print('reading local apportionment csv...')
        df = pd.read_csv("./raw_data/apportionment.csv")

    df = normalize_dates(df, ['Date'])
    df = normalize_text(df, ['Corporate Entity'])
    # enbridge processing
    # NOTE(review): the "All" -> "system" replacement below runs after this
    # filter, so Enbridge rows still labelled "All" would be dropped here —
    # confirm that is intended.
    df = df.drop(df[(df['Corporate Entity'] == 'Enbridge Pipelines Inc.') & (df['Key Point'] != 'system')].index)
    df = df.drop(df[(df['Corporate Entity'] == 'Enbridge Pipelines Inc.') & (df['Date'].dt.year < 2016)].index)
    # cochin processing
    df = df.drop(df[(df['Corporate Entity'] == 'PKM Cochin ULC') & (df['Key Point'] != 'Ft. Saskatchewan')].index)
    df = df[~df['Pipeline Name'].isin(["Southern Lights Pipeline",
                                       "Westpur Pipeline",
                                       "Trans-Northern"])].reset_index(drop=True)

    df['Key Point'] = df['Key Point'].replace("All", "system")
    df = addIds(df)
    del df['Pipeline Name']
    # Strip any "(...)" suffix from the column names.
    df = df.rename(columns={x: x.split("(")[0].strip() for x in df.columns})
    num_cols = ['Available Capacity', 'Original Nominations', 'Accepted Nominations', 'Apportionment Percentage']
    df = normalize_numeric(df, num_cols, 2)
    # The percentage column is excluded from the conversion and only rounded.
    df = conversion(df, "oil", num_cols[:-1], 2, False)

    df['Apportionment Percentage'] = df['Apportionment Percentage'].round(2)

    # Each company appears once: repeated names only rebuilt and rewrote the
    # same JSON file.
    company_files = ['NOVA Gas Transmission Ltd.',
                     'Westcoast Energy Inc.',
                     'TransCanada PipeLines Limited',
                     'Alliance Pipeline Ltd.',
                     'Trans Quebec and Maritimes Pipeline Inc.',
                     'Maritimes & Northeast Pipeline Management Ltd.',
                     'Many Islands Pipe Lines (Canada) Limited',
                     'Emera Brunswick Pipeline Company Ltd.',
                     'Foothills Pipe Lines Ltd.',
                     'Enbridge Pipelines Inc.',
                     'TransCanada Keystone Pipeline GP Ltd.',
                     'Trans Mountain Pipeline ULC',
                     'PKM Cochin ULC',
                     'Trans-Northern Pipelines Inc.',
                     'Enbridge Pipelines (NW) Inc.',
                     'Enbridge Southern Lights GP Inc.',
                     'Kingston Midstream Westspur Limited',
                     'Vector Pipeline Limited Partnership',
                     'Plains Midstream Canada ULC',
                     'Enbridge Bakken Pipeline Company Inc.',
                     'Express Pipeline Ltd.',
                     'Genesis Pipeline Canada Ltd.',
                     'Montreal Pipe Line Limited',
                     'Aurora Pipeline Company Ltd']

    for company in company_files:
        this_company_data = {}
        folder_name = company.replace(' ', '').replace('.', '')
        df_c = df[df['Corporate Entity'] == company].copy().reset_index(drop=True)
        if not df_c.empty:
            this_company_data['build'] = True
            # Keep one row per date, in chronological order.
            df_c = df_c.drop_duplicates(subset=['Date'])
            df_c = df_c.sort_values(by='Date')
            this_company_data["keyPoint"] = list(df_c['Key Point'])[0]
            this_company_data["company"] = company
            this_company_data["series"] = _build_apportionment_series(df_c, company)
        else:
            this_company_data["build"] = False

        if not test:
            with open('../apportionment/company_data/'+folder_name+'.json', 'w') as fp:
                # default=str stringifies anything json cannot encode natively.
                json.dump(this_company_data, fp, default=str)

    return df