Exemplo n.º 1
0
def execute(context):
    """
    Restrict the cleaned ENTD survey to the requested departments.

    Reads the spatial codes ("data.spatial.codes") and the cleaned ENTD
    households / persons / trips ("data.hts.entd.cleaned"). A person is kept
    only if their departement_id is among the requested departments and none
    of their trips starts or ends outside of them. Trips and households
    without a remaining person are dropped.

    Returns a tuple (households, persons, trips) reduced to the columns listed
    in hts.HOUSEHOLD_COLUMNS, hts.PERSON_COLUMNS and hts.TRIP_COLUMNS; the
    trips additionally keep "routed_distance". hts.check is run on the result
    before returning.
    """
    codes = context.stage("data.spatial.codes")
    households, persons, trips = context.stage("data.hts.entd.cleaned")

    # Department identifiers are compared as strings throughout
    area = codes["departement_id"].unique()

    # Residents of the requested departments only
    is_resident = persons["departement_id"].astype(str).isin(area)
    persons = persons[is_resident]

    # A trip leaves the area unless both of its ends are inside; persons with
    # such a trip are dropped entirely (because they have NaN distances)
    origin_inside = trips["origin_departement_id"].astype(str).isin(area)
    destination_inside = trips["destination_departement_id"].astype(str).isin(area)
    leaves_area = ~(origin_inside & destination_inside)

    excluded = set(trips.loc[leaves_area, "person_id"].unique())
    persons = persons[~persons["person_id"].isin(excluded)]

    # Trips and households must still be attached to a remaining person
    remaining_persons = persons["person_id"].unique()
    trips = trips[trips["person_id"].isin(remaining_persons)]
    households = households[
        households["household_id"].isin(persons["household_id"])
    ]

    # Reduce to the output columns
    households = households[hts.HOUSEHOLD_COLUMNS]
    persons = persons[hts.PERSON_COLUMNS]
    trips = trips[hts.TRIP_COLUMNS + ["routed_distance"]]

    hts.check(households, persons, trips)
    return households, persons, trips
Exemplo n.º 2
0
def execute(context):
    """
    Restrict the cleaned EGT survey to the departments in hts.FILTER_DEPARTEMENTS.

    Reads the cleaned EGT households / persons / trips
    ("data.hts.egt.cleaned"). A person is kept only if their departement_id is
    in hts.FILTER_DEPARTEMENTS and none of their trips starts or ends outside
    of those departments. Trips and households without a remaining person are
    dropped.

    Returns a tuple (households, persons, trips) reduced to the columns listed
    in hts.HOUSEHOLD_COLUMNS, hts.PERSON_COLUMNS and hts.TRIP_COLUMNS; the
    trips additionally keep "euclidean_distance". hts.check is run on the
    result before returning.
    """
    df_households, df_persons, df_trips = context.stage("data.hts.egt.cleaned")

    # Filter for non-residents
    f = df_persons["departement_id"].isin(hts.FILTER_DEPARTEMENTS)
    df_persons = df_persons[f]

    # Filter for people going outside of the area (because they have NaN distances)
    f_outside = (
        ~df_trips["origin_departement_id"].isin(hts.FILTER_DEPARTEMENTS)
        | ~df_trips["destination_departement_id"].isin(hts.FILTER_DEPARTEMENTS)
    )
    remove_ids = set(df_trips.loc[f_outside, "person_id"].unique())

    # Non-residents need no entry in remove_ids: they have already been dropped
    # from df_persons above. (Building a set from the filtered DataFrame itself
    # would only collect its column labels, not person ids.)
    df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]

    # Only keep trips and households that still have a person
    df_trips = df_trips[df_trips["person_id"].isin(
        df_persons["person_id"].unique())]
    df_households = df_households[df_households["household_id"].isin(
        df_persons["household_id"])]

    # Finish up
    df_households = df_households[hts.HOUSEHOLD_COLUMNS]
    df_persons = df_persons[hts.PERSON_COLUMNS]
    df_trips = df_trips[hts.TRIP_COLUMNS + ["euclidean_distance"]]

    hts.check(df_households, df_persons, df_trips)
    return df_households, df_persons, df_trips
Exemplo n.º 3
0
def execute(context):
    """
    Restrict the cleaned EGT survey to the requested departments.

    Reads the spatial codes ("data.spatial.codes") and the cleaned EGT
    households / persons / trips ("data.hts.egt.cleaned"). A person is kept
    only if their departement_id is among the requested departments and none
    of their trips starts or ends outside of them. Trips and households
    without a remaining person are dropped.

    Returns a tuple (households, persons, trips). On top of the columns in
    hts.HOUSEHOLD_COLUMNS, hts.PERSON_COLUMNS and hts.TRIP_COLUMNS, the frames
    keep "income_class" (households), "euclidean_distance" (trips) and the
    original egt_* identifiers. hts.check is run on the result before
    returning.

    Raises AssertionError if any requested zone has a region_id other than 11.
    """
    df_codes = context.stage("data.spatial.codes")
    assert (
        df_codes["region_id"] == 11).all()  # Otherwise EGT doesn't make sense

    df_households, df_persons, df_trips = context.stage("data.hts.egt.cleaned")

    # Filter for non-residents
    requested_departments = df_codes["departement_id"].unique()
    f = df_persons["departement_id"].astype(str).isin(
        requested_departments)  # pandas bug!
    df_persons = df_persons[f]

    # Filter for people going outside of the area (because they have NaN distances)
    f_outside = (
        ~df_trips["origin_departement_id"].astype(str).isin(requested_departments)
        | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
    )
    remove_ids = set(df_trips.loc[f_outside, "person_id"].unique())

    # Non-residents need no entry in remove_ids: they have already been dropped
    # from df_persons above. (Building a set from the filtered DataFrame itself
    # would only collect its column labels, not person ids.)
    df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]

    # Only keep trips and households that still have a person
    df_trips = df_trips[df_trips["person_id"].isin(
        df_persons["person_id"].unique())]
    df_households = df_households[df_households["household_id"].isin(
        df_persons["household_id"])]

    # Finish up
    df_households = df_households[hts.HOUSEHOLD_COLUMNS + ["income_class"] +
                                  ["egt_household_id"]]
    df_persons = df_persons[hts.PERSON_COLUMNS +
                            ["egt_household_id", "egt_person_id"]]
    df_trips = df_trips[hts.TRIP_COLUMNS + ["euclidean_distance"] +
                        ["egt_household_id", "egt_person_id", "egt_trip_id"]]

    hts.check(df_households, df_persons, df_trips)

    return df_households, df_persons, df_trips
Exemplo n.º 4
0
# Tail of a survey filtering routine: removes flagged persons, prunes trips and
# households accordingly, reduces the frames to their output columns, validates
# them and exports them as CSV.
# NOTE(review): `remove_ids`, `requested_communes` and the three data frames are
# defined outside of this excerpt.
# NOTE(review): set() over a DataFrame iterates its column labels, not its rows,
# so this statement adds the column names of df_persons to remove_ids instead of
# the person_id values of people living outside the requested communes. The
# intent was presumably to select ["person_id"] first; before fixing, confirm
# that the commune_id dtype matches requested_communes, otherwise every person
# would be removed.
remove_ids |= set(
    df_persons[~df_persons["commune_id"].isin(requested_communes)])

# Drop every person whose id has been collected in remove_ids
df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]

# Only keep trips and households that still have a person
df_trips = df_trips[df_trips["person_id"].isin(
    df_persons["person_id"].unique())]
df_households = df_households[df_households["household_id"].isin(
    df_persons["household_id"])]

# Finish up: reduce each frame to its output columns

df_households = df_households[hts.HOUSEHOLD_COLUMNS]
df_persons = df_persons[hts.PERSON_COLUMNS]
# Trips additionally carry a set of person attributes
df_trips = df_trips[hts.TRIP_COLUMNS + [
    "age", "sex", "employed", "studies", "has_license", "has_pt_subscription",
    "socioprofessional_class"
]]

hts.check(df_households, df_persons, df_trips)

# Export the three frames; the relative paths are resolved against the current
# working directory.
# NOTE(review): DataFrame.to_csv returns None when a path is given, so the
# export_csv* names are always None and could be dropped.
# NOTE(review): index=None presumably behaves like index=False (no index
# column is written) - TODO confirm.
export_csv = df_households.to_csv(r'../../../output/df_households_MEL.csv',
                                  index=None,
                                  header=True)
export_csv2 = df_persons.to_csv(r'../../../output/df_persons_MEL.csv',
                                index=None,
                                header=True)
export_csv3 = df_trips.to_csv(r'../../../output/df_trips_MEL.csv',
                              index=None,
                              header=True)
Exemplo n.º 5
0
def execute(context):
    """
    Restrict the cleaned EMC2 survey to the requested departments.

    Reads the spatial codes ("data.spatial.codes") and the cleaned EMC2
    households / persons / trips ("data.hts.emc2.cleaned"). A person is kept
    only if their departement_id is among the requested departments and none
    of their trips starts or ends outside of them. Trips and households
    without a remaining person are dropped.

    Side effects: prints the summed trip weight after every filtering step and
    writes four CSV files to the current working directory
    (departement_matrix_od.csv, trips_emc².csv, households_emc².csv and
    persons_emc².csv).

    Returns a tuple (households, persons, trips). On top of the columns in
    hts.HOUSEHOLD_COLUMNS, hts.PERSON_COLUMNS and hts.TRIP_COLUMNS, the frames
    keep "MID" (all), "PER" (persons, trips), "NDEP" and "euclidean_distance"
    (trips). hts.check is run on the result before returning.
    """
    df_codes = context.stage("data.spatial.codes")

    df_households, df_persons, df_trips = context.stage(
        "data.hts.emc2.cleaned")

    len_ini = int(df_trips['trip_weight'].sum())

    def percent_of_initial(removed):
        # Share of the initial trip weight; guarded so that an empty survey
        # does not raise ZeroDivisionError in the diagnostics below
        return round(100 * removed / len_ini, 2) if len_ini else 0.0

    # Filter for non-residents
    requested_departments = df_codes["departement_id"].unique()
    f = df_persons["departement_id"].astype(str).isin(
        requested_departments)  # pandas bug!
    df_persons = df_persons[f]
    df_trips = df_trips[df_trips["person_id"].isin(
        df_persons["person_id"].unique())]

    len_non_res = int(df_trips['trip_weight'].sum())

    # Filter for people going outside of the area (because they have NaN distances)
    f_outside = (
        ~df_trips["origin_departement_id"].astype(str).isin(requested_departments)
        | ~df_trips["destination_departement_id"].astype(str).isin(requested_departments)
    )
    remove_ids = set(df_trips.loc[f_outside, "person_id"].unique())

    # Diagnostic origin/destination matrix of the residents' trip weights,
    # written before the trips leaving the area are removed
    df_matrix = df_trips.groupby([
        'origin_departement_id', 'destination_departement_id'
    ])['trip_weight'].agg('sum').reset_index(name='trip_weight')
    # Assign the result instead of calling fillna(inplace=True) on a column
    # selection, which does not update the frame under pandas Copy-on-Write
    df_matrix['trip_weight'] = df_matrix['trip_weight'].fillna(0).map(int)
    df_matrix.to_csv('departement_matrix_od.csv')

    df_trips = df_trips[~df_trips["person_id"].isin(remove_ids)]

    len_out = int(df_trips['trip_weight'].sum())

    print('INITIAL NUMBER OF TRIPS', len_ini)
    print('FILTER FOR NON-RESIDENTS: MINUS', len_ini - len_non_res,
          percent_of_initial(len_ini - len_non_res), '%')
    print('FILTER FOR OUTGOING TRIPS: MINUS', len_non_res - len_out,
          percent_of_initial(len_non_res - len_out), '%')
    print('INPUT NUMBER OF TRIPS', len_out)

    df_persons = df_persons[~df_persons["person_id"].isin(remove_ids)]

    # Only keep trips and households that still have a person
    df_trips = df_trips[df_trips["person_id"].isin(
        df_persons["person_id"].unique())]
    df_households = df_households[df_households["household_id"].isin(
        df_persons["household_id"])]

    # Finish up
    df_households = df_households[hts.HOUSEHOLD_COLUMNS + ["MID"]]
    df_persons = df_persons[hts.PERSON_COLUMNS + ["MID", "PER"]]
    df_trips = df_trips[hts.TRIP_COLUMNS + ["euclidean_distance"] +
                        ["MID", "PER", "NDEP"]]

    hts.check(df_households, df_persons, df_trips)
    print('NUMBER OF TRIPS', df_trips['trip_weight'].sum())

    df_trips.to_csv('trips_emc².csv', index=False)
    df_households.to_csv('households_emc².csv', index=False)
    df_persons.to_csv('persons_emc².csv', index=False)
    print('========= SAVED =========')
    return df_households, df_persons, df_trips