Пример #1
0
def gpu_load_names(col_names_path, **kwargs):
    """Register the bank-renaming lookup file as the pyblazing table ``names``.

    The file location is built by appending the literal ``"names.load"``
    directly to ``col_names_path`` (plain string concatenation, no separator
    is inserted), and the resulting path is printed before the table is
    created.

    Parameters
    ----------
    col_names_path : str
        Prefix that ``"names.load"`` is appended to.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described by the
    original author as a GPU DataFrame).
    """
    timer = Chronometer.makeStarted()

    # Single source of truth for the two columns: (name, dtype) in file order.
    schema = [
        ("seller_name", "category"),
        ("new_seller_name", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_types = OrderedDict(schema)

    names_file = col_names_path + "names.load"
    print(names_file)

    table = pyblazing.create_table(
        table_name="names",
        type=get_type_schema(names_file),
        path=names_file,
        delimiter="|",
        names=column_names,
        # TODO: pass get_dtype_values(column_types) instead of the raw mapping
        dtypes=column_types,
        skip_rows=1,  # presumably a header row - confirm against the data file
    )
    Chronometer.show(timer, "Read Names CSV")
    return table
Пример #2
0
def gpu_load_names(**kwargs):
    """Register the bank-renaming lookup file as the pyblazing table ``names``.

    The file location is taken from the ``col_names_path`` keyword argument
    when the caller supplies one. Otherwise the name ``col_names_path`` is
    looked up in the enclosing module scope, which is what this function
    always did before the keyword was honoured.

    Parameters
    ----------
    **kwargs
        Only ``col_names_path`` is consulted; every other keyword is ignored.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described by the
    original author as a GPU DataFrame).

    Raises
    ------
    NameError
        If ``col_names_path`` is neither passed as a keyword nor defined at
        module level.
    """
    chronometer = Chronometer.makeStarted()

    # Prefer the explicit keyword so callers are not forced to set a module
    # global; fall back to the global to stay compatible with existing callers.
    if "col_names_path" in kwargs:
        names_path = kwargs["col_names_path"]
    else:
        names_path = col_names_path

    cols = ['seller_name', 'new_seller_name']

    dtypes = OrderedDict([
        ("seller_name", "category"),
        ("new_seller_name", "category"),
    ])

    names_table = pyblazing.create_table(table_name='names',
                                         type=get_type_schema(names_path),
                                         path=names_path,
                                         delimiter='|',
                                         names=cols,
                                         dtypes=get_dtype_values(dtypes),
                                         skip_rows=1)
    Chronometer.show(chronometer, 'Read Names CSV')
    return names_table
Пример #3
0
def gpu_load_acquisition_csv(acquisition_path, **kwargs):
    """Register the acquisition file as the pyblazing table ``acq``.

    The path is printed, then handed to ``pyblazing.create_table`` together
    with the 25 column names and the dtype values produced by
    ``get_dtype_values``.

    Parameters
    ----------
    acquisition_path : str
        Location of the pipe-delimited acquisition file.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described by the
    original author as a GPU DataFrame).
    """
    timer = Chronometer.makeStarted()

    # (column name, dtype) pairs in file order; names and dtypes are both
    # derived from this one list so they cannot drift apart.
    schema = [
        ("loan_id", "int64"),
        ("orig_channel", "category"),
        ("seller_name", "category"),
        ("orig_interest_rate", "float64"),
        ("orig_upb", "int64"),
        ("orig_loan_term", "int64"),
        ("orig_date", "date"),
        ("first_pay_date", "date"),
        ("orig_ltv", "float64"),
        ("orig_cltv", "float64"),
        ("num_borrowers", "float64"),
        ("dti", "float64"),
        ("borrower_credit_score", "float64"),
        ("first_home_buyer", "category"),
        ("loan_purpose", "category"),
        ("property_type", "category"),
        ("num_units", "int64"),
        ("occupancy_status", "category"),
        ("property_state", "category"),
        ("zip", "int64"),
        ("mortgage_insurance_percent", "float64"),
        ("product_type", "category"),
        ("coborrow_credit_score", "float64"),
        ("mortgage_insurance_type", "float64"),
        ("relocation_mortgage_indicator", "category"),
    ]
    column_names = [name for name, _ in schema]
    column_types = OrderedDict(schema)

    print(acquisition_path)

    table = pyblazing.create_table(
        table_name="acq",
        type=get_type_schema(acquisition_path),
        path=acquisition_path,
        delimiter="|",
        names=column_names,
        dtypes=get_dtype_values(column_types),
        skip_rows=1,  # presumably a header row - confirm against the data file
    )
    Chronometer.show(timer, "Read Acquisition CSV")
    return table
Пример #4
0
def gpu_load_acquisition_csv(acquisition_path, **kwargs):
    """Register the acquisition file as the pyblazing table ``acq``.

    The path is printed, then handed to ``pyblazing.create_table`` together
    with the 25 column names and the name-to-dtype mapping (passed as-is,
    without going through ``get_dtype_values``).

    Parameters
    ----------
    acquisition_path : str
        Location of the pipe-delimited acquisition file.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described by the
    original author as a GPU DataFrame).
    """
    stopwatch = Chronometer.makeStarted()

    # Ordered name -> dtype mapping; its key order is the column order.
    dtype_by_column = OrderedDict((
        ("loan_id", "int64"),
        ("orig_channel", "category"),
        ("seller_name", "category"),
        ("orig_interest_rate", "float64"),
        ("orig_upb", "int64"),
        ("orig_loan_term", "int64"),
        ("orig_date", "date"),
        ("first_pay_date", "date"),
        ("orig_ltv", "float64"),
        ("orig_cltv", "float64"),
        ("num_borrowers", "float64"),
        ("dti", "float64"),
        ("borrower_credit_score", "float64"),
        ("first_home_buyer", "category"),
        ("loan_purpose", "category"),
        ("property_type", "category"),
        ("num_units", "int64"),
        ("occupancy_status", "category"),
        ("property_state", "category"),
        ("zip", "int64"),
        ("mortgage_insurance_percent", "float64"),
        ("product_type", "category"),
        ("coborrow_credit_score", "float64"),
        ("mortgage_insurance_type", "float64"),
        ("relocation_mortgage_indicator", "category"),
    ))
    header = list(dtype_by_column.keys())

    print(acquisition_path)

    acq = pyblazing.create_table(
        table_name="acq",
        type=get_type_schema(acquisition_path),
        path=acquisition_path,
        delimiter="|",
        names=header,
        # TODO: pass get_dtype_values(dtype_by_column) instead of the raw mapping
        dtypes=dtype_by_column,
        skip_rows=1,  # presumably a header row - confirm against the data file
    )
    Chronometer.show(stopwatch, "Read Acquisition CSV")
    return acq
Пример #5
0
def gpu_load_performance_csv(performance_path, **kwargs):
    """Register the performance file as the pyblazing table ``perf``.

    The path is printed, then handed to ``pyblazing.create_table`` together
    with the 31 column names and the name-to-dtype mapping (passed as-is,
    without going through ``get_dtype_values``).

    Parameters
    ----------
    performance_path : str
        Location of the pipe-delimited performance file.
    **kwargs
        Accepted but not used by this function.

    Returns
    -------
    The object returned by ``pyblazing.create_table`` (described by the
    original author as a GPU DataFrame).
    """
    timer = Chronometer.makeStarted()

    # (column name, dtype) pairs in file order; the name list and the dtype
    # mapping below are both derived from this single declaration.
    layout = [
        ("loan_id", "int64"),
        ("monthly_reporting_period", "date"),
        ("servicer", "category"),
        ("interest_rate", "float64"),
        ("current_actual_upb", "float64"),
        ("loan_age", "float64"),
        ("remaining_months_to_legal_maturity", "float64"),
        ("adj_remaining_months_to_maturity", "float64"),
        ("maturity_date", "date"),
        ("msa", "float64"),
        ("current_loan_delinquency_status", "int32"),
        ("mod_flag", "category"),
        ("zero_balance_code", "category"),
        ("zero_balance_effective_date", "date"),
        ("last_paid_installment_date", "date"),
        ("foreclosed_after", "date"),
        ("disposition_date", "date"),
        ("foreclosure_costs", "float64"),
        ("prop_preservation_and_repair_costs", "float64"),
        ("asset_recovery_costs", "float64"),
        ("misc_holding_expenses", "float64"),
        ("holding_taxes", "float64"),
        ("net_sale_proceeds", "float64"),
        ("credit_enhancement_proceeds", "float64"),
        ("repurchase_make_whole_proceeds", "float64"),
        ("other_foreclosure_proceeds", "float64"),
        ("non_interest_bearing_upb", "float64"),
        ("principal_forgiveness_upb", "float64"),
        ("repurchase_make_whole_proceeds_flag", "category"),
        ("foreclosure_principal_write_off_amount", "float64"),
        ("servicing_activity_indicator", "category"),
    ]
    field_names = [pair[0] for pair in layout]
    field_types = OrderedDict(layout)

    print(performance_path)

    perf = pyblazing.create_table(
        table_name="perf",
        type=get_type_schema(performance_path),
        path=performance_path,
        delimiter="|",
        names=field_names,
        # TODO: pass get_dtype_values(field_types) instead of the raw mapping
        dtypes=field_types,
        skip_rows=1,  # presumably a header row - confirm against the data file
    )
    Chronometer.show(timer, "Read Performance CSV")
    return perf