def auth_flag_features(source_df, field, path=path.path, nrows=None):
    """
    Compute, per card, the share of successful ('Y') authorizations.

    :param source_df: file name of historical/new transactions csv; the text
        before the first underscore is used as the feature-name prefix
    :param field: authorization-flag column to aggregate (expects 'Y'/'N' values)
    :param path: directory containing source_df
    :param nrows: number of rows to read (None reads all)
    :return: df indexed by card_id with a single '<prefix>_Y' column holding
        Y / (Y + N) for each card
    """
    _log.info("Creating features from {}".format(field))
    prefix = source_df.split("_")[0]
    source_df = "{}/{}".format(path, source_df)
    _log.info("Reading from {}".format(source_df))
    try:
        df = pd.read_csv(source_df, usecols=["card_id", field], nrows=nrows)
    except Exception as e:
        # Re-raise after logging: silently continuing would only hit a
        # NameError on `df` below and mask the real failure.
        _log.exception(e)
        raise
    _log.info("Successfully read from {}".format(source_df))
    df["dummy"] = 1
    _log.info("Computing successful and unsuccesful authorizations")
    # Count transactions per (flag, card) and pivot to one column per flag.
    df_agg = (df.groupby([field, "card_id"]).agg({
        "dummy": np.sum
    }).reset_index().pivot_table(index="card_id",
                                 columns=field,
                                 values="dummy"))
    # NOTE(review): a card with only 'Y' (or only 'N') rows gets NaN here
    # because the missing pivot column is NaN — confirm downstream handles it.
    df_agg["total"] = df_agg['N'] + df_agg['Y']
    df_agg['Y'] = df_agg['Y'] / df_agg["total"]
    df_agg.drop(columns=["N", "total"], inplace=True)
    df_agg.rename(columns={'Y': (prefix + '_Y')}, inplace=True)
    _log.info("Succesfully computed features for {}".format(field))
    return df_agg
def frequency_features_from_field(source_df):
    """
    Compute per-card daily transaction frequency features.

    :param source_df: file name (under path.feature_path) of the
        historical/new transactions csv with essential derived features
        (must contain 'card_id' and 'date'; see purchase_date_features_df_gen)
    :return: df with card_id, '<prefix>_max_freq_in_day' (max transactions on
        any single day) and '<prefix>_avg_freq_in_transacting_day' (mean
        transactions per day, over days with at least one transaction)
    """
    _log.info("Computing average and max frequency transactions in a day")
    prefix = source_df.split("_")[0]
    source_df = "{}/{}".format(path.feature_path, source_df)
    _log.info("Reading from {}".format(source_df))
    try:
        df = pd.read_csv(source_df)
    except Exception as e:
        # Re-raise after logging: continuing would only hit a NameError on
        # `df` below and mask the real failure.
        _log.exception(e)
        raise
    df["dummy"] = 1
    # Keys below ('amax', 'mean') are the column names numpy aggregators
    # produce in the grouped result.
    day_features_rename_dict = {
        "amax": "{}_{}".format(prefix, "max_freq_in_day"),
        "mean": "{}_{}".format(prefix, "avg_freq_in_transacting_day")
    }
    # First count transactions per (card, day), then take max/mean per card.
    day_features = (df.groupby(["card_id", "date"]).agg({
        "dummy": np.sum
    }).reset_index().groupby(["card_id"]).agg({
        "dummy": [np.max, np.mean]
    }).reset_index())
    # Flatten the two-level columns into a plain frame and apply final names.
    day_features = (pd.DataFrame([
        day_features["card_id"], day_features["dummy"]["amax"],
        day_features["dummy"]["mean"]
    ]).T.rename(columns=day_features_rename_dict))
    _log.info("Successfully computed feature")
    return day_features
def get_avg_days_bw_purchases(source_df):
    """
    Compute the average number of days between consecutive purchases per card.

    :param source_df: file name (under path.feature_path) of the
        historical/new transactions csv with essential derived features
        (must contain 'card_id' and 'purchase_date')
    :return: df with card_id and '<prefix>_avg_days_bw_purchases'
    """
    _log.info("Computing average number of days between consecutive purchases")
    prefix = source_df.split("_")[0]
    source_df = "{}/{}".format(path.feature_path, source_df)
    _log.info("Reading from {}".format(source_df))
    try:
        df = pd.read_csv(source_df)
    except Exception as e:
        # Re-raise after logging: continuing would only hit a NameError on
        # `df` below and mask the real failure.
        _log.exception(e)
        raise
    # shift(-1) only yields the *next* purchase if rows are in chronological
    # order within each card; the raw csv carries no such guarantee, so
    # parse the dates first and sort explicitly.
    df["purchase_date"] = pd.to_datetime(df["purchase_date"])
    df = df.sort_values(["card_id", "purchase_date"])
    df["next_purchase_date"] = df.groupby("card_id").shift(-1)["purchase_date"]
    # Each card's last purchase has no successor; anchor it to "now".
    df.loc[
        pd.isnull(df["next_purchase_date"]),
        "next_purchase_date"] = dt.datetime.now()  # should be a better value
    df["days_between_purchases"] = (df["next_purchase_date"] -
                                    df["purchase_date"]).dt.days
    # Fix: the original called rename({"mean": ...}) which renames *index*
    # labels (and "mean" never exists) — the aggregated column keeps the
    # name "days_between_purchases", so rename that column explicitly.
    days_bw_purchase = (df.groupby("card_id").agg({
        "days_between_purchases": np.mean
    }).reset_index().rename(
        columns={
            "days_between_purchases":
            "{}_{}".format(prefix, "avg_days_bw_purchases")
        }))
    _log.info("Successfully computed feature")
    return days_bw_purchase
def get_purchase_amt_dist(source_df,
                          field="purchase_amount",
                          path=path.path,
                          nrows=None):
    """
    Compute per-card distribution features of the purchase amount: counts and
    sums of amounts >= 0 and < 0, plus the min and max amount.

    :param source_df: file name of historical/new transactions csv; the text
        before the first underscore is used as the feature-name prefix
    :param field: amount column to aggregate
    :param path: directory containing source_df
    :param nrows: number of rows to read (None reads all)
    :return: df with card_id and one column per aggregation, renamed via
        create_rename_dict(prefix, field, funcs)
    """
    _log.info("Creating features from {}".format(field))
    prefix = source_df.split("_")[0]
    source_df = "{}/{}".format(path, source_df)
    _log.info("Reading from {}".format(source_df))
    try:
        df = pd.read_csv(source_df, usecols=["card_id", field], nrows=nrows)
        _log.info("Successfully read from {}".format(source_df))
    except Exception as e:
        # Re-raise after logging: continuing would only hit a NameError on
        # `df` below and mask the real failure.
        _log.exception(e)
        raise
    _log.info("Computing distribution of purchase amount")
    func_to_be_applied = [
        count_greater_than_equal_to_zero, sum_greater_than_equal_to_zero,
        count_less_than_zero, sum_less_than_zero, min, max
    ]
    rename_dict = create_rename_dict(prefix, field, func_to_be_applied)
    _log.info(("Creating final df"))
    df_features = df.groupby("card_id").agg({
        field: func_to_be_applied
    }).reset_index()
    # Flatten: glue the card_id level back onto the per-function columns.
    df_features = pd.concat(
        [pd.DataFrame(df_features["card_id"]), df_features[field]],
        axis=1,
        sort=False)
    _log.info("Renaming columns: {}".format(rename_dict))
    df_features.rename(columns=rename_dict, inplace=True)
    return df_features
def field_to_features(source_df, field, path=path.path, nrows=None):
    """
    One-hot-style transaction counts per level of a categorical field.

    :param source_df: file name of historical or new transactions csv; the
        text before the first underscore is used as the feature-name prefix
    :param field: column whose distinct levels become feature columns
    :param path: directory containing source_df
    :param nrows: number of rows to read (None reads all)
    :return: df with card_id, one '<prefix>_<field>_<level>' count column per
        distinct level (NaNs bucketed under level -100), and
        '<prefix>_max_txn_<field>' holding the level with the most transactions
    """
    _log.info("Creating features from {}".format(field))
    prefix = source_df.split("_")[0]
    source_df = "{}/{}".format(path, source_df)
    _log.info("Reading from {}".format(source_df))
    try:
        df = pd.read_csv(source_df, usecols=["card_id", field], nrows=nrows)
    except Exception as e:
        # Re-raise after logging: continuing would only hit a NameError on
        # `df` below and mask the real failure.
        _log.exception(e)
        raise
    _log.info("Successfully read from {}".format(source_df))
    # Sentinel level for missing values so NaNs get their own column.
    df.loc[pd.isnull(df[field]), field] = -100
    keys = pd.Series(list(set(df[field])))
    vals = keys.apply(lambda x: "{}_{}_{}".format(prefix, field, x))
    rename_dict = dict(zip(keys, vals))
    _log.info("Rename dict: {}".format(rename_dict))
    df["dummy"] = 1
    # Count transactions per (card, level) and pivot levels to columns.
    df_agg = (df.groupby(["card_id", field]).agg({
        "dummy": np.sum
    }).reset_index().pivot_table(index="card_id",
                                 columns=field,
                                 values="dummy",
                                 fill_value=0).reset_index())
    # idxmax runs before the rename, so the new column holds raw level values.
    field_name = prefix + "_max_txn_" + str(field)
    df_agg[field_name] = df_agg.drop(columns="card_id").idxmax(axis=1)
    df_agg.rename(columns=rename_dict, inplace=True)
    _log.info("Successfully computed feature")
    return df_agg
def get_unique_count_and_mode(source_df, field, path=path.path, nrows=None):
    """
    Returns number of levels and mode for a given field, per card.

    :param source_df: file name of historical/new transactions csv; the text
        before the first underscore is used as the feature-name prefix
    :param field: feature to be extracted from
    :param path: directory containing source_df
    :param nrows: number of rows to read (None reads all)
    :return: df with card_id, the field's modal value, its transaction count
        ('<prefix>_max_count_<field>'), a tie-break 'rank' column (always 1),
        and the distinct-level count ('<prefix>_unique_<field>')
    """
    _log.info("Creating features from {}".format(field))
    prefix = source_df.split("_")[0]
    source_df = "{}/{}".format(path, source_df)
    _log.info("Reading from {}".format(source_df))
    try:
        df = pd.read_csv(source_df, usecols=["card_id", field], nrows=nrows)
    except Exception as e:
        # Re-raise after logging: continuing would only hit a NameError on
        # `df` below and mask the real failure.
        _log.exception(e)
        raise
    _log.info("Successfully read from {}".format(source_df))
    # Impute missing values with the global mode before per-card stats.
    if pd.isnull(df[field]).any():
        field_mode = df[field].mode()[0]
        df.loc[pd.isnull(df[field]), field] = field_mode
    _log.info("Fetching no. of distinct merchants transacted on")
    df_uniq = df.groupby("card_id").agg({
        field: pd.Series.nunique
    }).reset_index().rename(
        columns={field: "{}_{}_{}".format(prefix, "unique", field)})
    _log.info("Computing mode of merchants")
    # Fix: the original aggregated with np.count_nonzero, which undercounts
    # any level whose value is 0/empty; group *size* is the intended count.
    df_agg = df.groupby(["card_id", field]).size().reset_index(name="count")
    df_max = df_agg.groupby("card_id").agg({"count": np.max}).reset_index()
    df_max = df_agg.merge(df_max, how="inner",
                          on=["card_id", "count"
                              ]).drop_duplicates().reset_index(drop=True)
    # Break ties deterministically: keep only the first modal level per card.
    df_max['rank'] = df_max.groupby(['card_id']).cumcount() + 1
    df_max = df_max[df_max['rank'] == 1]
    df = df_max.merge(df_uniq, on="card_id", how="inner")
    df.rename(
        columns={"count": "{}_{}_{}".format(prefix, "max_count", field)},
        inplace=True)
    _log.info("Succesfully computed mode and levels for {}".format(field))
    return df
def purchase_date_features_df_gen(source_df):
    """
    Create preliminary date columns used to derive purchase-date features.

    Reads card_id and purchase_date, derives time_of_day, day_of_week, month
    and date columns via the time_fragments helpers, and writes the result to
    path.feature_path as '<prefix>_purchase_date_features_raw.csv'.

    :param source_df: file name of historical/new transactions csv; the text
        before the first underscore is used as the output-file prefix
    :return: file name (not full path) of the written csv
    """
    _log.info(
        "Creating necessary columns for extracting purchase date features")
    prefix = source_df.split("_")[0]
    source_df = "{}/{}".format(path.path, source_df)
    _log.info("Reading from {}".format(source_df))
    try:
        df = pd.read_csv(source_df,
                         usecols=["card_id", "purchase_date"],
                         dtype=dtypes_specifier.dtypes,
                         parse_dates=dtypes_specifier.parse_dates)
    except Exception as e:
        # Re-raise after logging: continuing would only hit a NameError on
        # `df` below and mask the real failure.
        _log.exception(e)
        raise
    _log.info("Creating feature: {}".format("time_of_day"))
    df["time_of_day"] = df["purchase_date"].apply(
        time_fragments.time_of_day_fragment)
    _log.info("Creating feature: {}".format("day_of_week"))
    df["day_of_week"] = df["purchase_date"].apply(time_fragments.day_of_week)
    _log.info("Creating feature: {}".format("month"))
    df["month"] = df["purchase_date"].apply(time_fragments.month_from_date)
    _log.info("Creating feature: {}".format("date"))
    # assumes purchase_date was parsed to datetime via parse_dates — TODO confirm
    df["date"] = df["purchase_date"].apply(lambda x: x.date())
    written_file_name = "{}_{}.csv".format(prefix,
                                           "purchase_date_features_raw")
    written_file = "{}/{}_{}.csv".format(path.feature_path, prefix,
                                         "purchase_date_features_raw")
    _log.info("Writing file {}".format(written_file))
    # NOTE(review): index column is written too (default index=True);
    # downstream readers appear to tolerate the extra column — confirm.
    df.to_csv(written_file)
    _log.info("Process successfully completed")
    return written_file_name