def test_summary_data_from_transaction_data_returns_correct_results(transaction_level_data):
    """Summary statistics for the fixture data should match hand-computed values."""
    observation_end = '2015-02-07'
    result = utils.summary_data_from_transaction_data(
        transaction_level_data, 'id', 'date', observation_period_end=observation_end)
    rows = [
        [1, 1.0, 5.0, 6.0],
        [2, 0.0, 0.0, 37.0],
        [3, 2.0, 4.0, 37.0],
    ]
    expected = pd.DataFrame(rows, columns=['id', 'frequency', 'recency', 'T']).set_index('id')
    assert_frame_equal(result, expected)
def calc_clv(clv_recs, end, months=12):
    """Fit BG/NBD + Gamma-Gamma models on transaction records and return a
    per-player summary with a predicted customer lifetime value column.

    Parameters
    ----------
    clv_recs : list of dict
        Transaction records with 'player_id', 'start_date' and 'theo_win' keys.
    end : str
        Observation-period end date (parsed with dateutil's ``parse``).
    months : int, optional
        Horizon (in months) over which to project CLV. Default 12.

    Returns
    -------
    pandas.DataFrame indexed by player_id with frequency/recency/T/
    monetary_value columns plus a 'clv' column (negatives floored at 0).
    """
    df = pandas.DataFrame(clv_recs)
    df = df[['player_id', 'start_date', 'theo_win']]
    df['theo_win'] = df['theo_win'].astype(float)
    end_date = parse(end)
    summary = summary_data_from_transaction_data(df, 'player_id', 'start_date',
                                                 monetary_value_col='theo_win',
                                                 observation_period_end=end_date)

    bgf = BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])

    ggf = GammaGammaFitter(penalizer_coef=0)
    ggf.fit(summary['frequency'], summary['monetary_value'])

    ggf_clv = ggf.customer_lifetime_value(
        bgf,  # the model used to predict the number of future transactions
        summary['frequency'],
        summary['recency'],
        summary['T'],
        summary['monetary_value'],
        time=months,
        discount_rate=0.0
    )
    clv_df = pandas.DataFrame(ggf_clv)
    clv_df = clv_df.dropna()
    # Floor negative predictions at zero.  Use .loc so only the 'clv' column
    # is touched: the original `clv_df[mask] = 0.0` assigned 0.0 to EVERY
    # column of the masked rows, which silently clobbers any other columns.
    clv_df.loc[clv_df['clv'] < 0, 'clv'] = 0.0
    summary = summary.merge(clv_df, left_index=True, right_index=True, how='inner')
    return summary
def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(
        self):
    """Daily vs. hourly aggregation should give nearly identical 30-day forecasts."""
    transaction_data = load_transaction_data(parse_dates=['date'])
    period_end = max(transaction_data.date)
    summaries = {
        freq: utils.summary_data_from_transaction_data(
            transaction_data, 'id', 'date',
            observation_period_end=period_end, freq=freq)
        for freq in ('D', 'h')
    }

    days, hours_per_day = 30, 24
    model = estimation.ModifiedBetaGeoFitter()

    np.random.seed(0)  # seed before each fit so both runs are reproducible
    daily = summaries['D']
    model.fit(daily['frequency'], daily['recency'], daily['T'])
    prediction_from_daily = model.expected_number_of_purchases_up_to_time(days)

    np.random.seed(0)
    hourly = summaries['h']
    model.fit(hourly['frequency'], hourly['recency'], hourly['T'])
    prediction_from_hourly = model.expected_number_of_purchases_up_to_time(
        days * hours_per_day)

    npt.assert_almost_equal(prediction_from_daily, prediction_from_hourly)
def test_summary_data_from_transaction_data_will_choose_the_correct_first_order_to_drop_in_monetary_transactions():
    """Monetary aggregation must not depend on the input row order.

    This is the correct behaviour.  See
    https://github.com/CamDavidsonPilon/lifetimes/issues/85 and
    test_summary_statistics_are_indentical_to_hardies_paper_confirming_correct_aggregations
    """
    customer_ids = pd.Series([2, 2, 2])

    def summarize(dates, amounts):
        # build a transactions frame and aggregate it
        frame = pd.DataFrame({
            'date': pd.to_datetime(pd.Series(dates)),
            'id': customer_ids,
            'sales': pd.Series(amounts),
        })
        return utils.summary_data_from_transaction_data(frame, 'id', 'date', 'sales')

    ordered = summarize(
        ['2014-03-14 00:00:00', '2014-04-09 00:00:00', '2014-05-21 00:00:00'],
        [10, 20, 25])
    unordered = summarize(
        ['2014-04-09 00:00:00', '2014-03-14 00:00:00', '2014-05-21 00:00:00'],
        [20, 10, 25])

    assert_frame_equal(ordered, unordered)
    # the first order is dropped, so the mean is (20 + 25) / 2 == 22.5
    assert ordered['monetary_value'].loc[2] == 22.5
def test_summary_data_from_transaction_data_works_with_string_customer_ids(transaction_level_data):
    """Smoke test: summarising transactions with string customer ids must not raise."""
    rows = [
        ('X', '2015-02-01'),
        ('X', '2015-02-06'),
        ('Y', '2015-01-01'),
        ('Y', '2015-01-01'),
        ('Y', '2015-01-02'),
        ('Y', '2015-01-05'),
    ]
    frame = pd.DataFrame(rows, columns=['id', 'date'])
    utils.summary_data_from_transaction_data(frame, 'id', 'date')
def test_summary_data_from_transaction_data_works_with_string_customer_ids(
        transaction_level_data):
    """Smoke test: string customer ids are accepted without error."""
    records = (
        ("X", "2015-02-01"),
        ("X", "2015-02-06"),
        ("Y", "2015-01-01"),
        ("Y", "2015-01-01"),
        ("Y", "2015-01-02"),
        ("Y", "2015-01-05"),
    )
    frame = pd.DataFrame(list(records), columns=["id", "date"])
    utils.summary_data_from_transaction_data(frame, "id", "date")
def test_summary_data_from_transaction_data():
    """Glovo example transactions aggregate to the expected per-customer summary."""
    transactions = pd.read_csv('lifetimes/datasets/glovo_example_transactions.csv')
    result = utils.summary_data_from_transaction_data(
        transactions,
        customer_id_col='customer_id',
        datetime_col='date',
        observation_period_end=datetime(2019, 2, 19).date(),
        freq='D',
        monetary_value_col='gtv_eur',
        money_first_transaction=True,
        save=False)

    columns = ['customer_id', 'frequency', 'recency', 'T',
               'orders_per_period', 'monetary_value', 'margin']
    rows = [
        [213, 0., 0., 1435., 1., 5.5, 82.6446281],
        [240, 0., 0., 1429., 1., 28.99, 15.67938788],
        [272, 0., 0., 1431., 1., 11.9, 38.19709702],
        [382, 0., 0., 1451., 1., 17.9, 25.39360081],
        [438, 0., 0., 1433., 1., 25.67, 17.70726352],
        [501, 0., 0., 1434., 1., 50., 9.09090909],
        [587, 0., 0., 1428., 1., 5.5, 82.6446281],
        [688, 0., 0., 1431., 1., 11.1, 20.47502048],
        [885, 0., 0., 1434., 1., 8., 56.81818182],
    ]
    expected = pd.DataFrame(rows, columns=columns).set_index('customer_id')
    assert_frame_equal(result, expected)
def test_beta_geometric_nbd_model_transactional_data(T, r, alpha, a, b,
                                                     observation_period_end, freq, size):
    """Summarising simulated transactional data reproduces the directly
    simulated frequency/recency/T table when the RNG seed is fixed."""
    seed = 188898

    np.random.seed(seed)
    transactions = beta_geometric_nbd_model_transactional_data(
        T=T, r=r, alpha=alpha, a=a, b=b,
        observation_period_end=observation_period_end, freq=freq, size=size)
    actual = summary_data_from_transaction_data(
        transactions=transactions,
        customer_id_col="customer_id",
        datetime_col="date",
        observation_period_end=observation_period_end,
        freq=freq,
    ).reset_index(drop=True)

    np.random.seed(seed)
    expected = beta_geometric_nbd_model(
        T=T, r=r, alpha=alpha, a=a, b=b, size=size)[["frequency", "recency", "T"]]
    # the direct simulation yields continuous recency; summarisation rounds
    # up to whole periods, so ceil before comparing
    expected["recency"] = expected["recency"].apply(np.ceil)
    expected = expected.reset_index(drop=True)

    assert expected.equals(actual)
def test_plot_incremental_transactions(self):
    """Test plotting incremental transactions with CDNOW example."""
    # raw string: '\s' is an invalid escape sequence in a plain string literal
    # (SyntaxWarning in Python 3.12+, error in later versions)
    transactions = load_dataset('CDNOW_sample.txt', header=None, sep=r'\s+')
    transactions.columns = [
        'id_total', 'id_sample', 'date', 'num_cd_purc', 'total_value'
    ]
    t = 39  # calibration period length, in weeks
    freq = 'W'
    transactions_summary = utils.summary_data_from_transaction_data(
        transactions, 'id_sample', 'date',
        datetime_format='%Y%m%d',
        observation_period_end='19970930',
        freq=freq)
    bgf = BetaGeoFitter(penalizer_coef=0.01)
    bgf.fit(transactions_summary['frequency'],
            transactions_summary['recency'],
            transactions_summary['T'])
    plt.figure()
    # plot over twice the calibration horizon
    plotting.plot_incremental_transactions(
        bgf, transactions, 'date', 'id_sample', 2 * t, t,
        freq=freq, xlabel='week', datetime_format='%Y%m%d')
    return plt.gcf()
def df_cum_transactions(cdnow_transactions):
    """Fixture helper: fit a Pareto/NBD model on daily CDNOW data and return
    the expected cumulative transactions over 25 weeks (expressed in days)."""
    date_col, id_col = 'date', 'id_sample'
    horizon = 25 * 7  # 25 weeks, in days
    fmt, freq, multiplier = '%Y%m%d', 'D', 7
    period_end = '19970930'

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions, id_col, date_col,
        datetime_format=fmt,
        freq=freq,
        freq_multiplier=multiplier,
        observation_period_end=period_end).reset_index()

    model = ParetoNBDFitter()
    model.fit(summary['frequency'], summary['recency'], summary['T'])

    return utils.expected_cumulative_transactions(
        model, cdnow_transactions, date_col, id_col, horizon,
        fmt, freq, set_index_date=False, freq_multiplier=multiplier)
def test_expected_cumulative_transactions_date_index(cdnow_transactions):
    """
    Test set_index as date for cumulative transactions and bgf fitter.

    Get first 14 cdnow transactions dates and validate that date index,
    freq_multiplier = 1 working and compare with tested data for last 4
    records.

    dates = ['1997-01-11', '1997-01-12', '1997-01-13', '1997-01-14']
    actual_trans = [11, 12, 15, 19]
    expected_trans = [10.67, 12.67, 14.87, 17.24]
    """
    date_col, id_col = "date", "id_sample"
    horizon = 14
    fmt, freq, multiplier = "%Y%m%d", "D", 1
    period_end = "19970930"

    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions, id_col, date_col,
        datetime_format=fmt,
        freq=freq,
        freq_multiplier=multiplier,
        observation_period_end=period_end,
    ).reset_index()

    model = BetaGeoFitter()
    model.fit(summary["frequency"], summary["recency"], summary["T"])

    df_cum = utils.expected_cumulative_transactions(
        model, cdnow_transactions, date_col, id_col, horizon,
        fmt, freq, set_index_date=True, freq_multiplier=multiplier,
    )

    expected_dates = ["1997-01-11", "1997-01-12", "1997-01-13", "1997-01-14"]
    expected_actual = [11, 12, 15, 19]
    expected_predicted = [10.67, 12.67, 14.87, 17.24]

    tail = df_cum.iloc[-4:]
    assert all(expected_dates == tail.index.to_timestamp().astype(str))
    assert_allclose(tail["actual"].values, expected_actual)
    assert_allclose(tail["predicted"].values.round(2), expected_predicted, atol=1e-2)
def test_purchase_predictions_do_not_differ_much_if_looking_at_hourly_or_daily_frequencies(self):
    """A 30-day purchase forecast should be (almost) invariant to the
    aggregation frequency used for the summary data."""
    transaction_data = load_transaction_data(parse_dates=['date'])
    period_end = max(transaction_data.date)

    def fit_and_predict(freq, horizon):
        summary = utils.summary_data_from_transaction_data(
            transaction_data, 'id', 'date',
            observation_period_end=period_end, freq=freq)
        model = estimation.ModifiedBetaGeoFitter()
        np.random.seed(0)  # fix the seed so both fits are comparable
        model.fit(summary['frequency'], summary['recency'], summary['T'])
        return model.expected_number_of_purchases_up_to_time(horizon)

    thirty_days = 30
    hours_in_day = 24
    from_daily = fit_and_predict('D', thirty_days)
    from_hourly = fit_and_predict('h', thirty_days * hours_in_day)

    npt.assert_almost_equal(from_daily, from_hourly)
def test_summary_data_from_transaction_data_squashes_period_purchases_to_one_purchase():
    """Two purchases inside the same period count as a single purchase,
    so the repeat-purchase frequency is zero."""
    transactions = pd.DataFrame([[1, '2015-01-01'], [1, '2015-01-01']],
                                columns=['id', 't'])
    actual = utils.summary_data_from_transaction_data(transactions, 'id', 't', freq='W')
    # .loc replaces DataFrame.ix, which was removed in pandas 1.0
    assert actual.loc[1]['frequency'] == 0.0
def test_summary_data_from_transaction_data_returns_correct_results(
        transaction_level_data):
    """The fixture's per-customer frequency/recency/T match hand-computed values."""
    actual = utils.summary_data_from_transaction_data(
        transaction_level_data, "id", "date", observation_period_end="2015-02-07")
    expected = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "frequency": [1.0, 0.0, 2.0],
            "recency": [5.0, 0.0, 4.0],
            "T": [6.0, 37.0, 37.0],
        }
    ).set_index("id")
    assert_frame_equal(actual, expected)
def summary_create(self, df):
    """Subset df on sales data and build the transaction summary.

    Keeps only strictly positive sales, stores the raw transaction frame on
    ``self.transaction_data``, and returns the lifetimes summary.
    """
    sales = subset_data(df, 'OrderType', 1)
    # make sure all sales kosher - keep only strictly positive sales
    positive_sales = sales[sales.OrderTotal > 0]
    self.transaction_data = positive_sales[['OrderDate', 'CustomerNo']]
    return summary_data_from_transaction_data(self.transaction_data,
                                              'CustomerNo',
                                              'OrderDate',
                                              observation_period_end='2017-02-08')
def test_summary_data_from_transaction_data_squashes_period_purchases_to_one_purchase():
    """Multiple purchases within one period collapse into a single purchase,
    leaving a repeat-purchase frequency of zero."""
    same_day_purchases = [[1, "2015-01-01"], [1, "2015-01-01"]]
    transactions = pd.DataFrame(same_day_purchases, columns=["id", "t"])
    summary = utils.summary_data_from_transaction_data(transactions, "id", "t", freq="W")
    assert summary.loc[1]["frequency"] == 0.0
def test_summary_data_from_transaction_data_with_specific_datetime_format(transaction_level_data):
    """An explicit datetime_format should parse compact YYYYMMDD dates correctly."""
    transaction_level_data['date'] = transaction_level_data['date'].map(
        lambda x: x.replace('-', ''))
    # renamed from `format`, which shadowed the builtin of the same name
    date_format = '%Y%m%d'
    today = '20150207'
    actual = utils.summary_data_from_transaction_data(
        transaction_level_data, 'id', 'date',
        observation_period_end=today, datetime_format=date_format)
    expected = pd.DataFrame([[1, 1., 5., 6.],
                             [2, 0., 0., 37.],
                             [3, 2., 4., 37.]],
                            columns=['id', 'frequency', 'recency', 'T']).set_index('id')
    assert_frame_equal(actual, expected)
def bgf_transactions(cdnow_transactions):
    """Fixture: a BetaGeoFitter trained on weekly CDNOW summary data."""
    summary = utils.summary_data_from_transaction_data(
        cdnow_transactions, 'id_sample', 'date',
        datetime_format='%Y%m%d',
        observation_period_end='19970930',
        freq='W')

    fitter = BetaGeoFitter(penalizer_coef=0.01)
    fitter.fit(summary['frequency'], summary['recency'], summary['T'])
    return fitter
def test_summary_date_from_transaction_data_with_specific_non_daily_frequency(large_transaction_level_data):
    """Weekly aggregation of the large fixture yields the expected summary."""
    summary = utils.summary_data_from_transaction_data(
        large_transaction_level_data, 'id', 'date',
        observation_period_end='20150207', freq='W')
    expected_rows = [
        [1, 1., 5., 5.],
        [2, 0., 0., 5.],
        [3, 1., 1., 5.],
        [4, 1., 3., 3.],
        [5, 0., 0., 3.],
        [6, 0., 0., 0.],
    ]
    expected = pd.DataFrame(expected_rows,
                            columns=['id', 'frequency', 'recency', 'T']).set_index('id')
    assert_frame_equal(summary, expected)
def test_summary_date_from_transaction_with_monetary_values(large_transaction_level_data_with_monetary_value):
    """Monetary aggregation of the large fixture yields the expected summary."""
    summary = utils.summary_data_from_transaction_data(
        large_transaction_level_data_with_monetary_value, 'id', 'date',
        monetary_value_col='monetary_value',
        observation_period_end='20150207')
    expected_rows = [
        [1, 1., 36., 37., 2],
        [2, 0., 0., 37., 0],
        [3, 2., 4., 37., 3],
        [4, 2., 20., 22., 3],
        [5, 2., 2., 22., 4.5],
        [6, 0., 0., 5., 0],
    ]
    expected = pd.DataFrame(
        expected_rows,
        columns=['id', 'frequency', 'recency', 'T', 'monetary_value']).set_index('id')
    assert_frame_equal(summary, expected)
def summaryDataFromTransactionData(clvWithBG_NBDGammaGammModelProcessedDataset):
    """Build the lifetimes RFM summary (with monetary value) for the CLV dataset."""
    return summary_data_from_transaction_data(
        clvWithBG_NBDGammaGammModelProcessedDataset,
        "CustomerID",
        "InvoiceDate",
        monetary_value_col="Total_Sales",
        observation_period_end="2011-12-9")
def summary_trans_create(self, df):
    """Subset df on sales data; return the transaction summary with monetary spend.

    Stores the full summary on ``self.summary_monetary`` and returns only
    customers with at least one repeat purchase (``self.return_customers``).
    """
    sales = subset_data(df, 'OrderType', 1)
    positive_sales = sales[sales.OrderTotal > 0]
    monetary_transactions = positive_sales[['OrderDate', 'CustomerNo', 'OrderTotal']]
    self.summary_monetary = summary_data_from_transaction_data(
        monetary_transactions, 'CustomerNo', 'OrderDate', 'OrderTotal',
        observation_period_end='2017-02-08')
    # keep customers with more than one spend
    has_repeat_purchase = self.summary_monetary['frequency'] > 0
    self.return_customers = self.summary_monetary[has_repeat_purchase]
    return self.return_customers
def test_summary_data_from_transaction_data_works_with_int_customer_ids_and_doesnt_coerce_to_float(transaction_level_data):
    """Integer customer ids must survive summarisation without float coercion."""
    records = [
        (1, '2015-02-01'),
        (1, '2015-02-06'),
        (1, '2015-01-01'),
        (2, '2015-01-01'),
        (2, '2015-01-02'),
        (2, '2015-01-05'),
    ]
    frame = pd.DataFrame(records, columns=['id', 'date'])
    summary = utils.summary_data_from_transaction_data(frame, 'id', 'date')
    assert summary.index.dtype == 'int64'
def test_summary_data_from_transaction_data_works_with_int_customer_ids_and_doesnt_coerce_to_float(
        transaction_level_data):
    """The summary index keeps its int64 dtype for integer customer ids."""
    records = (
        (1, "2015-02-01"),
        (1, "2015-02-06"),
        (1, "2015-01-01"),
        (2, "2015-01-01"),
        (2, "2015-01-02"),
        (2, "2015-01-05"),
    )
    frame = pd.DataFrame(list(records), columns=["id", "date"])
    summary = utils.summary_data_from_transaction_data(frame, "id", "date")
    assert summary.index.dtype == "int64"
def predictSpending(customerId):
    """Flask endpoint helper: predict a customer's average transaction amount.

    Loads transactions, summarises them with the lifetimes RFM utilities,
    restores a pre-trained Gamma-Gamma model, and returns a JSON response of
    the form ``{"success": bool, "result": {"customerId": ..., "y": ...}}``.
    """
    # default payload, returned unchanged when no customer ID was supplied
    response = {"success": False, "result": {"customerId": "", "y": 0.0}}

    # ensure the customer ID was properly uploaded to our endpoint
    if customerId:
        print("* get data")
        transactions = pandas.read_csv("sample_transactions.csv")
        #transactions = pandas.read_json(baseURL + "/api/transactions")
        #transactions = transactions.drop(columns="_id")

        print("* prepare data")
        # summary columns:
        #   frequency      - number of repeat purchase transactions
        #   recency        - time (in days) between first and latest purchase
        #   T              - time (in days) between first purchase and end of
        #                    the period under study
        #   monetary_value - average transaction amount
        today = pandas.to_datetime(datetime.date.today())
        summaryData = summary_data_from_transaction_data(
            transactions, "customerId", "transactionDate",
            monetary_value_col="transactionAmount",
            observation_period_end=today)

        # filter out customers that have no repeat transactions
        analysisData = summaryData[summaryData["frequency"] > 0]

        # get the stats of the particular customer
        customer = analysisData.loc[customerId]

        # restore the previously fitted Gamma-Gamma model
        ggf_loaded = GammaGammaFitter()
        ggf_loaded.load_model('ggf.pkl')

        # estimate the average transaction amount
        predict = ggf_loaded.conditional_expected_average_profit(
            customer["frequency"], customer['monetary_value'])

        # add the input and predicted output to the return data
        response = {
            "success": True,
            "result": {"customerId": customerId, "y": predict}
        }

    # return the data dictionary as a JSON response
    return flask.jsonify(response)
def add_rfm_features(features, calib_invoices, period_end):
    """Left-join lifetimes RFM features (plus T - recency) onto ``features``."""
    features = features.copy()
    rfm = summary_data_from_transaction_data(
        transactions=calib_invoices,
        customer_id_col='CustomerID',
        datetime_col='InvoiceDate',
        monetary_value_col='Revenue',
        observation_period_end=period_end,
        freq='D')
    # days of inactivity at the end of the calibration window
    rfm['T_Minus_Recency'] = rfm['T'] - rfm['recency']
    return features.merge(rfm, how='left', on='CustomerID')
def test_summary_statistics_are_indentical_to_hardies_paper_confirming_correct_aggregations():
    # see http://brucehardie.com/papers/rfm_clv_2005-02-16.pdf
    # RFM and CLV: Using Iso-value Curves for Customer Base Analysis
    df = pd.read_csv('lifetimes/datasets/CDNOW_sample.txt',
                     sep=r'\s+',  # raw string: '\s' is an invalid escape in a plain literal
                     header=None,
                     names=['_id', 'id', 'date', 'cds_bought', 'spent'])
    # `unicode` exists only on Python 2; `str` is the Python 3 equivalent
    df['date'] = pd.to_datetime(df['date'].astype(str))
    df_train = df[df['date'] < '1997-10-01']
    summary = utils.summary_data_from_transaction_data(df_train, 'id', 'date', 'spent')
    results = summary[summary['frequency'] > 0]['monetary_value'].describe()

    # .loc replaces Series.ix, which was removed in pandas 1.0
    assert np.round(results.loc['mean']) == 35
    assert np.round(results.loc['std']) == 30
    assert np.round(results.loc['min']) == 3
    assert np.round(results.loc['50%']) == 27
    assert np.round(results.loc['max']) == 300
    assert np.round(results.loc['count']) == 946
def get_rfm_features(features, cohort_invoices):
    """Left-join lifetimes RFM features onto ``features`` and add T - recency.

    Bug fix: ``T_Minus_Recency`` was previously computed from the standalone
    ``rfm_features`` frame and assigned to the merged ``features`` frame.
    Series assignment aligns on the index, and the left merge can reorder,
    duplicate, or drop rows, so values could silently land on the wrong
    customers.  Derive the column from the merged frame's own columns instead.
    """
    cohort_invoices = cohort_invoices.copy()
    features = features.copy()
    rfm_features = summary_data_from_transaction_data(
        transactions=cohort_invoices,
        customer_id_col='CustomerID',
        datetime_col='InvoiceDate',
        monetary_value_col='Revenue',
        freq='D').reset_index()
    features = features.merge(rfm_features, how='left', on='CustomerID')
    # compute from the merged frame so each row uses its own T and recency
    features['T_Minus_Recency'] = features['T'] - features['recency']
    return features
def test_summary_data_from_transaction_data_with_specific_datetime_format(
        transaction_level_data):
    """Compact YYYYMMDD dates parse correctly when datetime_format is given."""
    transaction_level_data["date"] = transaction_level_data["date"].map(
        lambda x: x.replace("-", ""))
    # renamed from `format`, which shadowed the builtin of the same name
    date_format = "%Y%m%d"
    today = "20150207"
    actual = utils.summary_data_from_transaction_data(
        transaction_level_data, "id", "date",
        observation_period_end=today, datetime_format=date_format)
    expected = pd.DataFrame(
        [[1, 1.0, 5.0, 6.0], [2, 0.0, 0.0, 37.0], [3, 2.0, 4.0, 37.0]],
        columns=["id", "frequency", "recency", "T"]).set_index("id")
    assert_frame_equal(actual, expected)
def get_data_from_server(self,cmd=None): """ Gets data from sales_db and stores the query results in self.data INPUT cmd (str) the default sql query is below The default query has been replaced. The original query was an 8 line select command. """ # server name dsn = "THE SERVER NAME" cnxn_name = "DSN=%s" % dsn connection = odbc.connect(cnxn_name) # use to access the database c = connection.cursor() # generate cursor object # Grab transaction data from Postgres if not cmd: cmd = """SQL DEFAULT COMMAND GOES HERE""" % (self.pmg_num,self.date_range[0],self.date_range[1]) c.execute(cmd) # execute the sql command # list to store the query data transaction_data = [] # create a dictionary to convert customer ids to name to_name = dict(np.genfromtxt('../data/names.csv',dtype=str,delimiter='\t')) for row in c: cust, rsv_date, sales = row # pull data from each row of the query data cust_id = str(int(cust)) name = to_name[cust_id] # check to see if customer is inactive if use(name): rsv_date1_readable = rsv_date.strftime('%Y-%m-%d') # date formatting sales_float = float(sales) # convert to float; represents the transaction amount transaction_data.append({"id":cust, "date":rsv_date, "sales":sales_float}) # add dictionary of data to list # convert to dataframe df = pd.DataFrame(transaction_data, columns=['id', 'date', 'sales']) # store results df.to_csv(self.outfile1,index=False) # IMPORTANT: use correct observation_period_end date self.data = summary_data_from_transaction_data(df, 'id', 'date', 'sales', observation_period_end=self.date_range[1], freq='M')
def test_summary_date_from_transaction_data_with_specific_non_daily_frequency(
        large_transaction_level_data):
    """Weekly aggregation of the large fixture yields the expected summary."""
    actual = utils.summary_data_from_transaction_data(
        large_transaction_level_data, "id", "date",
        observation_period_end="20150207", freq="W")
    expected = pd.DataFrame(
        {
            "id": [1, 2, 3, 4, 5, 6],
            "frequency": [1.0, 0.0, 1.0, 1.0, 0.0, 0.0],
            "recency": [5.0, 0.0, 1.0, 3.0, 0.0, 0.0],
            "T": [5.0, 5.0, 5.0, 3.0, 3.0, 0.0],
        }
    ).set_index("id")
    assert_frame_equal(actual, expected)
def rfm_model(df, customer_column, date_column, monetary_column):
    """Return an RFM score for each customer using the Lifetimes RFM model.

    This score is calculated across the whole DataFrame, so a customer with
    numerous orders gets a single value applied to all orders; the figure is
    not computed historically.

    Args:
        :param df: Pandas DataFrame
        :param monetary_column: Column containing monetary value of order
        :param date_column: Column containing date
        :param customer_column: Column containing customer

    Returns:
        New DataFrame containing RFM data by customer. T is equal to days
        since first order and end of period. Customers with 1 order will be
        assigned 0 for RFM scores.
    """
    return summary_data_from_transaction_data(
        df, customer_column, date_column, monetary_value_col=monetary_column)
def test_summary_statistics_are_indentical_to_hardies_paper_confirming_correct_aggregations():
    # see http://brucehardie.com/papers/rfm_clv_2005-02-16.pdf
    # RFM and CLV: Using Iso-value Curves for Customer Base Analysis
    df = pd.read_csv(
        "lifetimes/datasets/CDNOW_sample.txt",
        sep=r"\s+",
        header=None,
        names=["_id", "id", "date", "cds_bought", "spent"],
    )
    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
    train = df[df["date"] < "1997-10-01"]
    summary = utils.summary_data_from_transaction_data(train, "id", "date", "spent")
    stats = summary.loc[summary["frequency"] > 0, "monetary_value"].describe()

    # rounded moments of the repeat-buyer spend distribution, per Hardie's paper
    expected = {"mean": 35, "std": 30, "min": 3, "50%": 27, "max": 300, "count": 946}
    for stat, value in expected.items():
        assert np.round(stats.loc[stat]) == value
def test_summary_data_from_transaction_data_squashes_period_purchases_to_one_purchase():
    """Two purchases in the same week collapse to one, so repeat frequency is 0."""
    transactions = pd.DataFrame([[1, '2015-01-01'], [1, '2015-01-01']],
                                columns=['id', 't'])
    actual = utils.summary_data_from_transaction_data(transactions, 'id', 't', freq='W')
    # .loc replaces DataFrame.ix, which was removed in pandas 1.0
    assert actual.loc[1]['frequency'] == 1. - 1.
def example_summary_data(example_transaction_data):
    """Fixture: RFM summary of the example data, observed up to the latest date."""
    latest_date = max(example_transaction_data.date)
    return utils.summary_data_from_transaction_data(
        example_transaction_data, 'id', 'date',
        observation_period_end=latest_date)
def get_data_from_file(self, filename, **kwargs):
    """Read transactions from a CSV file and store the monthly RFM summary.

    kwargs are forwarded to pandas.read_csv; the result lands on self.data.
    """
    transactions = pd.read_csv(filename, **kwargs)
    self.data = summary_data_from_transaction_data(
        transactions, 'id', 'date', 'sales',
        observation_period_end=self.date_range[1], freq='M')