Python save_file示例，src.data.make_dataset.save_file Python示例

示例#1

0

显示文件

def date_features(transcript, save=None):
    '''
    Add calendar features derived from the sign-up date and elapsed time.

    `time` is divided by 24 and stored as `time_days` (presumably `time` is
    in hours -- confirm against the raw data). That offset is added to
    `signed_up` to obtain `date`, from which `day`, `weekday`, `month` and
    `year` are extracted. The columns are written onto the DataFrame that
    was passed in.

    Parameters
    -----------
    transcript: DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'
    Returns
    -------
    DataFrame
    '''
    elapsed_days = transcript['time'] / 24
    transcript['time_days'] = elapsed_days

    event_date = transcript['signed_up'] + pd.to_timedelta(elapsed_days,
                                                           unit='D')
    transcript['date'] = event_date

    # (output column, datetime accessor attribute)
    calendar_parts = (
        ('day', 'day'),
        ('weekday', 'dayofweek'),
        ('month', 'month'),
        ('year', 'year'),
    )
    for column, attribute in calendar_parts:
        transcript[column] = getattr(event_date.dt, attribute)

    save_file(transcript, save)

    return transcript

示例#2

0

显示文件

def label_encode_categories(transcript, save=None):
    '''
    Label encode `id` and `gender`, removing the previously created one-hot
    gender columns.

    `gender` is rebuilt from the one-hot columns 'F', 'M' and 'O' by taking,
    for each row, the name of the column holding the largest value; it is
    then label encoded. The `id` and `gender` columns are written onto the
    DataFrame that was passed in; the returned DataFrame is a new object
    without the 'F', 'M' and 'O' columns.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'

    Returns
    -------
    DataFrame
    '''
    le = LabelEncoder()

    # the original encoded `id` a second time and discarded the result;
    # that call had no effect and has been removed
    transcript['id'] = le.fit_transform(transcript['id'])

    # collapse the one-hot gender columns back into a single label column
    transcript['gender'] = transcript[['F', 'M', 'O']].idxmax(axis=1)
    transcript['gender'] = le.fit_transform(transcript['gender'])

    transcript = transcript.drop(['F', 'M', 'O'], axis=1)

    save_file(transcript, save)
    return transcript

示例#3

0

显示文件

def overlap_offer_effect(transcript, save=None):
    '''
    Create overlap offer feature columns, one per offer id, whose value is
    the number of days for which a previously received offer is still
    valid when a new offer is received.

    For every 'offer received' row the earlier rows of the same person are
    scanned backwards until another person or an 'offer completed' row is
    reached. Each earlier 'offer received' row with a positive remaining
    validity overwrites the stored result, so the value kept comes from the
    earliest such offer reached by the scan.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'

    Returns
    -------
    DataFrame
    '''
    n_rows = transcript.shape[0]

    # convert required dataframe columns to dictionary of numpy arrays
    t = df_numpydict(transcript,
                     ['event', 'person', 'time_days', 'duration', 'id'])

    # np.empty does not initialise its buffer, but get_dummies(...,
    # drop_first=True) below relies on unmatched rows being '' so that the
    # "no overlap" category is the one dropped. Fill explicitly instead.
    # NOTE(review): the dtype is still a one-character string, so offer
    # ids longer than one character are truncated -- confirm ids are
    # single characters at this stage of the pipeline.
    overlap_offer = np.full(n_rows, '', dtype=str)
    overlap_offer_days = np.full(n_rows, np.nan)

    bar = progressbar.ProgressBar()
    for i in bar(t['index']):
        if t['event'][i] != 'offer received':
            continue

        # loop backwards through events of customer
        for j in t['index'][0:i][::-1]:
            if t['person'][j] != t['person'][i]:
                break
            if t['event'][j] == 'offer completed':
                break

            if t['event'][j] == 'offer received':
                days_left = (t['time_days'][j] - t['time_days'][i] +
                             t['duration'][j])

                # earlier offer already expired: keep scanning
                if days_left <= 0:
                    continue

                overlap_offer_days[i] = days_left
                overlap_offer[i] = t['id'][j]

    # one column per overlapping offer id holding the days left; zeros
    # (rows that do not belong to that offer) become NaN
    offer_overlap_features = pd.get_dummies(overlap_offer, drop_first=True)\
        .mul(overlap_offer_days, axis=0).replace(0, np.nan)
    transcript = try_join(offer_overlap_features, transcript)

    save_file(transcript, save)

    return transcript

示例#4

0

显示文件

def last_transaction(transcript, save=None):
    '''
    Create `last_transaction_days` and `last_amount` features.

    For every 'offer received' row the earlier rows of the same person are
    scanned backwards for the most recent 'transaction' row. The number of
    days between that transaction and the offer, and the transaction
    amount, are stored on the offer row. Rows without a match keep NaN.
    Both columns are written onto the DataFrame that was passed in.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'

    Returns
    -------
    DataFrame
    '''
    # convert required dataframe columns to dictionary of numpy arrays;
    # all lookups below go through this one dictionary (the original also
    # built separate, partly unused, copies of the same columns)
    t = df_numpydict(transcript, ['event', 'person', 'time_days', 'amount'])

    n_rows = transcript.shape[0]
    last_transaction_days = np.full(n_rows, np.nan)
    last_amount = np.full(n_rows, np.nan)

    bar = progressbar.ProgressBar()

    # loop through each row
    for i in bar(t['index']):
        if t['event'][i] != 'offer received':
            continue

        # loop backwards through events of customer
        for j in t['index'][0:i][::-1]:

            if t['person'][j] != t['person'][i]:
                break

            # the first transaction met going backwards is the latest one
            if t['event'][j] == 'transaction':
                last_transaction_days[i] = (t['time_days'][i] -
                                            t['time_days'][j])
                last_amount[i] = t['amount'][j]
                break

    transcript['last_transaction_days'] = last_transaction_days
    transcript['last_amount'] = last_amount

    save_file(transcript, save)

    return transcript

示例#5

0

显示文件

def feature_cleanup(transcript, save=None):
    '''
    Create the target features, keep only 'offer received' rows and drop
    redundant columns.

    `cum_amount` is dropped from, and `offer_spend` and `spend>required`
    are added to, the DataFrame that was passed in. The returned DataFrame
    is a new object holding only the 'offer received' rows, without the
    columns 'event', 'offer_id', 'joined', 'amount' and 'transaction'.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'

    Returns
    -------
    DataFrame
    '''

    # drop cumulative amount since equal to t-30
    transcript.drop(['cum_amount'], axis=1, inplace=True)

    # spending during offer period - to be predicted
    offer_spend = transcript.received_spend + transcript.viewed_spend
    transcript['offer_spend'] = offer_spend

    # adding boolean target variable to be predicted - did the spend exceed
    # the offer difficulty?
    transcript['spend>required'] = (offer_spend >
                                    transcript.difficulty).astype(int)

    # filtering event by only offer received, data now all included in this
    # row
    received = transcript[transcript.event == 'offer received']

    # A single non-inplace drop returns a fresh frame; dropping in place on
    # the filtered slice (as before) triggers SettingWithCopy warnings.
    #   event                - constant after the filter above
    #   offer_id             - each row is now a unique offer
    #   joined               - same as signed_up
    #   amount, transaction  - relate to individual transactions
    transcript = received.drop(
        ['event', 'offer_id', 'joined', 'amount', 'transaction'], axis=1)

    save_file(transcript, save)
    return transcript

示例#6

0

显示文件

def to_numerical_nan(transcript, save=None):
    '''
    Convert the date columns to numeric day offsets and blank out
    `remaining_to_complete` where it is not meaningful.

    `signed_up` and `date` are replaced by the whole number of days between
    each value and the latest `signed_up` date in the data.
    `remaining_to_complete` is set to NaN on rows where `viewed_in_valid`
    is 0. The DataFrame that was passed in is modified.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'

    Returns
    -------
    DataFrame
    '''

    # both offsets are measured from the latest sign-up date, so it must be
    # read before `signed_up` is overwritten
    reference_date = transcript['signed_up'].max()
    signed_up_days = (transcript['signed_up'] - reference_date).dt.days
    date_days = (transcript['date'] - reference_date).dt.days

    transcript['signed_up'] = signed_up_days
    transcript['date'] = date_days

    # remaining_to_complete is only relevant if the customer viewed the
    # offer, so it is nulled elsewhere (viewed_days_left is left untouched).
    # A single .loc assignment replaces the previous chained assignment,
    # which is not guaranteed to write through to the DataFrame.
    not_viewed = transcript['viewed_in_valid'] == 0
    transcript.loc[not_viewed, 'remaining_to_complete'] = np.nan

    save_file(transcript, save)
    return transcript

示例#7

0

显示文件

def historical_features(transcript, save=None):
    '''
    Add per-row totals describing the earlier rows of the same person.

    The index is reset (old index discarded) and, for every row, the
    preceding rows are walked backwards until a different person is met.
    Rewards, difficulties, completion / view counts and spend of those
    earlier rows are summed into the `hist_*` columns, which are written
    onto the DataFrame that was passed in.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'
    Returns
    -------
    DataFrame
    '''

    transcript.reset_index(inplace=True, drop=True)
    n_rows = transcript.shape[0]

    hist_feature_names = [
        'hist_reward_completed', 'hist_reward_possible',
        'hist_difficulty_completed', 'hist_difficulty_possible',
        'hist_previous_completed', 'hist_previous_offers',
        'hist_viewed_and_completed', 'hist_complete_not_viewed',
        'hist_failed_complete', 'hist_viewed', 'hist_received_spend',
        'hist_viewed_spend'
    ]
    hist = {name: np.zeros(n_rows) for name in hist_feature_names}

    columns = df_numpydict(transcript, [
        'person', 'completed', 'reward', 'difficulty', 'viewed',
        'received_spend', 'viewed_spend'
    ])
    rows = columns['index']
    person = columns['person']
    completed = columns['completed']
    reward = columns['reward']
    difficulty = columns['difficulty']
    viewed = columns['viewed']
    received_spend = columns['received_spend']
    viewed_spend = columns['viewed_spend']

    progress = progressbar.ProgressBar()

    for cur in progress(rows):
        for prev in rows[0:cur][::-1]:

            # stop as soon as the rows belong to another customer
            if person[prev] != person[cur]:
                break

            was_viewed = viewed[prev] == 1

            # every earlier row counts towards the "possible" totals,
            # whether or not it was completed
            hist['hist_reward_possible'][cur] += reward[prev]
            hist['hist_difficulty_possible'][cur] += difficulty[prev]
            hist['hist_previous_offers'][cur] += 1

            if completed[prev] == 1:
                hist['hist_reward_completed'][cur] += reward[prev]
                hist['hist_difficulty_completed'][cur] += difficulty[prev]
                hist['hist_previous_completed'][cur] += 1

                # split completions by whether the offer was viewed
                if was_viewed:
                    hist['hist_viewed_and_completed'][cur] += 1
                else:
                    hist['hist_complete_not_viewed'][cur] += 1
            else:
                hist['hist_failed_complete'][cur] += 1

            if was_viewed:
                hist['hist_viewed'][cur] += 1

            # spend totals accumulate regardless of completion
            hist['hist_received_spend'][cur] += received_spend[prev]
            hist['hist_viewed_spend'][cur] += viewed_spend[prev]

    for name in hist_feature_names:
        transcript[name] = hist[name]

    save_file(transcript, save)
    return transcript

示例#8

0

显示文件

def time_buckets(transcript, save=None):
    '''
    Create time bucket fields holding the total transaction value within
    each period and which specific offer (if any) was received at the
    bucket day.

    For every row the earlier rows of the same person are scanned
    backwards. Each earlier transaction is added to the first bucket whose
    day value is >= its `time_days`; each earlier 'offer received' row
    whose `time_days` equals a bucket day stores its id in that offer
    bucket. Amount buckets lying beyond the period the row's own
    `time_days` falls in are set to NaN. The DataFrame that was passed in
    is modified.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'
    Returns
    -------
    DataFrame
    '''
    # NOTE(review): unlike the sibling functions this keeps the old index
    # as a column (no drop=True) -- confirm that is intended.
    transcript.reset_index(inplace=True)

    # must stay in ascending order: both bucket searches below rely on it
    day_buckets = [7, 14, 17, 21, 24, 30]
    n_rows = transcript.shape[0]

    # one numpy array per bucket column name
    amount_buckets_dict = {
        f'amount_{bucket}': np.zeros(n_rows)
        for bucket in day_buckets
    }
    # NOTE(review): np.full(n, '') yields a one-character string dtype, so
    # ids longer than one character are truncated -- confirm ids are single
    # characters at this stage of the pipeline.
    offer_buckets_dict = {
        f'offer_{bucket}': np.full(n_rows, '')
        for bucket in day_buckets
    }

    t = df_numpydict(transcript,
                     ['person', 'time_days', 'event', 'amount', 'id'])

    bar = progressbar.ProgressBar()

    for i in bar(t['index']):

        # loop backwards from each row
        for j in t['index'][0:i][::-1]:

            # check if same person
            if t['person'][j] != t['person'][i]:
                break

            # a transaction is added to the first bucket whose day value
            # it does not exceed
            if t['event'][j] == 'transaction':
                for k in day_buckets:
                    if t['time_days'][j] <= k:
                        amount_buckets_dict[f'amount_{k}'][i] += t['amount'][j]
                        break

            # an offer received exactly on a bucket day is recorded in that
            # time bucket
            elif t['event'][j] == 'offer received':
                for k in day_buckets:
                    if t['time_days'][j] == k:
                        offer_buckets_dict[f'offer_{k}'][i] = t['id'][j]
                        break

        # replaces amount features with NaN for buckets after the one the
        # row's own time_days falls in. Distinguishes a zero value from an
        # impossible temporal value. The first matching bucket already
        # covers every later one, so the search stops there.
        for m, n in enumerate(day_buckets):
            if t['time_days'][i] < n:
                for z in day_buckets[m + 1:]:
                    amount_buckets_dict[f'amount_{z}'][i] = np.nan
                break

    # adds new created features to DataFrame
    for bucket, values in amount_buckets_dict.items():
        transcript[bucket] = values

    for bucket, values in offer_buckets_dict.items():
        transcript[bucket] = values

    save_file(transcript, save)
    return transcript

示例#9

0

显示文件

def mapping_event(transcript, event=None, save=None):
    '''
    Map the events 'offer completed' or 'offer viewed' to the corresponding
    'offer received' row of the dataset.

    For every 'offer received' row the following rows of the same person
    are scanned for the first not-yet-matched row of type `event` carrying
    the same offer id. On a match the offer row gets a 1 in the `completed`
    / `viewed` column and the event's `time_days` in the `<event>_date`
    column. For 'offer completed' the scan stops once the offer duration
    has elapsed. The new columns are written onto the DataFrame that was
    passed in.

    Parameters
    -----------
    transcript:  DataFrame
    event:  string, either 'offer completed' or 'offer viewed'
    save:   string filename (default=None)
            if filename entered, saves output to folder '../../data/interim'

    Returns
    -------
    DataFrame

    Raises
    ------
    ValueError
        if `event` is not one of the two supported values (this includes
        the default None)
    '''
    # event -> (flag column on the offer row, bookkeeping column marking
    # event rows that have already been matched)
    event_columns = {
        'offer completed': ('completed', 'already_completed'),
        'offer viewed': ('viewed', 'already_viewed'),
    }
    if event not in event_columns:
        raise ValueError(
            "event must be 'offer completed' or 'offer viewed', got "
            f"{event!r}")
    eventid, event_check = event_columns[event]
    event_date = event + '_date'

    # only completions are limited to the offer's duration
    limit_to_duration = event == 'offer completed'

    t = df_numpydict(transcript,
                     ['event', 'person', 'time_days', 'duration', 'id'])

    n_rows = transcript.shape[0]
    t[event_check] = np.zeros(n_rows)
    t[eventid] = np.zeros(n_rows)
    t[event_date] = np.full(n_rows, np.nan)

    bar = progressbar.ProgressBar()

    for i in bar(t['index']):
        if t['event'][i] != 'offer received':
            continue

        for j in t['index'][i + 1:]:

            # event row already assigned to an earlier offer
            if t[event_check][j] == 1:
                continue

            # check if still same person
            if t['person'][j] != t['person'][i]:
                break

            # check if period is within duration
            if limit_to_duration:
                if t['time_days'][j] - t['time_days'][i] > t['duration'][i]:
                    break

            # first matching event for this offer id: link it and stop
            if t['event'][j] == event and t['id'][j] == t['id'][i]:
                t[event_check][j] = 1
                t[eventid][i] = 1
                t[event_date][i] = t['time_days'][j]
                break

    transcript[event_check] = t[event_check]
    transcript[eventid] = t[eventid]
    transcript[event_date] = t[event_date]

    # the bookkeeping column is only needed for the verification step
    match_verification(transcript, event=event)
    transcript.drop([event_check], axis=1, inplace=True)

    save_file(transcript, save)

    return transcript

示例#10

0

显示文件

def viewed_received_spend(transcript, save=None):
    '''
    Creates received_spend, viewed_spend, viewed_days_left, 
    remaining_to_complete, viewed_in_valid features.

    For every 'offer received' row the following rows of the same person
    are scanned while they fall within the offer's duration. A matching
    'offer viewed' row marks the offer as viewed in its valid period and
    records the days left and the spend still required. Transactions are
    added to `received_spend` while `viewed_days_left` is not positive and
    to `viewed_spend` once it is positive. The new columns are written
    onto the DataFrame that was passed in.
    
    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder 
            '../../data/interim'
            
    Returns
    -------
    DataFrame
    '''
    t = df_numpydict(transcript, [
        'event', 'person', 'difficulty', 'time_days', 'duration', 'amount',
        'id'
    ])

    # per-row result arrays; only 'offer received' rows are ever filled in
    viewed_in_valid = np.zeros(transcript.shape[0])
    received_spend = np.zeros(transcript.shape[0])
    viewed_days_left = np.full(transcript.shape[0], 0.0)
    viewed_spend = np.zeros(transcript.shape[0])
    remaining_to_complete = np.full(transcript.shape[0], np.nan)
    # bookkeeping: 1 for 'offer viewed' rows already attributed to an offer
    viewed_already = np.zeros(transcript.shape[0])

    bar = progressbar.ProgressBar()
    for i in bar(t['index']):
        if t['event'][i] == 'offer received':

            # scan forward through the rows after the offer
            for j in t['index'][i + 1:]:

                # check if still same person
                if t['person'][j] != t['person'][i]:
                    break

                # check if period is within duration
                if t['time_days'][j] - t['time_days'][i] > t['duration'][i]:
                    break

                # if offer viewed, update how many days left in the
                # offer, update how much remaining spending needed
                if t['event'][j] == 'offer viewed':
                    # only a view of this offer id that has not been
                    # attributed to an earlier offer counts
                    if (viewed_already[j] != 1) and (t['id'][i] == t['id'][j]):
                        viewed_in_valid[i] = 1

                        # spend so far has not exceeded the difficulty
                        if received_spend[i] <= t['difficulty'][i]:
                            viewed_days_left[i] = t['duration'][i]\
                                                  - (t['time_days'][j] - t['time_days'][i])

                            remaining_to_complete[
                                i] = t['difficulty'][i] - received_spend[i]

                            # clamp at zero
                            if remaining_to_complete[i] < 0:
                                remaining_to_complete[i] = 0

                            viewed_already[j] = 1

                        # spend before the view already exceeded the
                        # difficulty: nothing left to spend, no days left
                        if received_spend[i] > t['difficulty'][i]:
                            viewed_days_left[i] = 0
                            remaining_to_complete[i] = 0
                            viewed_already[j] = 1

                if t['event'][j] == 'transaction':

                    # update spending when received but not viewed
                    # (viewed_days_left is still at or below zero)
                    if viewed_days_left[i] <= 0:
                        received_spend[i] += t['amount'][j]

                    # update spending when viewed
                    if viewed_days_left[i] > 0:
                        viewed_spend[i] += t['amount'][j]

    transcript['received_spend'] = received_spend
    transcript['viewed_spend'] = viewed_spend
    transcript['viewed_days_left'] = viewed_days_left
    transcript['remaining_to_complete'] = remaining_to_complete
    transcript['viewed_in_valid'] = viewed_in_valid

    save_file(transcript, save)

    return transcript

示例#11

0

显示文件

def create_transaction_ranges(transcript, save=None):
    '''
    Add look-back windows of transaction value and transaction count.

    For each 'offer received' row the earlier rows of the same person are
    walked backwards. Every transaction found is added to each window
    (30, 21, 14, 7, 3 and 1 days) that is at least as long as the gap
    between the transaction and the offer: `t_<days>` holds the summed
    amount, `t_<days>c` the number of transactions. The new columns are
    joined to the transcript with `try_join`.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'
    Returns
    -------
    DataFrame
    '''

    # longest window first, so the inner loop can stop at the first window
    # that is too short
    windows = [30, 21, 14, 7, 3, 1]
    n_rows = transcript.shape[0]

    totals = {}
    for days in windows:
        totals[f't_{days}'] = np.zeros(n_rows)
        totals[f't_{days}c'] = np.zeros(n_rows)

    # convert required dataframe columns to dictionary of numpy arrays
    columns = df_numpydict(transcript,
                           ['event', 'person', 'time_days', 'amount'])
    rows = columns['index']
    event = columns['event']
    person = columns['person']
    time_days = columns['time_days']
    amount = columns['amount']

    progress = progressbar.ProgressBar()

    for cur in progress(rows):
        if event[cur] != 'offer received':
            continue

        # walk back through the customer's earlier rows
        for prev in rows[0:cur][::-1]:
            if person[prev] != person[cur]:
                break
            if event[prev] != 'transaction':
                continue

            # how many days before the offer did the transaction happen?
            gap = time_days[cur] - time_days[prev]

            for days in windows:
                if not gap <= days:
                    break
                totals['t_' + str(days)][cur] += amount[prev]
                totals['t_' + str(days) + 'c'][cur] += 1

    # amounts first, then counts, each from shortest to longest window
    shortest_first = windows[::-1]
    ordered = [f't_{days}' for days in shortest_first]
    ordered += [f't_{days}c' for days in shortest_first]
    new_data = pd.DataFrame(totals)[ordered]

    transcript = try_join(new_data, transcript)

    save_file(transcript, save)

    return transcript

示例#12

0

显示文件

def column_order(transcript, save=None):
    '''
    Select the modelling columns in a fixed order, dropping every other
    column.

    The target columns 'offer_spend' and 'completed' are placed after the
    feature groups and are followed by the offer-activity summary columns.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            passed on to save_file together with the result

    Returns
    -------
    DataFrame
    '''
    customer = ['person', 'age', 'income', 'signed_up', 'gender']

    offer = [
        'id', 'rewarded', 'difficulty', 'reward', 'duration', 'mobile', 'web',
        'social', 'bogo', 'discount', 'informational'
    ]

    calendar = ['time_days', 'day', 'weekday', 'month', 'year']

    # look-back transaction windows: amounts first, then counts
    window_days = (1, 3, 7, 14, 21, 30)
    windows = [f't_{days}' for days in window_days]
    windows += [f't_{days}c' for days in window_days]

    spend = [
        'last_amount', 'received_spend', 'viewed_spend', 'viewed_days_left',
        'remaining_to_complete', 'viewed_in_valid', 'viewed',
        'last_transaction_days', 'spend>required'
    ]

    history = [
        'hist_reward_completed', 'hist_reward_possible',
        'hist_difficulty_completed', 'hist_difficulty_possible',
        'hist_previous_completed', 'hist_previous_offers',
        'hist_viewed_and_completed', 'hist_complete_not_viewed',
        'hist_failed_complete', 'hist_viewed', 'hist_received_spend',
        'hist_viewed_spend'
    ]

    # overlap offer columns 'a' .. 'i'
    overlap = list('abcdefghi')

    # per-offer history: received_0..9, viewed_0..9, count_0..9
    per_offer = [
        f'{prefix}_{code}' for prefix in ('received', 'viewed', 'count')
        for code in range(10)
    ]

    # time buckets: amounts first, then offers
    bucket_days = (7, 14, 17, 21, 24, 30)
    buckets = [f'amount_{days}' for days in bucket_days]
    buckets += [f'offer_{days}' for days in bucket_days]

    targets_and_activity = [
        'offer_spend', 'completed', 'percentage_offer_active',
        'offer_active_count', 'no_offer_count', 'offer_cum_amount',
        'no_offer_cum_amount', 'amount_per_day_offer',
        'amount_per_day_not_offer'
    ]

    ordered = (customer + offer + calendar + windows + spend + history +
               overlap + per_offer + buckets + targets_and_activity)

    transcript = transcript[ordered]

    save_file(transcript, save)
    return transcript

示例#13

0

显示文件

def previous_offer_features(transcript, save=None):
    '''
    Add per-offer history columns built from the earlier rows of the same
    person:

    received_<id>   summed received_spend of earlier rows with that offer id
    viewed_<id>     summed viewed_spend of earlier rows with that offer id
    count_<id>      number of earlier completed rows with that offer id

    Columns are created for the ids '0' to '9' and stay NaN where the
    person has no matching earlier row. The index is reset (old index
    discarded) and the columns are written onto the DataFrame that was
    passed in.

    Parameters
    -----------
    transcript:  DataFrame
    save:   string filename (default=None)
            if filename entered, saves output to folder
            '../../data/interim'
    Returns
    -------
    DataFrame
    '''

    def _add_or_start(column, row, amount, floor):
        # add to an existing total, or start one when the cell is still
        # NaN (or otherwise below `floor`)
        if column[row] >= floor:
            column[row] += amount
        else:
            column[row] = amount

    transcript.reset_index(inplace=True, drop=True)
    n_rows = transcript.shape[0]

    # insertion order fixes the final column order:
    # received_0..9, viewed_0..9, count_0..9
    offers = {
        f'{prefix}_{code}': np.full(n_rows, np.nan)
        for prefix in ('received', 'viewed', 'count')
        for code in '0123456789'
    }

    columns = df_numpydict(
        transcript,
        ['person', 'completed', 'viewed_spend', 'received_spend', 'id'])
    rows = columns['index']
    person = columns['person']
    completed = columns['completed']
    viewed_spend = columns['viewed_spend']
    received_spend = columns['received_spend']
    offer_id = columns['id']

    progress = progressbar.ProgressBar()

    for cur in progress(rows):
        for prev in rows[0:cur][::-1]:

            # stop at the first row of a different customer
            if person[prev] != person[cur]:
                break

            previous_offer = offer_id[prev]

            _add_or_start(offers[f'received_{previous_offer}'], cur,
                          received_spend[prev], 0)
            _add_or_start(offers[f'viewed_{previous_offer}'], cur,
                          viewed_spend[prev], 0)

            # only completed offers are counted
            if completed[prev]:
                _add_or_start(offers[f'count_{previous_offer}'], cur, 1, 1)

    for name, values in offers.items():
        transcript[name] = values

    save_file(transcript, save)
    return transcript

示例#14

0

显示文件

文件： train_model.py 项目： GarethEgerton/Starbucks_Udacity

def gridsearch_early_stopping(cv,
                              X,
                              y,
                              folds,
                              grid,
                              cat_features=None,
                              save=None):
    '''
    Perform grid search with early stopping across folds specified by index.

    For each requested fold a CatBoostClassifier is fitted for every
    parameter combination in `grid`, with the fold's test split as the
    evaluation set. The best validation loss, accuracy and iteration of
    every combination are collected in a results DataFrame which is
    displayed and optionally saved. The loss is 'Logloss' when `y` has at
    most two distinct values and 'MultiClass' otherwise.

    Parameters
    -----------
    cv: cross validation
    X: DataFrame or Numpy array
    y: DataFrame or Numpy array
    folds: list of fold indexes
    grid: parameter grid
    cat_features: passed on to the Pools and the classifier (default=None)
    save:   string, excluding file extension (default=None)
            if given, saves results_df for each fold to folder
            '../../models' as '<save><fold>.joblib'; nothing is saved
            when None
    '''

    loss_function = 'Logloss' if np.unique(y).size <= 2 else 'MultiClass'

    # generate data folds
    train_X, train_y, test_X, test_y = generate_folds(cv, X, y)

    # iterate through specified folds
    for fold in folds:
        # assign train and test pools
        test_pool = Pool(data=test_X[fold],
                         label=test_y[fold],
                         cat_features=cat_features)
        train_pool = Pool(data=train_X[fold],
                          label=train_y[fold],
                          cat_features=cat_features)

        result_columns = [
            'params' + str(fold), loss_function + str(fold),
            'Accuracy' + str(fold), 'iteration' + str(fold)
        ]
        # rows are gathered in a list and turned into a DataFrame once;
        # DataFrame.append was removed in pandas 2.0
        result_rows = []

        best_score = float('inf')
        best_grid = None

        # iterate through parameter grid
        for params in ParameterGrid(grid):

            # create catboost classifier with parameter params
            model = CatBoostClassifier(cat_features=cat_features,
                                       early_stopping_rounds=50,
                                       task_type='GPU',
                                       custom_loss=['Accuracy'],
                                       iterations=3000,
                                       **params)

            # fit model
            model.fit(train_pool, eval_set=test_pool, verbose=400)

            validation_scores = model.get_best_score()['validation']
            print(validation_scores)

            fold_loss = validation_scores[loss_function]
            result_rows.append([
                params, fold_loss, validation_scores['Accuracy'],
                model.get_best_iteration()
            ])

            # save best score and parameters
            if fold_loss < best_score:
                best_score = fold_loss
                best_grid = params

        results_df = pd.DataFrame(result_rows, columns=result_columns)

        print("Best logloss: ", best_score)
        print("Grid:", best_grid)

        # save defaults to None, which cannot be concatenated into a name
        if save is not None:
            save_file(results_df,
                      save + str(fold) + '.joblib',
                      dirName='../../models')
        display(results_df)