Exemplo n.º 1
0
def latent_sim_features(base_path,
                        log,
                        examples,
                        latent_path=None,
                        keys=KEYS,
                        sizes=SIZES,
                        redo=False):
    """Return ``examples`` extended with the 'latent_sim_features' columns.

    The feature columns are cached in
    ``<base_path>features/latent_sim_features.fthr``. When that file exists
    and ``redo`` is falsy, the cached table is loaded, restricted to the
    session ids present in ``examples`` and handed to ``copy_features``.
    Otherwise the features are produced by ``create_features`` and the cache
    file is (re)written.

    Args:
        base_path: String prefix of the data folder; concatenated directly
            with ``'features/'``, so it presumably ends with a separator.
        log: Forwarded unchanged to ``create_features``.
        examples: Table exposing a ``session_id`` column; the features are
            attached to it.
        latent_path: Forwarded to ``create_features``; defaults to
            ``base_path`` when ``None``.
        keys: Forwarded to ``create_features``.
        sizes: Forwarded to ``create_features``.
        redo: When truthy, ignore an existing cache file and recompute.

    Returns:
        The ``examples`` object returned by ``copy_features`` (cache hit) or
        by ``reduce_mem_usage`` (recomputation).
    """
    name = 'latent_sim_features'
    cache_file = Path(base_path + 'features/' + name + '.fthr')

    # Without a dedicated latent folder, fall back to the base folder.
    if latent_path is None:
        latent_path = base_path

    if not cache_file.is_file() or redo:
        # Cache miss (or forced rebuild): compute, shrink, persist, report.
        examples, new_cols = create_features(log,
                                             examples,
                                             latent_path=latent_path,
                                             keys=keys,
                                             sizes=sizes)
        examples = reduce_mem_usage(examples, cols=new_cols)
        id_cols = ['session_id', 'impressions']
        write_feather(examples[id_cols + list(new_cols)], cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the requested sessions.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Exemplo n.º 2
0
Arquivo: time.py Projeto: rn5l/rsc19
def time_features(base_path,
                  log,
                  examples,
                  preprocessed_path=PREPROCESSED_FOLDER,
                  redo=False):
    """Return ``examples`` extended with the 'time_features' columns.

    Uses ``<base_path>features/time_features.fthr`` as a cache: if the file
    exists and ``redo`` is falsy, the cached table is loaded, filtered to the
    session ids found in ``examples`` and passed to ``copy_features``.
    Otherwise ``create_features`` is called and its output is written to the
    cache file.

    Args:
        base_path: String prefix of the data folder; concatenated directly
            with ``'features/'``.
        log: Forwarded unchanged to ``create_features``.
        examples: Table exposing a ``session_id`` column.
        preprocessed_path: Forwarded to ``create_features``.
        redo: When truthy, recompute even if a cache file exists.

    Returns:
        The ``examples`` object with the feature columns attached.
    """
    name = 'time_features'
    cache_file = Path(base_path + 'features/' + name + '.fthr')

    if not cache_file.is_file() or redo:
        # Nothing usable on disk (or rebuild requested): compute from scratch.
        examples, new_cols = create_features(
            log, examples, preprocessed_path=preprocessed_path)
        examples = reduce_mem_usage(examples, cols=new_cols)
        id_cols = ['session_id', 'impressions']
        write_feather(examples[id_cols + list(new_cols)], cache_file)
        print_col_list(new_cols)
        return examples

    # Reuse the cached table, limited to the sessions we were asked about.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Exemplo n.º 3
0
def meta_features(base_path,
                  meta_path,
                  log,
                  examples,
                  latent='d2v',
                  latent_size=16,
                  redo=False):
    """Return ``examples`` extended with the 'meta_features' columns.

    The features are cached in ``<base_path>features/<name>.fthr`` where
    ``<name>`` is ``'meta_features_all'`` when ``latent`` is ``None`` and
    ``'meta_features_<latent_size>'`` otherwise. If the cache file exists and
    ``redo`` is falsy, the cached table is loaded, restricted to the session
    ids present in ``examples`` and passed to ``copy_features``; otherwise
    the features are produced by ``create_features`` and the cache is written.

    NOTE(review): the cache name encodes ``latent_size`` but not the
    ``latent`` prefix itself, so two different prefixes with the same size
    share one cache file. The name is left unchanged here so that existing
    cache files remain valid.

    Args:
        base_path: String prefix of the data folder; concatenated directly
            with ``'features/'``.
        meta_path: Forwarded unchanged to ``create_features``.
        log: Forwarded unchanged to ``create_features``.
        examples: Table exposing a ``session_id`` column.
        latent: Forwarded to ``create_features`` as ``latent_prefix``;
            ``None`` selects the ``'_all'`` cache name.
        latent_size: Forwarded to ``create_features``; part of the cache name
            when ``latent`` is not ``None``.
        redo: When truthy, recompute even if a cache file exists.

    Returns:
        The ``examples`` object with the feature columns attached.
    """
    name = 'meta_features'
    # Identity check: ``None`` is a singleton and must be tested with ``is``.
    if latent is None:
        name += '_all'
    else:
        name += '_' + str(latent_size)

    path = Path(base_path + 'features/' + name + '.fthr')
    if path.is_file() and not redo:
        # Cache hit: keep only the rows of the requested sessions.
        features = load_feather(path)
        features = features[features.session_id.isin(
            examples.session_id.unique())]
        examples = copy_features(examples, features)
    else:
        # Cache miss or forced rebuild: compute, shrink, persist, report.
        examples, cols = create_features(meta_path,
                                         log,
                                         examples,
                                         latent_prefix=latent,
                                         latent_size=latent_size)
        examples = reduce_mem_usage(examples)
        write_feather(examples[['session_id', 'impressions'] + list(cols)],
                      path)
        print_col_list(cols)

    return examples
Exemplo n.º 4
0
def resolve_na(train):
    """Report the columns of ``train`` that contain missing values, then exit.

    Every column for which ``isnull()`` flags at least one entry is collected
    and handed to ``print_col_list`` under the name ``'NA_COLS'``.

    Note that, despite its name, this function does not modify ``train``: it
    is a diagnostic that terminates the interpreter after reporting and
    therefore never returns to its caller.

    Args:
        train: Object exposing ``columns`` and per-column access via
            ``train[col]`` (presumably a pandas DataFrame).

    Raises:
        SystemExit: Always, once the column list has been printed.
    """
    # Local import keeps this block self-contained; ``sys.exit`` is the
    # programmatic equivalent of the interactive-only ``exit()`` helper.
    import sys

    # ``.any()`` answers "is there at least one NA" without summing the whole
    # boolean column element by element in Python.
    cols = [col for col in train.columns if train[col].isnull().any()]
    print_col_list(cols, name='NA_COLS')
    sys.exit()
Exemplo n.º 5
0
def crawl_features(base_path, crawl_path, log, examples, redo=False):
    """Return ``examples`` extended with the 'crawl_features' columns.

    Uses ``<base_path>features/crawl_features.fthr`` as a cache: if the file
    exists and ``redo`` is falsy, the cached table is loaded, filtered to the
    session ids found in ``examples`` and passed to ``copy_features``.
    Otherwise ``create_features`` is called and its output is written to the
    cache file.

    Args:
        base_path: String prefix of the data folder; concatenated directly
            with ``'features/'``.
        crawl_path: Forwarded unchanged to ``create_features``.
        log: Forwarded unchanged to ``create_features``.
        examples: Table exposing a ``session_id`` column.
        redo: When truthy, recompute even if a cache file exists.

    Returns:
        The ``examples`` object with the feature columns attached.
    """
    name = 'crawl_features'
    cache_file = Path(base_path + 'features/' + name + '.fthr')

    if not cache_file.is_file() or redo:
        # Cache miss (or forced rebuild): compute, shrink, persist, report.
        examples, new_cols = create_features(crawl_path, log, examples)
        examples = reduce_mem_usage(examples)
        id_cols = ['session_id', 'impressions']
        write_feather(examples[id_cols + list(new_cols)], cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the requested sessions.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Exemplo n.º 6
0
def session_features(base_path, log, examples, price_path=None, crawl_path=CRAWL_FOLDER, poi_path=POI_FOLDER, redo=False):
    """Return ``examples`` extended with the 'session_features' columns.

    The feature columns are cached in
    ``<base_path>features/session_features.fthr``. When that file exists and
    ``redo`` is falsy, the cached table is loaded, restricted to the session
    ids present in ``examples`` and handed to ``copy_features``. Otherwise
    the features are produced by ``create_features`` and the cache file is
    (re)written.

    Args:
        base_path: String prefix of the data folder; concatenated directly
            with ``'features/'``.
        log: Forwarded unchanged to ``create_features``.
        examples: Table exposing a ``session_id`` column.
        price_path: Forwarded to ``create_features``; defaults to
            ``base_path`` when ``None``.
        crawl_path: Forwarded to ``create_features``.
        poi_path: Forwarded to ``create_features``.
        redo: When truthy, recompute even if a cache file exists.

    Returns:
        The ``examples`` object with the feature columns attached.
    """
    name = 'session_features'
    cache_file = Path(base_path + 'features/' + name + '.fthr')

    # Without a dedicated price folder, fall back to the base folder.
    if price_path is None:
        price_path = base_path

    if not cache_file.is_file() or redo:
        # Nothing usable on disk (or rebuild requested): compute from scratch.
        examples, new_cols = create_features(log,
                                             examples,
                                             price_path=price_path,
                                             crawl_path=crawl_path,
                                             poi_path=poi_path)
        examples = reduce_mem_usage(examples, cols=new_cols)
        id_cols = ['session_id', 'impressions']
        write_feather(examples[id_cols + list(new_cols)], cache_file)
        print_col_list(new_cols)
        return examples

    # Reuse the cached table, limited to the sessions we were asked about.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Exemplo n.º 7
0
def list_context_features(base_path, log, examples, shifts=SHIFTS, redo=False):
    """Return ``examples`` extended with the list-context feature columns.

    The cache file is
    ``<base_path>features/list_context_features_<str(shifts)>.fthr``, so each
    distinct ``shifts`` value gets its own cache. If the file exists and
    ``redo`` is falsy, the cached table is loaded, filtered to the session
    ids found in ``examples`` and passed to ``copy_features``. Otherwise
    ``create_features`` is called and its output is written to the cache.

    Args:
        base_path: String prefix of the data folder; concatenated directly
            with ``'features/'``.
        log: Forwarded unchanged to ``create_features``.
        examples: Table exposing a ``session_id`` column.
        shifts: Forwarded to ``create_features``; its ``str()`` form is part
            of the cache file name.
        redo: When truthy, recompute even if a cache file exists.

    Returns:
        The ``examples`` object with the feature columns attached.
    """
    name = 'list_context_features_' + str(shifts)
    cache_file = Path(base_path + 'features/' + name + '.fthr')

    if not cache_file.is_file() or redo:
        # Cache miss (or forced rebuild): compute, shrink, persist, report.
        examples, new_cols = create_features(log, examples, shifts=shifts)
        examples = reduce_mem_usage(examples)
        id_cols = ['session_id', 'impressions']
        write_feather(examples[id_cols + list(new_cols)], cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the requested sessions.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Exemplo n.º 8
0
Arquivo: price.py Projeto: rn5l/rsc19
def price_features(base_path,
                   log,
                   examples,
                   min_occurences=None,
                   hidden=False,
                   train_only=False,
                   fillna_mean=False,
                   redo=False):
    """Return ``examples`` extended with the price feature columns.

    The cache file name starts with ``'price_features'`` and receives one
    suffix per active option, in this order: ``'_trainonly'``, ``'_hidden'``,
    ``'_min<min_occurences>'``, ``'_fillmean'``. The file lives at
    ``<base_path>features/<name>.fthr``. If it exists and ``redo`` is falsy,
    the cached table is loaded, restricted to the session ids present in
    ``examples`` and passed to ``copy_features``; otherwise the features are
    produced by ``create_features`` and the cache is written.

    Args:
        base_path: String prefix of the data folder; concatenated directly
            with ``'features/'`` and also forwarded to ``create_features``.
        log: Forwarded unchanged to ``create_features``.
        examples: Table exposing a ``session_id`` column.
        min_occurences: Forwarded to ``create_features``; adds a ``'_min…'``
            suffix to the cache name unless ``None``.
        hidden: Forwarded to ``create_features``; adds ``'_hidden'``.
        train_only: Forwarded to ``create_features``; adds ``'_trainonly'``.
        fillna_mean: Forwarded to ``create_features``; adds ``'_fillmean'``.
        redo: When truthy, recompute even if a cache file exists.

    Returns:
        The ``examples`` object with the feature columns attached.
    """
    # Collect the option suffixes in their fixed order, then join them.
    suffixes = []
    if train_only:
        suffixes.append('_trainonly')
    if hidden:
        suffixes.append('_hidden')
    if min_occurences is not None:
        suffixes.append('_min' + str(min_occurences))
    if fillna_mean:
        suffixes.append('_fillmean')
    name = 'price_features' + ''.join(suffixes)

    cache_file = Path(base_path + 'features/' + name + '.fthr')

    if not cache_file.is_file() or redo:
        # Cache miss (or forced rebuild): compute, shrink, persist, report.
        examples, new_cols = create_features(base_path,
                                             log,
                                             examples,
                                             min_occurences=min_occurences,
                                             hidden=hidden,
                                             train_only=train_only,
                                             fillna_mean=fillna_mean)
        examples = reduce_mem_usage(examples, cols=new_cols)
        id_cols = ['session_id', 'impressions']
        write_feather(examples[id_cols + list(new_cols)], cache_file)
        print_col_list(new_cols)
        return examples

    # Cache hit: keep only the rows belonging to the requested sessions.
    cached = load_feather(cache_file)
    wanted_sessions = examples.session_id.unique()
    cached = cached[cached.session_id.isin(wanted_sessions)]
    return copy_features(examples, cached)
Exemplo n.º 9
0
def main():
    """Build the example set for ``SET`` and pass it to ``print_col_list``."""

    # redo=True is passed through to create_set; presumably this forces a
    # rebuild instead of reusing stored results — confirm against create_set.
    examples = create_set(SET, conf=None, redo=True)
    print_col_list(examples)