Exemplo n.º 1
0
def generate_dllstm_filtered_dict(pars):
    """Write the filtered term dictionary and the list of terms in use.

    Loads the search-volume data and the pickled search-term dictionary,
    keeps the terms that are both keys of that dictionary and columns of the
    search-volume data, then pickles two artifacts:

    * ``pars['filtered_dict_path']``: the dictionary restricted to the kept
      terms (according to the original comment, the values are GloVe
      embeddings);
    * ``pars['current_word_path']``: the list of kept terms, deduplicated in
      first-seen order.

    Args:
        pars: Mapping providing the keys ``current_word_path``,
            ``filtered_dict_path``, ``search_terms_dict_path``,
            ``seed_word_path`` and ``input_data_path``.

    Raises:
        KeyError: If any of the required keys is missing from ``pars``.
    """
    current_word_path = pars['current_word_path']
    filtered_dict_path = pars['filtered_dict_path']
    search_terms_dict_path = pars['search_terms_dict_path']
    seed_word_path = pars['seed_word_path']
    input_data_path = pars['input_data_path']

    search_volume_df = read_raw_data(input_data_path)

    # NOTE(security): pickle.load can execute arbitrary code; this file must
    # come from a trusted source.
    with open(search_terms_dict_path, 'rb') as f:
        term_embeddings = pickle.load(f)

    # Hard-coded switch: when True only the lower-cased seed words are
    # candidates; when False every key of the term dictionary is a candidate.
    use_seed_words = True
    if use_seed_words:
        seed_words = pd.read_csv(seed_word_path, header=None)[0].values
        candidates = [word.lower() for word in seed_words]
    else:
        candidates = list(term_embeddings)

    volume_columns = search_volume_df.columns
    matched = [
        term
        for term in candidates
        if term in term_embeddings and term in volume_columns
    ]

    # Deduplicate while keeping first-seen order. The previous
    # ``list(set(...))`` produced an order that changes between interpreter
    # runs (string hash randomisation), so the pickled term list was not
    # reproducible.
    terms = list(dict.fromkeys(matched))

    # Store a dictionary of terms currently in use and their embeddings.
    with open(filtered_dict_path, 'wb') as f:
        pickle.dump({term: term_embeddings[term] for term in terms}, f)
    with open(current_word_path, 'wb') as f:
        pickle.dump(terms, f)
Exemplo n.º 2
0
def test_get_features_shape():
    """Check the shapes of the extracted features and label.

    Runs the raw data through ``preprocess_data`` and expects a feature
    result of shape ``(150, 4)`` and a label of shape ``(150,)``.
    """
    # NOTE(review): ``get_featues`` looks like a misspelling of
    # ``get_features``; confirm the helper's real name before renaming.
    prepared = preprocess_data(read_raw_data())
    feature_matrix = get_featues(prepared)
    target = get_label(prepared)

    expected_rows = 150
    assert feature_matrix.shape == (expected_rows, 4)
    assert target.shape == (expected_rows,)
Exemplo n.º 3
0
def test_get_features_shape():
    """Check the shapes of the extracted features and target.

    Runs the raw data through ``preprocess_data`` and expects a feature
    result of shape ``(34109, 7)`` and a target of shape ``(34109,)``.
    """
    dframe = read_raw_data()
    processed = preprocess_data(dframe)
    features = get_features(processed)
    label = get_target(processed)

    assert features.shape == (34109, 7)
    # A one-element tuple needs the trailing comma: ``(34109)`` is just the
    # int 34109, which never compares equal to a shape tuple.
    assert label.shape == (34109,)
Exemplo n.º 4
0
def process_features(train_data_path, seq_length, search_lag):
    """Load the training data and assemble the processed feature groups.

    Args:
        train_data_path: Forwarded to ``read_raw_data``.
        seq_length: Forwarded to ``get_pol_value_series`` together with the
            ``pol_val`` output of ``process_data``.
        search_lag: Lag forwarded to ``lag_search_features`` for the trend
            features.

    Returns:
        A tuple ``(y_data, processed_pol, process_phys, processed_trend)``.
        ``process_phys`` is ``np.array`` of the phys features lagged by -1
        with missing values replaced by 0.
    """
    raw = read_raw_data(train_data_path)
    targets, pol_values, trend_features, phys_features = process_data(raw)

    pol_series = get_pol_value_series(pol_values, seq_length)
    lagged_trend = lag_search_features(trend_features, search_lag)

    # The phys features always use a fixed lag of -1; NAs are then zeroed
    # in place on the lagged result.
    lagged_phys = lag_search_features(phys_features, -1)
    lagged_phys.fillna(0, inplace=True)
    phys_array = np.array(lagged_phys)

    return targets, pol_series, phys_array, lagged_trend
Exemplo n.º 5
0
def test_raw_shape():
    """Check that the raw data loads with shape ``(150, 5)``."""
    raw = read_raw_data()
    expected_shape = (150, 5)
    assert raw.shape == expected_shape
Exemplo n.º 6
0
def test_raw_shape():
    """Check that the raw data loads with shape ``(34109, 8)``."""
    raw = read_raw_data()
    expected_shape = (34109, 8)
    assert raw.shape == expected_shape