def test(path_test, input_size, hidden_size, batch_size, save_dir, model_name,
         maxlen):
    db = read_data(path_test)
    X = create_sequences(db[:-maxlen], win_size=maxlen, step=maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], input_size))

    # build the model: 1-layer LSTM
    print('Build model...')
    model = Sequential()
    model.add(LSTM(hidden_size, return_sequences=False,
                   input_shape=(maxlen, input_size)))
    model.add(Dense(maxlen))

    model.load_weights(save_dir + model_name)
    model.compile(loss='mse', optimizer='adam')

    prediction = model.predict(X, batch_size, verbose=1)
    prediction = prediction.flatten()
    # prediction_container = np.array(prediction).flatten()

    Y = db[maxlen:]
    plt.plot(prediction, label='prediction')
    plt.plot(Y, label='true')
    plt.legend()
    plt.show()
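`read_data` and `create_sequences` are project helpers that are not shown (note that later snippets use differently-typed `read_data` variants for pandas and for text files). A minimal sketch of the time-series variants used here, assuming the data file holds one numeric value per line; these are illustrative guesses, not the original implementations:

import numpy as np

def read_data(path):
    # Assumption: the file holds one numeric value per line.
    with open(path) as fp:
        return np.array([float(line) for line in fp if line.strip()])

def create_sequences(data, win_size, step):
    # Assumption: slice the 1-D series into windows of length win_size,
    # advancing `step` values between consecutive windows.
    windows = [data[i:i + win_size]
               for i in range(0, len(data) - win_size + 1, step)]
    return np.array(windows)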
def prepare_datasets(input_data_files, max_len):
    """
    Reads the input data and prepares the train and test files.

    :param input_data_files: tuple containing (<path_to_data_file>, <path_to_labels_file>)
    :param max_len: maximum length of a sentence (number of words) in the input data
    :return:
    """
    sentences, sentence_labels = utils.read_data(input_data_files[0],
                                                 input_data_files[1])
    vocab = get_vocabulary(sentences)
    word_to_idx = dict()
    word_to_idx["__PAD__"] = 0
    data = np.zeros(shape=(len(sentences), max_len), dtype=np.int64)
    labels = np.zeros(shape=(len(sentences)), dtype=np.int64)
    for idx, w in enumerate(vocab):
        word_to_idx[w] = idx + 1
    for i in range(len(sentences)):
        labels[i] = sentence_labels[i]
        # Left-pad: shorter sentences keep index 0 ("__PAD__") at the front.
        offset = max_len - len(sentences[i])
        for j in range(len(sentences[i])):
            data[i][offset + j] = word_to_idx[sentences[i][j]]
    np.save(constants.SENTIMENT_DATA_PATH, data)
    np.save(constants.SENTIMENT_LABELS_PATH, labels)
    with open(constants.WORD_TO_IDX_PATH, "wb") as w_idx_fp:
        pickle.dump(word_to_idx, w_idx_fp)
    print("saved the train data, labels, and test data, labels")
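`get_vocabulary` is not shown. A plausible minimal version, assuming `sentences` is a list of token lists (an assumption, not the original):

def get_vocabulary(sentences):
    # Collect the unique tokens across all sentences; sort for a
    # deterministic word-to-index mapping across runs.
    vocab = set()
    for sentence in sentences:
        vocab.update(sentence)
    return sorted(vocab)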
def train_normal_model(path_train, input_size, hidden_size, batch_size,
                       early_stopping_patience, val_percentage, save_dir,
                       model_name, maxlen):
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    db = read_data(path_train)
    train_x = db[:-maxlen]
    train_y = db[maxlen:]
    X = create_sequences(train_x, maxlen, maxlen)
    y = create_sequences(train_y, maxlen, maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    y = np.reshape(y, (y.shape[0], y.shape[1], 1))

    # preparing the callbacks
    check_pointer = callbacks.ModelCheckpoint(filepath=save_dir + model_name,
                                              verbose=1, save_best_only=True)
    early_stop = callbacks.EarlyStopping(patience=early_stopping_patience,
                                         verbose=1)

    # build the model: 1-layer LSTM
    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of
    # HIDDEN_SIZE. Note: in a situation where your input sequences have a
    # variable length, use input_shape=(None, nb_feature).
    model.add(LSTM(hidden_size, input_shape=(maxlen, input_size)))
    # For the decoder's input, we repeat the encoded input for each time step.
    model.add(RepeatVector(maxlen))
    # The decoder RNN could be multiple layers stacked or a single layer.
    model.add(LSTM(hidden_size, return_sequences=True))
    # For each step of the output sequence, decide which value should be emitted.
    model.add(TimeDistributed(Dense(1)))

    model.compile(loss='mae', optimizer='adam')
    model.summary()

    model.fit(X, y, batch_size=batch_size, epochs=50,
              validation_split=val_percentage,
              callbacks=[check_pointer, early_stop])
    return model
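A hypothetical invocation; the path and hyperparameters below are illustrative only, not values from the original project:

model = train_normal_model(path_train='data/train_series.txt',  # hypothetical path
                           input_size=1,
                           hidden_size=64,
                           batch_size=32,
                           early_stopping_patience=5,
                           val_percentage=0.1,
                           save_dir='checkpoints/',
                           model_name='lstm_autoencoder.h5',
                           maxlen=140)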
def creat_url():
    """
    Generate request URLs.
    :return:
    """
    for city_data in read_data('other/city_code.txt'):
        page_index = 0
        # Each line of city_code.txt holds a dict literal.
        city_data = eval(city_data)
        while page_index < 55:
            page_index += 1
            url = ('https://appv3.qichacha.net/app/v1/base/getNewCompanys'
                   '?province=' + str(city_data['provinceCode']) +
                   '&cityCode=' + str(city_data['Value']) +
                   '&pageIndex=' + str(page_index) +
                   '&timestamp=' + str(tim) +
                   '&sign=' + sign +
                   '&platform=other&app_channel=qq')
            yield url, city_data['Desc'], city_data['provinceName']
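`tim` and `sign` are module-level globals the function assumes are defined elsewhere. Consuming the generator might look like this; the `requests` call is an illustrative sketch, not the original crawler:

import requests

for url, city_name, province_name in creat_url():
    # Fetch each page and tag the response with its city and province.
    resp = requests.get(url, timeout=10)
    print(province_name, city_name, resp.status_code)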
def train_normal_model(path_train, input_size, hidden_size, batch_size,
                       early_stopping_patience, val_percentage, save_dir,
                       model_name, maxlen):
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    db = read_data(path_train)
    # Use maxlen consistently (the original hard-coded 140 here) so the
    # sequence length always matches the model's input_shape below.
    train_x = db[:-maxlen]
    train_y = db[maxlen:]
    X = create_sequences(train_x, maxlen, maxlen)
    y = create_sequences(train_y, maxlen, maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))

    # preparing the callbacks
    check_pointer = callbacks.ModelCheckpoint(filepath=save_dir + model_name,
                                              verbose=1, save_best_only=True)
    early_stop = callbacks.EarlyStopping(patience=early_stopping_patience,
                                         verbose=1)

    # build the model: 1-layer LSTM
    print('Build model...')
    model = Sequential()
    model.add(LSTM(hidden_size, return_sequences=False,
                   input_shape=(maxlen, input_size)))
    model.add(Dense(maxlen))

    model.compile(loss='mse', optimizer='adam')
    model.summary()

    model.fit(X, y, batch_size=batch_size, epochs=100,
              validation_split=val_percentage,
              callbacks=[check_pointer, early_stop])
    return model
def test(path_test, input_size, hidden_size, batch_size, save_dir, model_name,
         maxlen):
    db = read_data(path_test)
    X = create_sequences(db, maxlen, maxlen)
    y = create_sequences(db, maxlen, maxlen)
    X = np.reshape(X, (X.shape[0], X.shape[1], 1))
    y = np.reshape(y, (y.shape[0], y.shape[1], 1))

    # build the model: 1-layer LSTM
    print('Build model...')
    model = Sequential()
    # "Encode" the input sequence using an RNN, producing an output of
    # HIDDEN_SIZE. Note: in a situation where your input sequences have a
    # variable length, use input_shape=(None, nb_feature).
    model.add(LSTM(hidden_size, input_shape=(maxlen, input_size)))
    # For the decoder's input, we repeat the encoded input for each time step.
    model.add(RepeatVector(maxlen))
    # The decoder RNN could be multiple layers stacked or a single layer.
    model.add(LSTM(hidden_size, return_sequences=True))
    # For each step of the output sequence, decide which value should be emitted.
    model.add(TimeDistributed(Dense(1)))

    model.load_weights(save_dir + model_name)
    model.compile(loss='mae', optimizer='adam')
    model.summary()

    prediction = model.predict(X, batch_size, verbose=1)
    prediction = prediction.flatten()
    # prediction_container = np.array(prediction).flatten()

    plt.plot(prediction.flatten()[:4000], label='prediction')
    plt.plot(y.flatten()[maxlen:4000 + maxlen], label='true')
    plt.legend()
    plt.show()

    store_prediction_and_ground_truth(model)
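`store_prediction_and_ground_truth` is not shown. The original takes only the model, so it presumably rebuilds the test sequences internally; this sketch makes the data explicit instead (an assumed variant with a changed signature, not the original):

def store_prediction_and_ground_truth(model, X, y,
                                      out_path='prediction_vs_truth.npz'):
    # Assumed variant: persist the flattened prediction and ground truth
    # so they can be compared offline.
    prediction = model.predict(X).flatten()
    np.savez(out_path, prediction=prediction, ground_truth=y.flatten())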
desc = args.description
model_name = 'LR-Cate'
model_file = (model_name + '-Sample' + '-' + version + '.model'
              if USE_SAMPLE else model_name + '-' + version + '.model')
model_metainfo_file = (model_name + '-Sample' + '-' + version + '.json'
                       if USE_SAMPLE else model_name + '-' + version + '.json')
sub_file = ('Sub-' + model_name + '-Sample' + '-' + version + '.txt'
            if USE_SAMPLE else 'Sub-' + model_name + '-' + version + '.txt')
if os.path.exists(model_file):
    print('A model with the same version already exists.')
    sys.exit(-1)

feature_store_path = '../sample/features' if USE_SAMPLE else '../data/features'

CATE_TRAIN_FILE = 'ensemble_cate_feature_train'
CATE_TRAIN_FILE = (CATE_TRAIN_FILE + '_sample' + '.' + fmt
                   if USE_SAMPLE else CATE_TRAIN_FILE + '.' + fmt)
ensemble_train = read_data(os.path.join(feature_store_path, CATE_TRAIN_FILE),
                           fmt)
CATE_TEST_FILE = 'ensemble_cate_feature_test'
CATE_TEST_FILE = (CATE_TEST_FILE + '_sample' + '.' + fmt
                  if USE_SAMPLE else CATE_TEST_FILE + '.' + fmt)
ensemble_test = read_data(os.path.join(feature_store_path, CATE_TEST_FILE),
                          fmt)

# .info() prints directly; wrapping it in print() would also emit "None".
ensemble_train.info()
ensemble_test.info()

all_features = list(ensemble_train.columns.values)
print("all original features")
print(all_features)
y = ensemble_train[y_label].values
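The repeated `'…_sample' if USE_SAMPLE else …` suffix logic recurs in the snippets below as well; it could be factored into a small helper. A sketch (the helper name is ours, not from the original):

def with_sample_suffix(base, use_sample, ext):
    # e.g. 'ensemble_cate_feature_train_sample.csv' when use_sample is True,
    # 'ensemble_cate_feature_train.csv' otherwise.
    suffix = '_sample' if use_sample else ''
    return base + suffix + '.' + ext

CATE_TRAIN_FILE = with_sample_suffix('ensemble_cate_feature_train', USE_SAMPLE, fmt)
CATE_TEST_FILE = with_sample_suffix('ensemble_cate_feature_test', USE_SAMPLE, fmt)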
96/96 [==============================] - 15s - loss: 0.5074 - acc: 0.8854 - val_loss: 0.5017 - val_acc: 0.9048
Epoch 4/20
96/96 [==============================] - 16s - loss: 0.4007 - acc: 0.8854 - val_loss: 0.3971 - val_acc: 0.9048
Epoch 5/20
96/96 [==============================] - 15s - loss: 0.3400 - acc: 0.8958 - val_loss: 0.3234 - val_acc: 0.9286
Epoch 6/20
96/96 [==============================] - 15s - loss: 0.2773 - acc: 0.9167 - val_loss: 0.3014 - val_acc: 0.9286
Epoch 7/20
96/96 [==============================] - 15s - loss: 0.2409 - acc: 0.9167 - val_loss: 0.2914 - val_acc: 0.9286
Epoch 8/20
96/96 [==============================] - 15s - loss: 0.2181 - acc: 0.9375 - val_loss: 0.2629 - val_acc: 0.9286
'''

method = 'hks'
x_data, y_data = read_data(descriptor_dir='shrec11-kp',
                           method=method,
                           descriptor_rows=KP_DESCRIPTOR_ROWS,
                           descriptor_cols=KP_DESCRIPTOR_COLS)
(train_x, val_x, train_y, val_y) = split_data(x_data, y_data,
                                              split_percentage=0.7)
train_x = train_x.reshape((-1, KP_UNITS))
val_x = val_x.reshape((-1, KP_UNITS))
mlp_model = MLP(input_units=KP_UNITS,
                output_units=OUTPUT_UNITS,
                hidden_layers=(10000, ),
                activations=('relu', 'softmax'))
scores = mlp_model.train(train_x,
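`split_data` is not shown; a minimal sketch, assuming `split_percentage` is the fraction of samples assigned to the training set and that a shuffled split is intended (both assumptions):

import numpy as np

def split_data(x_data, y_data, split_percentage=0.7):
    # Shuffle, then put the first split_percentage of the samples in the
    # training set and the remainder in the validation set.
    idx = np.random.permutation(len(x_data))
    cut = int(len(x_data) * split_percentage)
    train_idx, val_idx = idx[:cut], idx[cut:]
    return x_data[train_idx], x_data[val_idx], y_data[train_idx], y_data[val_idx]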
    '--format', help='store pandas feature format, csv, pkl')
args = parser.parse_args()

if __name__ == '__main__':
    USE_SAMPLE = args.sample
    fmt = args.format if args.format else 'csv'
    feature_store_path = '../sample/features' if USE_SAMPLE else '../data/features'
    if not os.path.exists(feature_store_path):
        os.mkdir(feature_store_path)

    FACE_FEATURE_FILE = 'face_feature'
    FACE_FEATURE_FILE = (FACE_FEATURE_FILE + '_sample' + '.' + fmt
                         if USE_SAMPLE else FACE_FEATURE_FILE + '.' + fmt)
    face_data = read_data(os.path.join(feature_store_path, FACE_FEATURE_FILE),
                          fmt)
    TEXT_FEATURE_FILE = 'text_feature'
    TEXT_FEATURE_FILE = (TEXT_FEATURE_FILE + '_sample' + '.' + fmt
                         if USE_SAMPLE else TEXT_FEATURE_FILE + '.' + fmt)
    text_data = read_data(os.path.join(feature_store_path, TEXT_FEATURE_FILE),
                          fmt)

    TRAIN_USER_INTERACT = ('../sample/train_interaction.txt'
                           if USE_SAMPLE else '../data/train_interaction.txt')
    TEST_INTERACT = ('../sample/test_interaction.txt'
                     if USE_SAMPLE else '../data/test_interaction.txt')
    user_item_train = pd.read_csv(TRAIN_USER_INTERACT,
                                  sep='\t',
                                  header=None,
                                  names=[
                                      'user_id', 'photo_id', 'click', 'like',
                                      'follow', 'time', 'playing_time',
    sep='\t',
    header=None,
    names=[
        'user_id', 'photo_id', 'click', 'like', 'follow', 'time',
        'playing_time', 'duration_time'
    ])
user_item_test = pd.read_csv(
    TEST_INTERACT,
    sep='\t',
    header=None,
    names=['user_id', 'photo_id', 'time', 'duration_time'])

PHOTO_FEATURE_FILE = 'photo_feature'
PHOTO_FEATURE_FILE = (PHOTO_FEATURE_FILE + '_sample' + '.' + fmt
                      if USE_SAMPLE else PHOTO_FEATURE_FILE + '.' + fmt)
photo_data = read_data(
    os.path.join(feature_store_path, PHOTO_FEATURE_FILE), fmt)
USER_FEATURE_FILE = 'user_feature'
USER_FEATURE_FILE = (USER_FEATURE_FILE + '_sample' + '.' + fmt
                     if USE_SAMPLE else USER_FEATURE_FILE + '.' + fmt)
users = read_data(os.path.join(feature_store_path, USER_FEATURE_FILE), fmt)

user_item_train = pd.merge(user_item_train,
                           users,
                           how='inner',
                           on=['user_id'])
user_item_train = pd.merge(user_item_train,
                           photo_data,
                           how='left',
                           on=['photo_id'])
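Note the asymmetry in the merges: user features are joined with how='inner' (interactions without a matching user are dropped), while photo features use how='left' (all interactions are kept, with NaNs where a photo has no features). A toy illustration of the difference:

import pandas as pd

left = pd.DataFrame({'photo_id': [1, 2], 'click': [0, 1]})
right = pd.DataFrame({'photo_id': [1], 'photo_len': [30]})

print(pd.merge(left, right, how='inner', on=['photo_id']))  # 1 row; photo_id 2 dropped
print(pd.merge(left, right, how='left', on=['photo_id']))   # 2 rows; photo_len NaN for photo_id 2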