def data_processing(dataset_dir):
    """ Data Processing """
    train, validation, OO, NO, ON, NN, \
        pre_treatment, drug_fingerprint_info \
        = utils.data_processing(dataset_dir,
                                parameters.train_data_path,
                                parameters.validation_data_path,
                                parameters.OO_data_path,
                                parameters.NO_data_path,
                                parameters.ON_data_path,
                                parameters.NN_data_path,
                                parameters.pre_treatment_data_path,
                                parameters.drug_fingerprint_data_path)

    """ Printing Data Statistics """
    utils.print_statistics(len(train), len(validation), len(OO), len(NO),
                           len(ON), len(NN), len(pre_treatment),
                           len(drug_fingerprint_info))

    return train, validation, OO, NO, ON, NN, \
        pre_treatment, drug_fingerprint_info
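# Hypothetical usage sketch ('data/' is an assumed dataset directory, not a
# path defined in this repository); the unpacked names mirror the return
# signature of data_processing above.
(train, validation, OO, NO, ON, NN,
 pre_treatment, drug_fingerprint_info) = data_processing('data/')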
    # Tail of the stop-loss weighting function: drop positions whose drawdown
    # from the running maximum breaches the stop level `re`, then renormalise.
    unit = pd.Series(np.full([len(weight_new.index), 1], 1)[:, 0],
                     index=weight_new.index)
    unit[weight_new[weight_new < 0].index] *= -1.0  # flip sign for short positions
    stop_info = (df[stock].iloc[i - 1] - max_new[stock]) / max_new[stock] * unit[stock]
    if len(stop_info[stop_info < -re]) != 0:
        weight_new[stop_info[stop_info < -re].index] = 0
        weight_new = weight_new / weight_new.sum()
        flag = 1
    return weight_new, flag


if __name__ == "__main__":
    df = ut.data_processing(file_address, asset_data_files)
    print(df.head())

    unit = np.full([len(df.index), 1], 1)[:, 0]
    df['rebalancing'] = pd.Series(dtype=float)
    df['stoploss'] = pd.Series(dtype=float)
    df['nav'] = pd.Series(unit, index=df.index)

    weight_new = []
    max_new = []
    reb_index = 0
    for i in range(return_period, len(df.index)):
        if i < data_need:
            continue
        # record max price
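# Self-contained toy illustration of the stop-loss rule above, with made-up
# tickers and prices (assumptions, not repository data): positions whose
# drawdown from the running maximum exceeds the threshold are zeroed out and
# the remaining weights are renormalised.
import pandas as pd

re = 0.05                                            # 5% stop-loss threshold
weight_new = pd.Series({'A': 0.5, 'B': 0.3, 'C': 0.2})
last_price = pd.Series({'A': 94.0, 'B': 101.0, 'C': 49.0})
max_price = pd.Series({'A': 100.0, 'B': 100.0, 'C': 50.0})

unit = pd.Series(1.0, index=weight_new.index)
unit[weight_new[weight_new < 0].index] *= -1.0       # flip sign for shorts

drawdown = (last_price - max_price) / max_price * unit
stopped = drawdown[drawdown < -re].index             # assets breaching the stop
if len(stopped) != 0:
    weight_new[stopped] = 0
    weight_new = weight_new / weight_new.sum()       # renormalise survivors

print(weight_new)   # 'A' is stopped out; 'B' and 'C' rescale to 0.6 and 0.4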
print(f'Loading checkpoint {args.checkpoint}')
state_dict = torch.load(args.checkpoint, map_location=device)
model.load_state_dict(state_dict)

# load dataset
data_df = pd.read_csv(args.data_path)
data_folder = os.path.dirname(args.data_path)
dataset = AudioDataset(data_folder, data_df)

print('Setup loaders')
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
loader = DataLoader(dataset=dataset,
                    batch_size=hparams['batch_size'],
                    shuffle=False,
                    collate_fn=lambda x: data_processing(
                        x, text_transform, audio_transforms),
                    **kwargs)

blank_id = len(text_transform)
preds = []

print('Making prediction')
data_len = len(loader)
for i, batch in enumerate(loader):
    print(f'{i}/{data_len}')
    spectrograms, labels, input_lengths, label_lengths = batch
    spectrograms, labels = spectrograms.to(device), labels.to(device)

    output = model(spectrograms)          # (batch, time, n_class)
    output = F.log_softmax(output, dim=2)
    output = output.transpose(0, 1)       # (time, batch, n_class)
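# The prediction loop above is cut off before decoding. A common follow-up is
# greedy (best-path) CTC decoding: argmax per frame, collapse repeats, drop
# the blank index. This is a minimal sketch of that approach, not necessarily
# the decoder this repository uses; it assumes the blank_id defined above.
def greedy_ctc_decode(log_probs, blank_id):
    """log_probs: (time, batch, n_class) log-probabilities from the model."""
    best_path = log_probs.argmax(dim=2).transpose(0, 1)       # (batch, time)
    decoded = []
    for frame_indices in best_path:
        prev, tokens = None, []
        for idx in frame_indices.tolist():
            if idx != blank_id and idx != prev:               # collapse + de-blank
                tokens.append(idx)
            prev = idx
        decoded.append(tokens)   # label indices; map to text via text_transform
    return decoded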
                  config['stride'], config['dropout']).to(device)  # tail of the model construction call

# load dataset
data_df = pd.read_csv(os.path.join(config['data_path'], 'train.csv'))
train_df, val_df = train_test_split(data_df, test_size=config['val_fraction'])
train_dataset = AudioDataset(config['data_path'], train_df)
val_dataset = AudioDataset(config['data_path'], val_df)

print('Setup loaders')
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=config['batch_size'],
                          shuffle=True,
                          collate_fn=lambda x: data_processing(
                              x, text_transform, train_audio_transforms),
                          **kwargs)
val_loader = DataLoader(dataset=val_dataset,
                        batch_size=config['batch_size'],
                        shuffle=False,
                        collate_fn=lambda x: data_processing(
                            x, text_transform, valid_audio_transforms),
                        **kwargs)

optimizer = optim.AdamW(model.parameters(), config['learning_rate'])
blank_id = len(text_transform)
criterion = nn.CTCLoss(blank=blank_id).to(device)
scheduler = optim.lr_scheduler.OneCycleLR(optimizer,
                                          max_lr=config['learning_rate'],
                                          steps_per_epoch=int(len(train_loader)),
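# The snippet above is cut off inside the OneCycleLR call. For context, a
# sketch of the kind of CTC training step these objects are usually wired
# into; the function name and loop body are illustrative assumptions, not
# this repository's training loop (F is torch.nn.functional, as elsewhere).
def train_one_epoch(model, loader, criterion, optimizer, scheduler, device):
    model.train()
    for spectrograms, labels, input_lengths, label_lengths in loader:
        spectrograms, labels = spectrograms.to(device), labels.to(device)

        optimizer.zero_grad()
        output = model(spectrograms)                 # (batch, time, n_class)
        output = F.log_softmax(output, dim=2)
        output = output.transpose(0, 1)              # (time, batch, n_class) for CTCLoss

        loss = criterion(output, labels, input_lengths, label_lengths)
        loss.backward()
        optimizer.step()
        scheduler.step()                             # OneCycleLR steps once per batch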
def model_train():
    input_shape = 60

    input_data = load_data()
    data_processing()
    with open(CONSTANTS[1], 'rb') as f:
        word_dictionary = pickle.load(f)
    with open(CONSTANTS[2], 'rb') as f:
        inverse_word_dictionary = pickle.load(f)
    with open(CONSTANTS[3], 'rb') as f:
        label_dictionary = pickle.load(f)
    with open(CONSTANTS[4], 'rb') as f:
        output_dictionary = pickle.load(f)
    vocab_size = len(word_dictionary.keys())
    label_size = len(label_dictionary.keys())

    # Prepare the input data: group rows into (word, pos, tag) triples per sentence.
    aggregate_function = lambda frame: [
        (word, pos, label)
        for word, pos, label in zip(frame['word'].values.tolist(),
                                    frame['pos'].values.tolist(),
                                    frame['tag'].values.tolist())
    ]
    grouped_input_data = input_data.groupby('sent_no').apply(aggregate_function)
    sentences = [sentence for sentence in grouped_input_data]

    x = [[word_dictionary[word[0]] for word in sent] for sent in sentences]
    x = pad_sequences(maxlen=input_shape, sequences=x, padding='post', value=0)
    y = [[label_dictionary[word[2]] for word in sent] for sent in sentences]
    y = pad_sequences(maxlen=input_shape, sequences=y, padding='post', value=0)
    y = [
        np_utils.to_categorical(label, num_classes=label_size + 1)
        for label in y
    ]

    train_end = int(len(x) * 0.9)
    train_x, train_y = x[0:train_end], np.array(y[0:train_end])
    test_x, test_y = x[train_end:], np.array(y[train_end:])

    # Hyperparameters
    activation = 'selu'
    out_act = 'softmax'
    n_units = 100
    batch_size = 32
    epochs = 10
    output_dim = 20

    # Train the model
    lstm_model = create_Bi_LSTM(vocab_size, label_size, input_shape, output_dim,
                                n_units, out_act, activation)
    lstm_model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size,
                   verbose=1)

    # Save the model
    model_save_path = CONSTANTS[0]
    lstm_model.save(model_save_path)

    # Evaluate sentence by sentence on the held-out split
    N = test_x.shape[0]
    avg_accuracy = 0
    for start, end in zip(range(0, N, 1), range(1, N + 1, 1)):
        sentence = [
            inverse_word_dictionary[i] for i in test_x[start] if i != 0
        ]
        y_predict = lstm_model.predict(test_x[start:end])
        input_sequences, output_sequences = [], []
        for i in range(0, len(y_predict[0])):
            output_sequences.append(np.argmax(y_predict[0][i]))
            input_sequences.append(np.argmax(test_y[start][i]))

        scores = lstm_model.evaluate(test_x[start:end], test_y[start:end])
        print('Test Accuracy: loss = %0.6f accuracy = %0.2f%%' %
              (scores[0], scores[1] * 100))
        avg_accuracy += scores[1]

        output_sequences = ' '.join([
            output_dictionary[key] for key in output_sequences if key != 0
        ]).split()
        input_sequences = ' '.join([
            output_dictionary[key] for key in input_sequences if key != 0
        ]).split()
        output_input_comparison = pd.DataFrame(
            [sentence, output_sequences, input_sequences]).T
        print(output_input_comparison.dropna())
        print('\n\n')

    avg_accuracy /= N
    print("Model accuracy: %.2f%%." % (avg_accuracy * 100))
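# create_Bi_LSTM is called above but not defined in this snippet. The sketch
# below is an assumed implementation (Embedding -> Bidirectional LSTM ->
# TimeDistributed Dense) that matches the call signature; it is not the
# repository's actual definition.
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, TimeDistributed, Dense


def create_Bi_LSTM(vocab_size, label_size, input_shape, output_dim, n_units,
                   out_act, activation):
    model = Sequential()
    # +1 accounts for the padding index 0 produced by pad_sequences.
    model.add(Embedding(vocab_size + 1, output_dim,
                        input_length=input_shape, mask_zero=True))
    model.add(Bidirectional(LSTM(n_units, activation=activation,
                                 return_sequences=True)))
    model.add(TimeDistributed(Dense(label_size + 1, activation=out_act)))
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model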