def split(self, n, ntest, train_amount=1.0, debug=False):
    """
    Split sequence into train and test sets.
    n            - size of subsequences
    ntest        - number of tokens to use for the test sequence
    train_amount - fraction of the total training sequence to use, 0.0-1.0
    """
    nelements = len(self.sequence)
    ntrain_total = nelements - ntest
    if ntrain_total < 0:
        ntrain_total = nelements  # for debugging cases
    ntrain = int(ntrain_total * train_amount)
    if debug:
        print('total training tokens available:', ntrain_total)
        print('training tokens that will be used:', ntrain)
        print('test tokens:', ntest)
        print('Create train and test sets...')  # ~5 sec
    x_train, y_train = util.create_dataset(self.sequence, n=n, noffset=0, nelements=ntrain)
    x_test, y_test = util.create_dataset(self.sequence, n=n, noffset=ntrain, nelements=ntest)
    if debug:
        print('train data size:', len(x_train))
        print('test data size:', len(x_test))  # ntest - (n-1)
        print('x_train sample:')
        print(x_train[:5])
        print('y_train sample:')
        print(y_train[:5])
    return x_train, y_train, x_test, y_test
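# --- Illustrative sketch (added, not part of the original project) ---
# util.create_dataset is not shown in this snippet; the helper below is an
# assumed sliding-window implementation consistent with split()'s
# "test data size: ntest - (n-1)" comment. Name and behaviour are guesses.
import numpy as np

def create_dataset_sketch(sequence, n, noffset=0, nelements=None):
    """Take `nelements` tokens starting at `noffset`; for each window of n
    consecutive tokens, the first n-1 tokens form x and the last token is y,
    giving nelements - (n-1) (x, y) pairs."""
    if nelements is None:
        nelements = len(sequence) - noffset
    chunk = sequence[noffset:noffset + nelements]
    x = [chunk[i:i + n - 1] for i in range(len(chunk) - n + 1)]
    y = [chunk[i + n - 1] for i in range(len(chunk) - n + 1)]
    return np.array(x), np.array(y)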
def main():
    """ Main method. """
    # create the dataset and train
    x_train, y_train = create_dataset()
    train_and_show(x_train, y_train)
def main():
    en, pt = create_dataset(PATH, 10000, 0)
    print(en[100])
    print(pt[100])
    translator = EATranslator(pt, en[100], pt[100])
    translator.run_evaluations()
def accuracy_dir(self, dir_path):
    x, y = util.create_dataset(self.sess, self.mtcnn, self.input,
                               self.embedding, dir_path, self.phase_train)
    predicted_labels = self.clf.predict(x)
    predicted_names = self.encoder.inverse_transform(predicted_labels)
    acc = np.mean((predicted_names == y).astype(int))
    cannot = np.mean((predicted_names == 'Cannot Recognize').astype(int))
    far = 1 - acc - cannot
    results = {
        'VAL': acc * 100,
        'FAR': far * 100,
        'Cannot Recognize': cannot * 100
    }
    return results
def create_dictionary():
    en, pt = create_dataset(PATH, 10000, 0)
    vocabulary = list(get_vocabulary(en))
    dictionary = {}
    for word in vocabulary:
        print(word)
        try:
            translation = Translator(to_lang='pt-br').translate(word)
            dictionary.setdefault(word, [translation])
        except NotTranslated:
            dictionary.setdefault(word, [word])
    df = pd.DataFrame.from_dict(dictionary, orient='index')
    df = df.T
    df.to_csv('en-pt1.csv', index=False)
    return df
def __init__(self, gallery_dir, name='MyGallery', reuse=False, r=1, p=2):
    self.path = os.path.join(os.path.dirname(__file__), 'trained_params', name)
    self.clf = None
    self.encoder = None
    self.lmnn = None
    if not reuse:
        x, y = util.create_dataset(self.sess, self.mtcnn, self.input,
                                   self.embedding, gallery_dir, self.phase_train)
        print('Dataset Created\n', 'Input to RNN (X,y):', x.shape)
        # Fit the label encoder on the gallery labels plus an explicit outlier class
        y_copy = y.copy()
        y_copy.append('Cannot Recognize')
        self.encoder = LabelEncoder()
        self.encoder.fit(y_copy)
        outlier_label = self.encoder.transform(['Cannot Recognize'])
        self.clf = RadiusNeighborsClassifier(radius=r, weights='distance',
                                             outlier_label=outlier_label, p=p)
        self.clf.fit(x, self.encoder.transform(y))
        if not os.path.exists(self.path):
            os.makedirs(self.path)
        joblib.dump(self.encoder, os.path.join(self.path, 'enc.joblib.pkl'))
        joblib.dump(self.clf, os.path.join(self.path, 'clf.joblib.pkl'))
    else:
        clf_path = os.path.join(self.path, 'clf.joblib.pkl')
        self.clf = joblib.load(clf_path)
        enc_path = os.path.join(self.path, 'enc.joblib.pkl')
        self.encoder = joblib.load(enc_path)
n_split = len(data)
test_data = data[4]  # Test set of data
test_data = remove_duplicate_images(test_data)

trans = [
    transforms.Resize((224, 224)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
]
trans = transforms.Compose(trans)
test_dataset, test_char_idx = create_dataset(args.root, test_data, trans)

# Load model
device = "cpu" if args.gpu < 0 else "cuda:{}".format(args.gpu)
saved_models = torch.load(args.model_fn, map_location="cpu")
trunk, model = create_models(*saved_models["args"])
models = {"trunk": trunk, "embedder": model}
for key in models.keys():
    models[key].load_state_dict(saved_models[key])
    models[key].to(device)

colormap = plt.get_cmap("hsv")
plots = []
for enroll, color in zip(args.n_enroll, np.linspace(0, 0.8, len(args.n_enroll))):
    print("Enroll", enroll)
train = []
num_graphs = 10
model = None
stacks_cap = []

################################## Create Training Data ##################################
for i in range(num_graphs):
    graph = nx.random_lobster(100, 0.9, 0.9)
    learned_stack = LearnedDynamicArray()
    answer = util.dfs_iterative(graph, 1, learned_stack)
    train.append(learned_stack.history_n)
    stacks_cap.append(learned_stack.history_capacity)

################################## Format Training Data ##################################
buckets = 20
look_ahead_rate = 2
train_x, train_y, choices = util.create_dataset(train, buckets=buckets,
                                                look_ahead_rate=look_ahead_rate)
train_x = np.expand_dims(train_x, 2)
train_y = np.expand_dims(train_y, 2)

################################## Train Model ##################################
lstm_units = 4
model = util.build_lstm_model(lstm_units, buckets)
model.fit(train_x, train_y, batch_size=4, epochs=5, verbose=1)

################################## Visualize Results ##################################
num_examples_to_view = 10
for i in range(num_examples_to_view):
    predictions = model.predict(np.expand_dims(train_x[i], 0))
    plt.figure()
    plt.plot(train[0])
    plt.plot(stacks_cap[0])
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets

print("Hello Logistic Regression")

from util import create_dataset

x, y = create_dataset(100, insert_x0=False)

learning_rate = 0.01
n = len(x)
w = np.array([0, 0])


def error(x, y, w):
    """Mean logistic loss: (1/n) * sum(log(1 + exp(-y_i * w.x_i)))."""
    n = len(x)
    err = 0
    for i in range(0, n):
        err += np.log(1 + np.exp(np.dot(-y[i] * w.T, x[i])))
    err = err / n
    return err


err = error(x, y, w)
iter = 0
max_iter = 1000
while err > 0.01 and iter < max_iter:
    iter += 1
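# --- Illustrative sketch (added, an assumption about the truncated loop) ---
# The while-loop above is cut off; a plausible body would take one
# gradient-descent step on the logistic loss defined in error() and then
# re-evaluate it. The gradient of (1/n) * sum(log(1 + exp(-y_i * w.x_i)))
# with respect to w is (1/n) * sum(-y_i * x_i / (1 + exp(y_i * w.x_i))).
def gradient(x, y, w):
    n = len(x)
    g = np.zeros(len(w), dtype=float)
    for i in range(n):
        g += (-y[i] * x[i]) / (1 + np.exp(y[i] * np.dot(w, x[i])))
    return g / n

# possible loop body (not the author's original code):
# w = w - learning_rate * gradient(x, y, w)
# err = error(x, y, w)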
new_temp = new_temp.loc[new_df[(new_df == 0).sum(axis=1).values < 11].index]
new_rain = new_rain.loc[new_df[(new_df == 0).sum(axis=1).values < 11].index]
new_df = new_df[(new_df == 0).sum(axis=1).values < 11]

## Call the dataset-creation functions
train_data = train_generator(new_df, new_temp, new_rain, new_trend, timesteps,
                             date(2019, 6, 2), n_range=40, batch_size=30)
x_val, y_val = create_dataset(new_df, new_temp, new_rain, new_trend, timesteps,
                              date(2019, 6, 30))
x_test, _ = create_dataset(new_df, new_temp, new_rain, new_trend, timesteps,
                           date(2019, 7, 28))

## Load the model
model = makeModel(filter_num, layer_num, dropout_rate, timesteps)
optimizer = optimizers.Adam(learning_rate=lr)
model.compile(optimizer=optimizer, loss='mean_squared_error')

#### Training
history = model.fit(train_data,
                    steps_per_epoch=steps_per_epoch,
                    workers=1,
                    use_multiprocessing=False,
                    epochs=epochs,
                    verbose=1,
u.create_dataset(args.datasetpath,
                 class_names=class_names,
                 pre_emphasis_coef=args.pre_emphasis_coef,
                 frame_length=args.frame_length,
                 frame_step=args.frame_step,
                 window_function=np.hamming,
                 target_frame_number=args.target_frame_number,
                 random_time_shift=args.random_time_shift,
                 smooth=args.smooth,
                 smooth_length=args.smooth_length,
                 hertz_from=args.hertz_from,
                 hertz_to=None,
                 number_of_filters=args.number_of_filters,
                 power_of_2=args.power_of_2,
                 dtype='float32',
                 use_dct=args.use_dct,
                 add_delta=args.add_delta,
                 # NORMALIZATION
                 shift_static=shift_static,
                 scale_static=scale_static,
                 shift_delta=shift_delta,
                 scale_delta=scale_delta,
                 shift_delta_delta=shift_delta_delta,
                 scale_delta_delta=scale_delta_delta,
                 exclude_augmentation=args.exclude_augmentation,
                 augmentation_folder=args.augmentation_folder,
                 print_info=True)
def run_model(symbol, plot=False):
    # dataset
    ohlcv_histories, technical_indicators, next_day_open_values, unscaled_y, y_normaliser = \
        create_dataset(symbol)

    test_split = 0.9
    n = int(ohlcv_histories.shape[0] * test_split)
    test_length = len(ohlcv_histories) - n

    ohlcv_train = ohlcv_histories[:n]
    tech_ind_train = technical_indicators[:n]
    y_train = next_day_open_values[:n]

    ohlcv_test = ohlcv_histories[n:]
    tech_ind_test = technical_indicators[n:]
    y_test = next_day_open_values[n:]
    unscaled_y_test = unscaled_y[n:]

    print("Training Shape " + str(ohlcv_train.shape))
    print("Testing Shape " + str(ohlcv_test.shape))

    # model architecture
    # define two sets of inputs
    lstm_input = Input(shape=(history_points, 5), name='lstm_input')
    dense_input = Input(shape=(technical_indicators.shape[1],), name='tech_input')

    # the first branch operates on the first input
    x = LSTM(50, name='lstm_0')(lstm_input)
    x = Dropout(0.2, name='lstm_dropout_0')(x)
    lstm_branch = Model(inputs=lstm_input, outputs=x)

    # the second branch operates on the second input
    y = Dense(20, name='tech_dense_0')(dense_input)
    y = Activation("relu", name='tech_relu_0')(y)
    y = Dropout(0.2, name='tech_dropout_0')(y)
    technical_indicators_branch = Model(inputs=dense_input, outputs=y)

    # combine the output of the two branches
    combined = concatenate([lstm_branch.output, technical_indicators_branch.output],
                           name='concatenate')
    z = Dense(64, activation="sigmoid", name='dense_pooling')(combined)
    z = Dense(1, activation="linear", name='dense_out')(z)

    # our model will accept the inputs of the two branches and
    # then output a single value
    model = Model(inputs=[lstm_branch.input, technical_indicators_branch.input],
                  outputs=z)
    adam = optimizers.Adam(lr=0.0005)
    model.compile(optimizer=adam, loss='mse')
    model.fit(x=[ohlcv_train, tech_ind_train], y=y_train,
              batch_size=32, epochs=50, shuffle=True,
              validation_split=0.1, verbose=0)

    # evaluation
    y_test_predicted = model.predict([ohlcv_test, tech_ind_test])
    y_test_predicted = y_normaliser.inverse_transform(y_test_predicted)
    y_predicted = model.predict([ohlcv_histories, technical_indicators])
    y_predicted = y_normaliser.inverse_transform(y_predicted)

    assert unscaled_y_test.shape == y_test_predicted.shape
    real_mse = np.mean(np.square(unscaled_y_test - y_test_predicted))
    scaled_mse = real_mse / (np.max(unscaled_y_test) - np.min(unscaled_y_test)) * 100
    # print(scaled_mse)

    if plot:
        import matplotlib
        # matplotlib.use('MacOSX')
        import matplotlib.pyplot as plt

        plt.gcf().set_size_inches(22, 15, forward=True)
        start = 0
        end = -1
        real = plt.plot(unscaled_y_test[start:end], label='real')
        pred = plt.plot(y_test_predicted[start:end], label='predicted')
        # real = plt.plot(unscaled_y[start:end], label='real')
        # pred = plt.plot(y_predicted[start:end], label='predicted')
        plt.legend(['Real', 'Predicted'])
        plt.show()

    from datetime import datetime
    os.makedirs(f"./files/models/{symbol}", exist_ok=True)
    model.save(f'files/models/{symbol}/{symbol}_technical_model.h5')
import torch
import torch.nn as nn

import data
import util

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# options
from options import options
options = options()
opts = options.parse()

# data loader
data_loader = data.dataloader(opts)
train_loader = util.create_dataset(data_loader.train_data, data_loader.letteridx,
                                   data_loader.labelidx, opts)
test_loader = util.create_dataset(data_loader.test_data, data_loader.letteridx,
                                  data_loader.labelidx, opts)

from network import RNN
from train import trainer
from test import tester

'''RNN model'''
RNN = RNN(opts, data_loader.letteridx).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
import torch

import data
import util

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# options
from options import options
options = options()
opts = options.parse()

# data loader
data_loader = data.dataloader(opts)
train_loader = util.create_dataset(
    [data_loader.train_data, data_loader.train_label],
    data_loader.wordIdx, data_loader.labelIdx, opts)

from network import RNN
from train import train
from test import test

'''RNN model'''
RNN = RNN(opts, data_loader.wordIdx, data_loader.labelIdx,
          len(data_loader.labelIdx.items())).to(device)
if opts.print_model:
    print(RNN)

'''Optimizers'''
import torch.optim as optim
RNN_optim = optim.Adam(RNN.parameters(),
def train_eval(args, train_data, dev_data):
    logger = logging.getLogger("main")

    # Create dataset & dataloader
    trans = [
        transforms.Resize((224, 224)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]
    trans = transforms.Compose(trans)

    train_dataset, train_char_idx = create_dataset(args.root, train_data, trans)
    train_sampler = MetricBatchSampler(train_dataset, train_char_idx,
                                       n_max_per_char=args.n_max_per_char,
                                       n_batch_size=args.n_batch_size,
                                       n_random=args.n_random)
    train_dataloader = DataLoader(train_dataset,
                                  batch_sampler=train_sampler,
                                  collate_fn=collate_fn)
    # number of batches given to the trainer
    n_batch = int(len(train_dataloader))

    eval_train_dataloaders = \
        prepare_evaluation_dataloaders(args, args.eval_split * 3, train_data, trans)
    eval_dev_dataloaders = \
        prepare_evaluation_dataloaders(args, args.eval_split, dev_data, trans)

    # Construct model & optimizer
    device = "cpu" if args.gpu < 0 else "cuda:{}".format(args.gpu)
    trunk, model = create_models(args.emb_dim, args.dropout)
    trunk.to(device)
    model.to(device)

    if args.metric_loss == "triplet":
        loss_func = losses.TripletMarginLoss(margin=args.margin,
                                             normalize_embeddings=args.normalize,
                                             smooth_loss=args.smooth)
    elif args.metric_loss == "arcface":
        loss_func = losses.ArcFaceLoss(margin=args.margin,
                                       num_classes=len(train_data),
                                       embedding_size=args.emb_dim)
    loss_func.to(device)

    if args.optimizer == "SGD":
        trunk_optimizer = torch.optim.SGD(trunk.parameters(), lr=args.lr,
                                          momentum=args.momentum,
                                          weight_decay=args.decay)
        model_optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                          momentum=args.momentum,
                                          weight_decay=args.decay)
        optimizers = {
            "trunk_optimizer": trunk_optimizer,
            "embedder_optimizer": model_optimizer
        }
        if args.metric_loss == "arcface":
            loss_optimizer = torch.optim.SGD(loss_func.parameters(), lr=args.lr,
                                             momentum=args.momentum,
                                             weight_decay=args.decay)
            optimizers["loss_optimizer"] = loss_optimizer
    elif args.optimizer == "Adam":
        trunk_optimizer = torch.optim.Adam(trunk.parameters(), lr=args.lr,
                                           weight_decay=args.decay)
        model_optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                           weight_decay=args.decay)
        optimizers = {
            "trunk_optimizer": trunk_optimizer,
            "embedder_optimizer": model_optimizer
        }
        if args.metric_loss == "arcface":
            loss_optimizer = torch.optim.Adam(loss_func.parameters(), lr=args.lr,
                                              weight_decay=args.decay)
            optimizers["loss_optimizer"] = loss_optimizer
    else:
        raise NotImplementedError

    def lr_func(step):
        if step < args.warmup:
            return (step + 1) / args.warmup
        else:
            steps_decay = step // args.decay_freq
            return 1 / args.decay_factor ** steps_decay

    trunk_scheduler = torch.optim.lr_scheduler.LambdaLR(trunk_optimizer, lr_func)
    model_scheduler = torch.optim.lr_scheduler.LambdaLR(model_optimizer, lr_func)
    schedulers = {
        "trunk_scheduler": trunk_scheduler,
        "model_scheduler": model_scheduler
    }

    if args.miner == "none":
        mining_funcs = {}
    elif args.miner == "batch-hard":
        mining_funcs = {
            "post_gradient_miner": miners.BatchHardMiner(use_similarity=True)
        }

    best_dev_eer = 1.0
    i_epoch = 0

    def end_of_epoch_hook(trainer):
        nonlocal i_epoch, best_dev_eer
        logger.info(f"EPOCH\t{i_epoch}")
        if i_epoch % args.eval_freq == 0:
            train_eer, train_eer_std = evaluate(args, trainer.models["trunk"],
                                                trainer.models["embedder"],
                                                eval_train_dataloaders)
            dev_eer, dev_eer_std = evaluate(args, trainer.models["trunk"],
                                            trainer.models["embedder"],
                                            eval_dev_dataloaders)
            logger.info("Eval EER (mean, std):\t{}\t{}".format(train_eer, train_eer_std))
            logger.info("Eval EER (mean, std):\t{}\t{}".format(dev_eer, dev_eer_std))
            if dev_eer < best_dev_eer:
                logger.info("New best model!")
                best_dev_eer = dev_eer
        i_epoch += 1

    def end_of_iteration_hook(trainer):
        for scheduler in schedulers.values():
            scheduler.step()

    trainer = trainers.MetricLossOnly(
        models={"trunk": trunk, "embedder": model},
        optimizers=optimizers,
        batch_size=None,
        loss_funcs={"metric_loss": loss_func},
        mining_funcs=mining_funcs,
        iterations_per_epoch=n_batch,
        dataset=train_dataset,
        data_device=None,
        loss_weights=None,
        sampler=train_sampler,
        collate_fn=collate_fn,
        lr_schedulers=None,
        end_of_epoch_hook=end_of_epoch_hook,
        end_of_iteration_hook=end_of_iteration_hook,
        dataloader_num_workers=1)

    trainer.train(num_epochs=args.epoch)

    if args.save_model:
        save_models = {
            "trunk": trainer.models["trunk"].state_dict(),
            "embedder": trainer.models["embedder"].state_dict(),
            "args": [args.emb_dim]
        }
        torch.save(save_models, f"model/{args.suffix}.mdl")

    return best_dev_eer
def showforcastpricesvalues(symbol, portf_value, forecast_model, forecast_time,
                            start_d, forecast_date, forecast_lookback):
    '''
    :param symbol: Stock symbol
    :param portf_value: Prices dataframe
    :param forecast_model: Model for forecasting
    :param forecast_time: Number of days to forecast in the future
    :param start_d: Lookback date
    :param forecast_date: Forecasting date
    :param forecast_lookback: Number of days to look back
    :return: Prices plot.
    '''
    # XGBoost (1-day forecasting)
    if forecast_model == 'model1':
        ## Indicators to use
        '''
        * others_cr: Cumulative Return (Close)
        * trend_ema_fast: Fast Exponential Moving Average (EMA) (Close)
        * volatility_kcl: Keltner Channel (KC) (High, Low, Close)'''
        # load model
        model = pickle.load(open("./xgboost.pkl", "rb"))

        # Business days
        start = forecast_date + dt.timedelta(1)
        rng = pd.date_range(pd.Timestamp(start), periods=forecast_time, freq='B')
        bussines_days = rng.strftime('%Y-%m-%d')

        df_prices = portf_value.copy()

        # Create dataframe with all TA indicators
        # TODO Change to Alpha Vantage indicators?
        df = add_all_ta_features(portf_value, "Open", "High", "Low", "Close",
                                 "Volume", fillna=True)

        # Delete unneeded columns
        del df['Open']
        del df['High']
        del df['Low']
        del df['Close']
        del df['Volume']

        # Create 'date' column for posterior index
        df['date'] = df.index
        # Rename column for correlation matrix. Can't have spaces.
        df.rename(columns={'Adj Close': 'Adj_Close'}, inplace=True)

        # Scale data for using reg:logistic as array
        scaler = MinMaxScaler(feature_range=(0, 1))
        features = df[['others_cr', 'trend_ema_fast', 'volatility_kcl']]
        dataset_scaled = scaler.fit_transform(features)

        # Scale Adj_Close
        scaler1 = MinMaxScaler(feature_range=(0, 1))
        feature = df[['Adj_Close']]
        X_test_scaled = scaler1.fit_transform(feature)

        # Prediction dataframe columns and list for saving predictions
        cols = ['Price', 'date']
        lst = []

        # Calculate price
        prediction = model.predict(dataset_scaled)
        preds = scaler1.inverse_transform(prediction.reshape(-1, 1))

        # Convert array to series
        mylist = preds.tolist()
        p = mylist[-1][-1]

        # Add value to the predictions dataframe for plotting
        lst.append([p, bussines_days.values[0]])
        df_predictions = pd.DataFrame(lst, columns=cols)

        # Create report
        metric = model_report(df_predictions, df_prices)

        # Plot chart
        plot_prices_pred = plot_stock_prices_prediction_XGBoost(
            df_prices, df_predictions, symbol)

        return symbol, start_d, forecast_date, plot_prices_pred, metric

    # KNN model (1-day forecasting)
    if forecast_model == 'model2':
        ## Indicators to use
        '''
        * others_cr: Cumulative Return (Close)
        * trend_ema_fast: Fast Exponential Moving Average (EMA) (Close)
        * volatility_kcl: Keltner Channel (KC) (High, Low, Close)'''
        # load model
        model = pickle.load(open("./knn.pkl", "rb"))

        # Business days
        start = forecast_date + dt.timedelta(1)
        rng = pd.date_range(pd.Timestamp(start), periods=forecast_time, freq='B')
        bussines_days = rng.strftime('%Y-%m-%d')

        df_prices = portf_value.copy()

        # Create dataframe with all TA indicators
        df = add_all_ta_features(portf_value, "Open", "High", "Low", "Close",
                                 "Volume", fillna=True)

        # Delete unneeded columns
        del df['Open']
        del df['High']
        del df['Low']
        del df['Close']
        del df['Volume']

        # Create 'date' column for posterior index
        df['date'] = df.index
        # Rename column for correlation matrix. Can't have spaces.
        df.rename(columns={'Adj Close': 'Adj_Close'}, inplace=True)

        # Scale data for using reg:logistic as array
        scaler = MinMaxScaler(feature_range=(0, 1))
        features = df[['others_cr', 'trend_ema_fast', 'volatility_kcl']]
        dataset_scaled = scaler.fit_transform(features)

        # Scale Adj_Close
        scaler1 = MinMaxScaler(feature_range=(0, 1))
        feature = df[['Adj_Close']]
        X_test_scaled = scaler1.fit_transform(feature)

        # Prediction dataframe columns and list for saving predictions
        cols = ['Price', 'date']
        lst = []

        # Calculate price
        prediction = model.predict(dataset_scaled)
        preds = scaler1.inverse_transform(prediction.reshape(-1, 1))

        # Convert array to series
        mylist = preds.tolist()
        p = mylist[-1][-1]

        # Add value to the predictions dataframe for plotting
        lst.append([p, bussines_days.values[0]])
        df_predictions = pd.DataFrame(lst, columns=cols)

        # Create report
        metric = model_report(df_predictions, df_prices)

        # Plot chart
        plot_prices_pred = plot_stock_prices_prediction_XGBoost(
            df_prices, df_predictions, symbol)

        return symbol, start_d, forecast_date, plot_prices_pred, metric

    # ARIMA
    if forecast_model == 'model3':
        # Rolling forecasts
        '''
        Load the model and use it in a rolling-forecast manner, updating the
        transform and model for each time step. This is the preferred method,
        as it is how one would use this model in practice and it achieves the
        best performance.
        '''
        df_prices = portf_value.copy()

        # Business days
        start = forecast_date
        rng = pd.date_range(pd.Timestamp(start), periods=forecast_time, freq='B')
        bussines_days = rng.strftime('%Y-%m-%d')

        # Prediction dataframe columns and list for adding rows to the dataframe
        cols = ['Price', 'date', 'lower_band', 'upper_band', 'Std. Error']
        lst = []

        # Create date column to save the next date
        portf_value['date'] = portf_value.index

        # Forecast for every business day
        for i in bussines_days:
            # load the dataset
            dataset = np.array(portf_value.iloc[:, 0].tolist())[np.newaxis]
            dataset = dataset.T
            dataset = dataset.astype('float32')

            # normalize the dataset
            scaler = MinMaxScaler(feature_range=(0, 1))
            dataset = scaler.fit_transform(dataset)

            # predict
            model = ARIMA(dataset, order=(4, 0, 1))
            model_fit = model.fit(disp=0)
            yhat, se, conf = model_fit.forecast(alpha=0.05)

            # Inverse-scale the prediction
            prediction = yhat[0].reshape(-1, 1)
            futurePredict = scaler.inverse_transform(prediction)

            # Inverse-transform the confidence intervals
            inv_scaled_conf = scaler.inverse_transform(conf[0][0].reshape(1, -1))
            inv_scaled_conf1 = scaler.inverse_transform(conf[0][1].reshape(1, -1))

            # Confidence intervals
            lower_band = inv_scaled_conf[0][0]
            upper_band = inv_scaled_conf1[0][0]

            # Append the latest prediction to portf_value
            prediction = futurePredict.item(0)
            portf_value.loc[len(portf_value)] = [prediction, i]
            portf_value.index = portf_value['date']

            # Add value to the predictions list
            lst.append([prediction, i, lower_band, upper_band, se])

        # Dataframe for predictions and confidence intervals
        df = pd.DataFrame(lst, columns=cols)

        # Order confidence values for plotting
        lower_list = df['lower_band'].tolist()
        lower_list.sort(reverse=True)
        upper_list = df['upper_band'].tolist()
        upper_list.sort()

        # Add confidence-band data to the dataframe
        df['lower_band'] = lower_list
        df['upper_band'] = upper_list
        df.set_index('date', inplace=True)
        df.rename(columns={0: 'Price'}, inplace=True)

        # Prepare dataframe for metrics
        df_predictions = df[['Price']].copy()
        df_predictions['date'] = df_predictions.index

        # Reset index
        df_predictions.reset_index(drop=True)

        # Create report
        metric = model_report(df_predictions, df_prices)
        # TODO Accuracy metrics
        # https://www.machinelearningplus.com/time-series/arima-model-time-series-forecasting-python/

        # Plot chart
        plot_prices_pred = plot_stock_prices_prediction_ARIMA(df_prices, df, symbol)

        return symbol, start_d, forecast_date, plot_prices_pred, metric

    # LSTM
    if forecast_model == 'model4':
        # load model
        model = load_model('./lstm_model')

        df_prices = portf_value.copy()

        # Business days
        start = forecast_date + dt.timedelta(1)
        rng = pd.date_range(pd.Timestamp(start), periods=forecast_time, freq='B')
        bussines_days = rng.strftime('%Y-%m-%d')

        # Prediction dataframe columns and list for adding rows to the dataframe
        cols = ['Price', 'date']
        lst = []

        # Create date column to save the next date
        portf_value['date'] = portf_value.index

        for i in bussines_days:
            # load the dataset
            dataset = np.array(portf_value.iloc[:, 0].tolist())[np.newaxis]
            dataset = dataset.T
            dataset = dataset.astype('float32')

            # normalize the dataset
            scaler = MinMaxScaler(feature_range=(0, 1))
            dataset = scaler.fit_transform(dataset)

            # prepare the X and y labels
            X, y = create_dataset(dataset)

            # Take 80% of the data as the training sample and 20% as the test sample
            trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.20,
                                                            shuffle=False)

            # reshape input to be [samples, time steps, features]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

            # Prediction
            testPredict = model.predict(testX)
            futurePredict = model.predict(np.asarray([[testPredict[-1]]]))
            futurePredict = scaler.inverse_transform(futurePredict)
            prediction = futurePredict.item(0)

            # Append the latest prediction to portf_value
            portf_value.loc[len(portf_value)] = [prediction, i]
            portf_value.index = portf_value['date']

            # Add value to the predictions list
            lst.append([prediction, i])

        df_predictions = pd.DataFrame(lst, columns=cols)

        # Create report
        metric = model_report(df_predictions, df_prices)

        # Plot chart
        plot_prices_pred = plot_stock_prices_prediction_LSTM(
            df_prices, df_predictions, symbol)

        return symbol, start_d, forecast_date, plot_prices_pred, metric
import pandas as pd
from sentiment_analyser import SentimentAnalyser
import logging
logging.getLogger('tensorflow').disabled = True

from mf import RecommenderSystem
from util import (get_sentiment_data, get_reviews_for_id, get_user_id,
                  get_final_score, get_recommendation_data, create_dataset)
from variables import table_name, db_url
from apscheduler.schedulers.background import BackgroundScheduler

scheduler = BackgroundScheduler()

'''
python -W ignore bellarena.py
1187 users
147 cloths
'''

create_dataset()
data = pd.read_sql_table(table_name, db_url)

recommendations = RecommenderSystem(data)
recommendations.run()

analyser = SentimentAnalyser(data)
analyser.run()


def train_task():
    recommendations.run_finetune_mf()


if __name__ == "__main__":
    scheduler.add_job(func=train_task, trigger="interval", seconds=300)
    scheduler.start()
def train_eval(args, train_data, dev_data):
    logger = logging.getLogger("main")

    # Create dataset & dataloader
    trans = [
        transforms.Resize((224, 224)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]
    trans = transforms.Compose(trans)

    train_dataset, train_char_idx = create_dataset(args.root, train_data, trans)
    train_sampler = MetricBatchSampler(train_dataset, train_char_idx,
                                       n_max_per_char=args.n_max_per_char,
                                       n_batch_size=args.n_batch_size,
                                       n_random=args.n_random)
    train_dataloader = DataLoader(train_dataset,
                                  batch_sampler=train_sampler,
                                  collate_fn=collate_fn)
    # number of batches given to the trainer
    n_batch = int(len(train_dataloader))

    eval_train_dataloaders = \
        prepare_evaluation_dataloaders(args, args.eval_split * 3, train_data, trans)
    eval_dev_dataloaders = \
        prepare_evaluation_dataloaders(args, args.eval_split, dev_data, trans)

    # Construct model & optimizer
    device = "cpu" if args.gpu < 0 else "cuda:{}".format(args.gpu)
    trunk = models.resnet18(pretrained=True)
    trunk_output_size = trunk.fc.in_features
    trunk.fc = Identity()
    trunk.to(device)
    model = nn.Sequential(nn.Linear(trunk_output_size, args.emb_dim), Normalize())
    model.to(device)

    if args.optimizer == "SGD":
        trunk_optimizer = torch.optim.SGD(trunk.parameters(), lr=args.lr,
                                          momentum=args.momentum,
                                          weight_decay=args.decay)
        model_optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                          momentum=args.momentum,
                                          weight_decay=args.decay)
    else:
        raise NotImplementedError

    loss_func = losses.TripletMarginLoss(margin=args.margin,
                                         normalize_embeddings=args.normalize)

    best_dev_eer = 1.0
    i_epoch = 0

    def end_of_epoch_hook(trainer):
        nonlocal i_epoch, best_dev_eer
        logger.info(f"EPOCH\t{i_epoch}")
        if i_epoch % args.eval_freq == 0:
            train_eer, train_eer_std = evaluate(args, trainer.models["trunk"],
                                                trainer.models["embedder"],
                                                eval_train_dataloaders)
            dev_eer, dev_eer_std = evaluate(args, trainer.models["trunk"],
                                            trainer.models["embedder"],
                                            eval_dev_dataloaders)
            logger.info("Eval EER (mean, std):\t{}\t{}".format(train_eer, train_eer_std))
            logger.info("Eval EER (mean, std):\t{}\t{}".format(dev_eer, dev_eer_std))
            if dev_eer < best_dev_eer:
                logger.info("New best model!")
                best_dev_eer = dev_eer
        i_epoch += 1

    trainer = trainers.MetricLossOnly(
        models={"trunk": trunk, "embedder": model},
        optimizers={
            "trunk_optimizer": trunk_optimizer,
            "embedder_optimizer": model_optimizer
        },
        batch_size=None,
        loss_funcs={"metric_loss": loss_func},
        mining_funcs={},
        iterations_per_epoch=n_batch,
        dataset=train_dataset,
        data_device=None,
        loss_weights=None,
        sampler=train_sampler,
        collate_fn=collate_fn,
        lr_schedulers=None,  # TODO: use warm-up
        end_of_epoch_hook=end_of_epoch_hook,
        dataloader_num_workers=1)

    trainer.train(num_epochs=args.epoch)

    if args.save_model:
        torch.save(trainer.models, f"model/{args.suffix}.mdl")

    return best_dev_eer
def test(cmd_arg, data, gloveVectors):
    image, caption, keys, word = create_dataset(data)
    batch_size = cmd_arg.batchSize

    # Drop the trailing partial batch so all batches are full
    if image.shape[0] % batch_size != 0:
        idx = range((keys.shape[0] // batch_size) * batch_size)
        keys = keys[idx]
        image = image[idx, :, :]
        caption = caption[idx, :, :]
        word = [word[x] for x in idx]

    indices = np.arange(image.shape[0])
    embedding_dimension = cmd_arg.embedSize  # max(image.shape[2], caption.shape[2])
    indices_copy = np.copy(indices)

    with tf.Graph().as_default():
        image_placeholder = tf.placeholder(dtype=tf.float32,
                                           shape=[cmd_arg.batchSize, image.shape[2]])
        caption_placeholder = tf.placeholder(dtype=tf.float32,
                                             shape=[cmd_arg.batchSize, caption.shape[2]])
        word_placeholder = tf.placeholder(dtype=tf.float32,
                                          shape=[cmd_arg.batchSize, 300])

        scaled_image_placeholder = scale_data(image_placeholder, min=0.0, max=65.0)
        # scaled_image_placeholder = tf.pow(image_placeholder, 1.0)  # 1.0 for no scaling

        image_embedding_tensor, caption_embedding_tensor, word_embedding_tensor = model(
            scaled_image_placeholder, caption_placeholder, word_placeholder,
            embedding_dimension, cmd_arg.numberLayers,
            activation_fn(cmd_arg.activation),
            'image_embedding', 'caption_embedding', 'word_embedding')

        saver = tf.train.Saver()
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config) as session:
            saver.restore(session,
                          tf.train.latest_checkpoint(cmd_arg.experimentDirectory))
            print('Restored Checkpoint: ',
                  tf.train.latest_checkpoint(cmd_arg.experimentDirectory))

            # Pull the trained weights and biases for each branch
            image_weights = [
                session.run(tf.contrib.framework.get_variables_by_name(
                    'image_embedding/image_embedding_' + str(i + 1) + '/weights')[0])
                for i in range(cmd_arg.numberLayers)
            ]
            image_biases = [
                session.run(tf.contrib.framework.get_variables_by_name(
                    'image_embedding/image_embedding_' + str(i + 1) + '/biases')[0])
                for i in range(cmd_arg.numberLayers)
            ]
            caption_weights = [
                session.run(tf.contrib.framework.get_variables_by_name(
                    'caption_embedding/caption_embedding_' + str(i + 1) + '/weights')[0])
                for i in range(cmd_arg.numberLayers)
            ]
            caption_biases = [
                session.run(tf.contrib.framework.get_variables_by_name(
                    'caption_embedding/caption_embedding_' + str(i + 1) + '/biases')[0])
                for i in range(cmd_arg.numberLayers)
            ]
            word_weights = [
                session.run(tf.contrib.framework.get_variables_by_name(
                    'word_embedding/word_embedding_' + str(i + 1) + '/weights')[0])
                for i in range(cmd_arg.numberLayers)
            ]
            word_biases = [
                session.run(tf.contrib.framework.get_variables_by_name(
                    'word_embedding/word_embedding_' + str(i + 1) + '/biases')[0])
                for i in range(cmd_arg.numberLayers)
            ]

            # Pseudo-inverses used to invert the embeddings
            image_weights_inv = [np.linalg.pinv(x) for x in image_weights]
            caption_weights_inv = [np.linalg.pinv(x) for x in caption_weights]
            word_weights_inv = [np.linalg.pinv(x) for x in word_weights]

            all_cc_embedding = []
            all_ic_embedding = []
            all_wc_embedding = []
            all_ii_embedding = []
            all_ci_embedding = []
            all_wi_embedding = []
            all_iw_embedding = []
            all_cw_embedding = []
            all_ww_embedding = []
            all_image_embedding = []
            all_caption_embedding = []
            all_word_embedding = []
            all_original_caption = []
            all_original_image = []
            all_valid_key = []

            for i in range(0, image.shape[0], batch_size):
                # for i in range(1):
                valid_image, valid_caption, valid_word, valid_keys, indices_copy, _ = \
                    create_batch(image, caption, word, keys, indices_copy,
                                 batch_size, gloveVectors)
                image_embedding, caption_embedding, word_embedding = session.run(
                    [image_embedding_tensor, caption_embedding_tensor,
                     word_embedding_tensor],
                    feed_dict={
                        image_placeholder: valid_image,
                        caption_placeholder: valid_caption,
                        word_placeholder: valid_word
                    })
                all_image_embedding.extend(image_embedding)
                all_caption_embedding.extend(caption_embedding)
                all_word_embedding.extend(word_embedding)

                # from test_three_branch_pkl import save_image
                ii_vector = inversion(image_embedding, image_weights_inv,
                                      image_biases, cmd_arg.activation, isTranspose=False)
                ic_vector = inversion(image_embedding, caption_weights_inv,
                                      caption_biases, cmd_arg.activation, isTranspose=False)
                iw_vector = inversion(image_embedding, word_weights_inv,
                                      word_biases, cmd_arg.activation, isTranspose=False)
                ci_vector = inversion(caption_embedding, image_weights_inv,
                                      image_biases, cmd_arg.activation, isTranspose=False)
                cc_vector = inversion(caption_embedding, caption_weights_inv,
                                      caption_biases, cmd_arg.activation, isTranspose=False)
                cw_vector = inversion(caption_embedding, word_weights_inv,
                                      word_biases, cmd_arg.activation, isTranspose=False)
                wi_vector = inversion(word_embedding, image_weights_inv,
                                      image_biases, cmd_arg.activation, isTranspose=False)
                wc_vector = inversion(word_embedding, caption_weights_inv,
                                      caption_biases, cmd_arg.activation, isTranspose=False)
                ww_vector = inversion(word_embedding, word_weights_inv,
                                      word_biases, cmd_arg.activation, isTranspose=False)

                all_ii_embedding.extend(ii_vector)
                all_ic_embedding.extend(ic_vector)
                all_iw_embedding.extend(iw_vector)
                all_ci_embedding.extend(ci_vector)
                all_cc_embedding.extend(cc_vector)
                all_cw_embedding.extend(cw_vector)
                all_wi_embedding.extend(wi_vector)
                all_wc_embedding.extend(wc_vector)
                all_ww_embedding.extend(ww_vector)
                all_original_caption.extend(valid_caption)
                all_original_image.extend(valid_image)
                all_valid_key.extend(valid_keys)

                print("Completed:", (i + 1) // batch_size, "/",
                      image.shape[0] // batch_size)

            with open(os.path.join(cmd_arg.experimentDirectory,
                                   'validation_inversion_embedding.pkl'), 'wb') as f:
                pkl.dump(
                    {
                        "image_keys": all_valid_key,
                        "cc_embedding": all_cc_embedding,
                        "ic_embedding": all_ic_embedding,
                        "wc_embedding": all_wc_embedding,
                        "ci_embedding": all_ci_embedding,
                        "wi_embedding": all_wi_embedding,
                        "ii_embedding": all_ii_embedding,
                        "cw_embedding": all_cw_embedding,
                        "iw_embedding": all_iw_embedding,
                        "ww_embedding": all_ww_embedding,
                        "image_embedding": all_image_embedding,
                        "caption_embedding": all_caption_embedding,
                        "word_embedding": all_word_embedding,
                        "orig_image_embedding": all_original_image,
                        "orig_caption_embedding": all_original_caption
                    }, f)
# Hyper parameters
# Raising HIDDEN_SIZE to 100 or 200, or NUM_LAYERS to 2, still fails to fit; unclear why.
TIME_STEP = 1
INPUT_SIZE = 60
HIDDEN_SIZE = 50
NUM_LAYERS = 1
OUTPUT_SIZE = 1

# Load the preprocessed (normalized) training data
training_set_scaled = np.load('input/000001.XSHE_train.npy')
scaler = joblib.load('encoder/standard_scaler.close.pkl')

# Creating a data structure with 60 timesteps and 1 output
train_X, train_y = util.create_dataset(training_set_scaled, input_size=INPUT_SIZE)
# print(train_X.shape)
'''
(672, 60)
'''

# Reshape to (batch, time_step, input_size), the shape expected by the LSTM
train_X = train_X.reshape(train_X.shape[0], 1, train_X.shape[1])
# print(train_X.shape)
'''
(672, 1, 60)
'''

# Part 2 - Building the RNN
rnn = RNN(INPUT_SIZE, HIDDEN_SIZE, NUM_LAYERS, OUTPUT_SIZE)
optimiser = torch.optim.Adam(rnn.parameters(), lr=args.lr)
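# --- Illustrative sketch (added, not the project's util.create_dataset) ---
# An assumed sliding-window helper consistent with the shapes printed above:
# each row of X is 60 consecutive scaled closes and y is the value that
# follows the window. Assumes the input series has shape (N, 1).
import numpy as np

def sliding_windows(series, input_size=60):
    X, y = [], []
    for i in range(len(series) - input_size):
        X.append(series[i:i + input_size, 0])
        y.append(series[i + input_size, 0])
    return np.array(X), np.array(y)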
training_x, validation_x, test_x, training_y, validation_y, test_y = \
    u.create_dataset(input_path=input_path,
                     class_names=class_names,
                     pre_emphasis_coef=0.95,
                     frame_length=400,
                     frame_step=160,
                     window_function=np.hamming,
                     target_frame_number=110,
                     random_time_shift=True,
                     smooth=True,
                     smooth_length=5,
                     hertz_from=300,
                     hertz_to=None,
                     number_of_filters=40,
                     power_of_2=True,
                     dtype='float32',
                     use_dct=False,
                     add_delta=True,
                     # NORMALIZATION
                     shift_static=0,
                     scale_static=1,
                     shift_delta=0,
                     scale_delta=1,
                     shift_delta_delta=0,
                     scale_delta_delta=1,
                     exclude_augmentation=False,
                     augmentation_folder="augmentation",
                     print_info=True)
def train(cmd_arg, data, gloveVectors):
    image, caption, keys, word = create_dataset(data)
    indices = np.arange(image.shape[0])
    batch_size = cmd_arg.batchSize
    embedding_dimension = cmd_arg.embedSize  # max(image.shape[2], caption.shape[2])
    indices_copy = np.copy(indices)
    noise_amp = cmd_arg.embedNoise

    with tf.Graph().as_default():
        image_placeholder = tf.placeholder(dtype=tf.float32,
                                           shape=[batch_size, image.shape[2]])
        caption_placeholder = tf.placeholder(dtype=tf.float32,
                                             shape=[batch_size, caption.shape[2]])
        word_placeholder = tf.placeholder(dtype=tf.float32,
                                          shape=[batch_size, 300])
        label_matrix_placeholder = tf.placeholder(dtype=tf.float32,
                                                  shape=[batch_size, batch_size])

        scaled_image_placeholder = scale_data(image_placeholder, min=0.0, max=65.0)
        # scaled_image_placeholder = tf.pow(image_placeholder, 1.0)  # 1.0 for no scaling
        # noisy_image_placeholder = tf.add(scaled_image_placeholder,
        #     tf.random_uniform(scaled_image_placeholder.get_shape(), minval=-0.1, maxval=0.1))
        # noisy_caption_placeholder = tf.add(caption_placeholder,
        #     tf.random_uniform(caption_placeholder.get_shape(), minval=-0.1, maxval=0.1))
        # noisy_word_placeholder = tf.add(word_placeholder,
        #     tf.random_uniform(word_placeholder.get_shape(), minval=0.0, maxval=0.0))

        image_embedding_tensor_in, caption_embedding_tensor_in, word_embedding_tensor_in = model(
            scaled_image_placeholder, caption_placeholder, word_placeholder,
            embedding_dimension, cmd_arg.numberLayers,
            activation_fn(cmd_arg.activation),
            'image_embedding', 'caption_embedding', 'word_embedding')

        # Add uniform noise to each embedding before computing the losses
        image_embedding_tensor = image_embedding_tensor_in + tf.random_uniform(
            image_embedding_tensor_in.get_shape(), minval=-1 * noise_amp, maxval=noise_amp)
        caption_embedding_tensor = caption_embedding_tensor_in + tf.random_uniform(
            caption_embedding_tensor_in.get_shape(), minval=-1 * noise_amp, maxval=noise_amp)
        word_embedding_tensor = word_embedding_tensor_in + tf.random_uniform(
            word_embedding_tensor_in.get_shape(), minval=-1 * noise_amp, maxval=noise_amp)

        # Pairwise distances within and across the three branches
        ic_positive_distance, ic_negative_distance, distance, labels = pairwise_distance(
            image_embedding_tensor, caption_embedding_tensor,
            label_matrix_placeholder, cmd_arg.margin)
        image_positive_distance, image_negative_distance, _, _ = pairwise_distance(
            image_embedding_tensor, image_embedding_tensor,
            label_matrix_placeholder, cmd_arg.margin)
        cw_positive_distance, cw_negative_distance, _, _ = pairwise_distance(
            caption_embedding_tensor, word_embedding_tensor,
            label_matrix_placeholder, cmd_arg.margin)
        caption_positive_distance, caption_negative_distance, _, _ = pairwise_distance(
            caption_embedding_tensor, caption_embedding_tensor,
            label_matrix_placeholder, cmd_arg.margin)
        wi_positive_distance, wi_negative_distance, _, _ = pairwise_distance(
            word_embedding_tensor, image_embedding_tensor,
            label_matrix_placeholder, cmd_arg.margin)
        word_positive_distance, word_negative_distance, _, _ = pairwise_distance(
            word_embedding_tensor, word_embedding_tensor,
            label_matrix_placeholder, cmd_arg.margin)

        total_loss = image_positive_distance + caption_positive_distance + word_positive_distance \
            + image_negative_distance + caption_negative_distance + word_negative_distance \
            + 1.0 * ic_positive_distance + cw_positive_distance + 1.0 * wi_positive_distance \
            + ic_negative_distance + cw_negative_distance + wi_negative_distance

        tf.summary.scalar('loss', total_loss)
        tf.summary.scalar('image_positive_distance', image_positive_distance)
        tf.summary.scalar('caption_positive_distance', caption_positive_distance)
        tf.summary.scalar('word_positive_distance', word_positive_distance)
        tf.summary.scalar('image_negative_distance', image_negative_distance)
        tf.summary.scalar('caption_negative_distance', caption_negative_distance)
        tf.summary.scalar('word_negative_distance', word_negative_distance)
        tf.summary.scalar('ic_positive_distance', ic_positive_distance)
        tf.summary.scalar('cw_positive_distance', cw_positive_distance)
        tf.summary.scalar('wi_positive_distance', wi_positive_distance)
        tf.summary.scalar('ic_negative_distance', ic_negative_distance)
        tf.summary.scalar('cw_negative_distance', cw_negative_distance)
        tf.summary.scalar('wi_negative_distance', wi_negative_distance)

        global_step_tensor = tf.Variable(0, trainable=False)
        # train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(total_loss, global_step_tensor)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        gvs = optimizer.compute_gradients(total_loss)
        capped_gvs = [(tf.clip_by_value(grad, -1.0, 1.0), var) for grad, var in gvs]
        train_op = optimizer.apply_gradients(capped_gvs, global_step_tensor)

        saver = tf.train.Saver()
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        summary_tensor = tf.summary.merge_all()
        epoch_per_iteration = image.shape[0] // cmd_arg.batchSize

        with tf.Session(config=session_config) as session:
            summary_writer = tf.summary.FileWriter(
                os.path.join(cmd_arg.experimentDirectory, 'train'),
                graph=tf.get_default_graph())
            session.run([tf.global_variables_initializer()])

            for i in range(cmd_arg.totalIteration):
                if indices_copy.shape[0] == 0:
                    indices_copy = np.copy(indices)
                train_image, train_caption, train_word, train_keys, indices_copy, label_matrix = \
                    create_batch(image, caption, word, keys, indices_copy,
                                 batch_size, gloveVectors)

                _, loss, im_pos, cap_pos, wor_pos, im_neg, cap_neg, wor_neg, \
                    ic_pos, cw_pos, wi_pos, ic_neg, cw_neg, wi_neg, dist, labs, summary, \
                    global_step = session.run(
                        [train_op, total_loss,
                         image_positive_distance, caption_positive_distance, word_positive_distance,
                         image_negative_distance, caption_negative_distance, word_negative_distance,
                         ic_positive_distance, cw_positive_distance, wi_positive_distance,
                         ic_negative_distance, cw_negative_distance, wi_negative_distance,
                         distance, labels, summary_tensor, global_step_tensor],
                        feed_dict={
                            image_placeholder: train_image,
                            caption_placeholder: train_caption,
                            word_placeholder: train_word,
                            label_matrix_placeholder: label_matrix
                        })

                epoch = i // epoch_per_iteration + 1
                iteration = i % epoch_per_iteration + 1
                print("Epoch: ", epoch, ", Iteration: ", iteration, " Total loss: ", loss)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : Img pos : ", im_pos)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : Cap pos : ", cap_pos)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : Wor pos : ", wor_pos)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : Img neg : ", im_neg)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : Cap neg : ", cap_neg)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : Wor neg : ", wor_neg)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : IC pos : ", ic_pos)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : CW pos : ", cw_pos)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : WI pos : ", wi_pos)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : IC neg : ", ic_neg)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : CW neg : ", cw_neg)
                print("Epoch: ", epoch, ", Iteration: ", iteration, " : WI neg : ", wi_neg)
                # import pdb
                # pdb.set_trace()

                summary_writer.add_summary(summary, global_step)

                if (i + 1) % cmd_arg.saveIteration == 0:
                    saver.save(session,
                               os.path.join(cmd_arg.experimentDirectory,
                                            'model.ckpt' + str(global_step)))