def create_net(num_classes, dnn='resnet20', **kwargs):
    """Build the network selected by ``dnn``.

    Args:
        num_classes: Passed as ``num_classes`` to the ResNet constructors.
            The other architectures are built with their own defaults.
        dnn: Name of the architecture to build.
        **kwargs: Network-specific arguments: ``datapath`` for ``'lstman4'``;
            ``vocab_size`` and ``batch_size`` for ``'lstm'``.

    Returns:
        A ``(net, ext)`` tuple. ``ext`` is ``None`` unless ``dnn`` is
        ``'lstman4'``, in which case it is the second value returned by
        ``models.LSTMAN4``.

    Raises:
        ValueError: If ``dnn`` is not one of the supported names.
        KeyError: If a keyword argument required by the chosen network is
            missing from ``kwargs``.
    """
    ext = None
    if dnn in ['resnet20', 'resnet56', 'resnet110']:
        net = models.__dict__[dnn](num_classes=num_classes)
    elif dnn == 'resnet50':
        net = models.__dict__['resnet50'](num_classes=num_classes)
    elif dnn == 'mnistnet':
        net = MnistNet()
    elif dnn == 'mnistflnet':
        net = MnistFLNet()
    elif dnn == 'cifar10flnet':
        net = Cifar10FLNet()
    elif dnn == 'vgg16':
        net = models.VGG(dnn.upper())
    elif dnn == 'alexnet':
        net = torchvision.models.alexnet()
    elif dnn == 'lstman4':
        net, ext = models.LSTMAN4(datapath=kwargs['datapath'])
    elif dnn == 'lstm':
        net = lstmpy.lstm(vocab_size=kwargs['vocab_size'],
                          batch_size=kwargs['batch_size'])
    else:
        errstr = 'Unsupport neural network %s' % dnn
        logger.error(errstr)
        # A bare string cannot be raised in Python 3 (it produces an
        # unrelated TypeError), so wrap the message in a real exception.
        raise ValueError(errstr)
    return net, ext
def __init__(self, batch_size=100, channels=1, g_dim=128, z_dim=10,
             rnn_size=256, prior_rnn_layers=1, posterior_rnn_layers=1,
             predictor_rnn_layers=2, opt=None):
    """Build the recurrent modules, encoder/decoder and loss criteria.

    Args:
        batch_size: Batch size handed to every LSTM constructor.
        channels: Channel count passed to the encoder/decoder for datasets
            other than ``'h36m'``.
        g_dim: Feature size used for the LSTM input/output widths and for
            the encoder/decoder.
        z_dim: Output size of the prior and posterior LSTMs.
        rnn_size: Hidden size passed to every LSTM constructor.
        prior_rnn_layers: Layer count of the prior LSTM.
        posterior_rnn_layers: Layer count of the posterior LSTM.
        predictor_rnn_layers: Layer count of the frame predictor LSTM.
        opt: Options object; ``opt.dataset`` and ``opt.backbone_net`` are
            read here and ``opt.optimizer`` is overwritten.

    NOTE(review): the default ``opt=None`` is dereferenced below
    (``opt.dataset``), so an options object appears to be required in
    practice -- confirm with callers.
    """
    super().__init__()
    self.batch_size = batch_size
    self.channels = channels
    self.g_dim = g_dim
    self.z_dim = z_dim
    self.rnn_size = rnn_size
    self.prior_rnn_layers = prior_rnn_layers
    self.posterior_rnn_layers = posterior_rnn_layers
    self.predictor_rnn_layers = predictor_rnn_layers
    self.opt = opt
    # LSTMs
    # The predictor input is g_dim + z_dim plus two extra scalar slots; the
    # prior/posterior inputs are 2 * g_dim plus the same two slots.
    # NOTE(review): the meaning of the two "+ 1" terms is not visible here.
    self.frame_predictor = lstm_models.lstm(
        self.g_dim + self.z_dim + 1 + 1, self.g_dim, self.rnn_size,
        self.predictor_rnn_layers, self.batch_size)
    self.posterior = lstm_models.gaussian_lstm(
        self.g_dim + self.g_dim + 1 + 1, self.z_dim, self.rnn_size,
        self.posterior_rnn_layers, self.batch_size)
    self.prior = lstm_models.gaussian_lstm(self.g_dim + self.g_dim + 1 + 1,
                                           self.z_dim, self.rnn_size,
                                           self.prior_rnn_layers,
                                           self.batch_size)
    # encoder & decoder
    # The 'h36m' backbone takes keyword dimensions; every other dataset uses
    # the positional (g_dim, channels) form.
    if opt.dataset == 'h36m':
        self.encoder = opt.backbone_net.encoder(out_dim=self.g_dim,
                                                h_dim=self.g_dim)
        self.decoder = opt.backbone_net.decoder(in_dim=self.g_dim,
                                                h_dim=self.g_dim)
    else:
        self.encoder = opt.backbone_net.encoder(self.g_dim, self.channels)
        self.decoder = opt.backbone_net.decoder(self.g_dim, self.channels)
    # optimizer
    # Side effect: mutates the caller's options object.
    opt.optimizer = optim.Adam
    # criterions
    self.mse_criterion = nn.MSELoss()  # recon and cpc
    self.kl_criterion = criterion.KLCriterion(opt=self.opt)
    self.align_criterion = nn.MSELoss()
    self.init_weight()
    self.init_optimizer()
def CreatNet(opt):
    """Build the classifier named by ``opt.model_name``.

    Args:
        opt: Options object. ``opt.model_name`` selects the architecture and
            ``opt.label`` is the number of output classes. ``opt.input_nc``
            is read by most branches; ``opt.input_size`` and
            ``opt.time_step`` are read by the ``'lstm'`` branch.

    Returns:
        The constructed network.

    Raises:
        ValueError: If ``opt.model_name`` does not match any supported
            architecture (including unsupported ``densenet*`` variants).
    """
    name = opt.model_name
    label_num = opt.label
    net = None

    if name == 'lstm':
        net = lstm.lstm(opt.input_size, opt.time_step,
                        input_nc=opt.input_nc, num_classes=label_num)
    elif name == 'cnn_1d':
        net = cnn_1d.cnn(opt.input_nc, num_classes=label_num)
    elif name == 'resnet18_1d':
        net = resnet_1d.resnet18()
        net.conv1 = nn.Conv1d(opt.input_nc, 64, 7, 2, 3, bias=False)
        net.fc = nn.Linear(512, label_num)
    elif name == 'resnet34_1d':
        net = resnet_1d.resnet34()
        net.conv1 = nn.Conv1d(opt.input_nc, 64, 7, 2, 3, bias=False)
        net.fc = nn.Linear(512, label_num)
    elif name == 'multi_scale_resnet_1d':
        net = multi_scale_resnet_1d.Multi_Scale_ResNet(
            inchannel=opt.input_nc, num_classes=label_num)
    elif name == 'micro_multi_scale_resnet_1d':
        net = micro_multi_scale_resnet_1d.Multi_Scale_ResNet(
            inchannel=opt.input_nc, num_classes=label_num)
    elif name == 'multi_scale_resnet':
        net = multi_scale_resnet.Multi_Scale_ResNet(
            inchannel=opt.input_nc, num_classes=label_num)
    elif name == 'dfcnn':
        net = dfcnn.dfcnn(num_classes=label_num)
    elif name in ['resnet101', 'resnet50', 'resnet18']:
        if name == 'resnet101':
            net = resnet.resnet101(pretrained=False)
            net.fc = nn.Linear(2048, label_num)
        elif name == 'resnet50':
            net = resnet.resnet50(pretrained=False)
            net.fc = nn.Linear(2048, label_num)
        elif name == 'resnet18':
            net = resnet.resnet18(pretrained=False)
            net.fc = nn.Linear(512, label_num)
        # All 2-D ResNets get their stem replaced to accept opt.input_nc
        # input channels.
        net.conv1 = nn.Conv2d(opt.input_nc, 64, 7, 2, 3, bias=False)
    elif 'densenet' in name:
        if name == 'densenet121':
            net = densenet.densenet121(pretrained=False,
                                       num_classes=label_num)
        elif name == 'densenet201':
            net = densenet.densenet201(pretrained=False,
                                       num_classes=label_num)
    elif name == 'squeezenet':
        net = squeezenet.squeezenet1_1(pretrained=False,
                                       num_classes=label_num, inchannel=1)

    if net is None:
        # Without this guard an unknown name surfaced as an
        # UnboundLocalError on the return statement.
        raise ValueError('Unsupported model name: %r' % (name,))
    return net
def create_net(num_classes, dnn='resnet20', **kwargs):
    """Build the network selected by ``dnn``.

    Args:
        num_classes: Passed as ``num_classes`` to the constructors that take
            it (ResNets, DenseNets, Inception, ``'vgg16i'``). The other
            architectures are built with their own defaults.
        dnn: Name of the architecture to build.
        **kwargs: Network-specific arguments: ``datapath`` for ``'lstman4'``;
            ``vocab_size`` and ``batch_size`` for ``'lstm'``.

    Returns:
        A ``(net, ext)`` tuple. ``ext`` is ``None`` unless ``dnn`` is
        ``'lstman4'``, in which case it is the second value returned by
        ``models.LSTMAN4``.

    Raises:
        ValueError: If ``dnn`` is not one of the supported names.
        KeyError: If a keyword argument required by the chosen network is
            missing from ``kwargs``.
    """
    ext = None
    if dnn in ['resnet20', 'resnet56', 'resnet110']:
        net = models.__dict__[dnn](num_classes=num_classes)
    elif dnn == 'resnet50':
        net = torchvision.models.resnet50(num_classes=num_classes)
    elif dnn == 'resnet101':
        net = torchvision.models.resnet101(num_classes=num_classes)
    elif dnn == 'resnet152':
        net = torchvision.models.resnet152(num_classes=num_classes)
    elif dnn == 'densenet121':
        net = torchvision.models.densenet121(num_classes=num_classes)
    elif dnn == 'densenet161':
        net = torchvision.models.densenet161(num_classes=num_classes)
    elif dnn == 'densenet201':
        net = torchvision.models.densenet201(num_classes=num_classes)
    elif dnn == 'inceptionv4':
        net = models.inceptionv4(num_classes=num_classes)
    elif dnn == 'inceptionv3':
        net = torchvision.models.inception_v3(num_classes=num_classes)
    elif dnn == 'vgg16i':  # vgg16 for imagenet
        net = torchvision.models.vgg16(num_classes=num_classes)
    elif dnn == 'googlenet':
        net = models.googlenet()
    elif dnn == 'mnistnet':
        net = MnistNet()
    elif dnn == 'fcn5net':
        net = models.FCN5Net()
    elif dnn == 'lenet':
        net = models.LeNet()
    elif dnn == 'lr':
        net = models.LinearRegression()
    elif dnn == 'vgg16':
        net = models.VGG(dnn.upper())
    elif dnn == 'alexnet':
        net = torchvision.models.alexnet()
    elif dnn == 'lstman4':
        net, ext = models.LSTMAN4(datapath=kwargs['datapath'])
    elif dnn == 'lstm':
        net = lstmpy.lstm(vocab_size=kwargs['vocab_size'],
                          batch_size=kwargs['batch_size'])
    else:
        errstr = 'Unsupport neural network %s' % dnn
        logger.error(errstr)
        # A bare string cannot be raised in Python 3 (it produces an
        # unrelated TypeError), so wrap the message in a real exception.
        raise ValueError(errstr)
    return net, ext
def __init__(self, params):
    """Set up models, optimizers, optional Visdom plotters and data loaders.

    Args:
        params: Mapping of settings. Keys read here: ``cuda``, ``seed``,
            ``noreload``, ``g_dim``, ``z_dim``, ``action_size``,
            ``rnn_size``, ``predictor_rnn_layers``,
            ``posterior_rnn_layers``, ``batch_size``, ``n_channels``,
            ``learning_rate``, ``beta1``, ``plot_visdom``, ``env``,
            ``path_data``, ``seq_len``, ``train_buffer_size`` and
            ``test_buffer_size``.
    """
    self.params = params
    # NOTE(review): .cuda() is called unconditionally here and on the models
    # below, regardless of self.cuda / self.device -- confirm CPU-only runs
    # are not expected.
    self.loss_function = nn.MSELoss().cuda()
    # choose device
    self.cuda = params["cuda"] and torch.cuda.is_available()
    torch.manual_seed(params["seed"])
    # Fix numeric divergence due to bug in Cudnn
    torch.backends.cudnn.benchmark = True
    self.device = torch.device("cuda" if self.cuda else "cpu")
    # Initialize model
    # With "noreload" set, fresh networks are built; otherwise
    # load_checkpoint() is expected to provide them.
    if params["noreload"]:
        self.frame_predictor = lstm_models.lstm(
            params["g_dim"] + params["z_dim"] + params["action_size"],
            params["g_dim"], params["rnn_size"],
            params["predictor_rnn_layers"], params["batch_size"]).cuda()
        self.posterior = lstm_models.gaussian_lstm(
            params["g_dim"], params["z_dim"], params["rnn_size"],
            params["posterior_rnn_layers"], params["batch_size"]).cuda()
        self.encoder = model.encoder(params["g_dim"],
                                     params["n_channels"]).cuda()
        self.decoder = model.decoder(params["g_dim"],
                                     params["n_channels"]).cuda()
    else:
        self.load_checkpoint()
    # NOTE(review): these initialisers also run after load_checkpoint();
    # confirm that re-initialising restored weights is intended.
    self.frame_predictor.apply(svp_utils.init_weights)
    self.posterior.apply(svp_utils.init_weights)
    self.encoder.apply(svp_utils.init_weights)
    self.decoder.apply(svp_utils.init_weights)
    # Init optimizers
    # One Adam optimizer per sub-network, all sharing lr and beta1.
    self.frame_predictor_optimizer = optim.Adam(
        self.frame_predictor.parameters(), lr=params["learning_rate"],
        betas=(params["beta1"], 0.999))
    self.posterior_optimizer = optim.Adam(
        self.posterior.parameters(), lr=params["learning_rate"],
        betas=(params["beta1"], 0.999))
    self.encoder_optimizer = optim.Adam(
        self.encoder.parameters(), lr=params["learning_rate"],
        betas=(params["beta1"], 0.999))
    self.decoder_optimizer = optim.Adam(
        self.decoder.parameters(), lr=params["learning_rate"],
        betas=(params["beta1"], 0.999))
    if params["plot_visdom"]:
        self.plotter = VisdomLinePlotter(env_name=params['env'])
        self.img_plotter = VisdomImagePlotter(env_name=params['env'])
    # Select transformations
    # Moves axis 3 to position 1 and divides by 255.
    # NOTE(review): presumably (N, H, W, C) uint8 frames -> (N, C, H, W) in
    # [0, 1]; confirm against RolloutSequenceDataset.
    transform = transforms.Lambda(
        lambda x: np.transpose(x, (0, 3, 1, 2)) / 255)
    self.train_loader = DataLoader(
        RolloutSequenceDataset(params["path_data"], params["seq_len"],
                               transform,
                               buffer_size=params["train_buffer_size"]),
        batch_size=params['batch_size'], num_workers=2, shuffle=True,
        drop_last=True)
    self.test_loader = DataLoader(
        RolloutSequenceDataset(params["path_data"], params["seq_len"],
                               transform, train=False,
                               buffer_size=params["test_buffer_size"]),
        batch_size=params['batch_size'], num_workers=2, shuffle=False,
        drop_last=True)
def __init__(self, input_channels, hidden_channels, kernel_size, batch_size): super(CLSTM_upper, self).__init__() # temp predictor_rnn_layers = 2 hidden_dim = 128 rnn_size = 256 #batch_size = 20 # Initialize encoder and decoder self.encoder = ed_model.encoder(hidden_dim, input_channels) self.decoder = ed_model.decoder(hidden_dim, input_channels) self.encoder.apply(init_weights) self.decoder.apply(init_weights) # Initialize frame predictor self.frame_predictor = lstm_models.lstm(hidden_dim, hidden_dim, rnn_size, predictor_rnn_layers, batch_size) self.frame_predictor.apply(init_weights)
def run_lstm(d, x_train, x_test, y_train, y_test):
    """Build, summarise and fit the LSTM model, logging to TensorBoard.

    Args:
        d: Directory the TensorBoard callback writes its logs to.
        x_train: Training inputs.
        x_test: Inputs used as validation data during fitting.
        y_train: Training targets.
        y_test: Targets used as validation data during fitting.

    Returns:
        The fitted model.
    """
    network = lstm.lstm((11, args.window, 1))
    network.summary()

    board_settings = {
        'log_dir': d,
        'histogram_freq': 0,
        'write_graph': True,
        'write_grads': False,
        'write_images': False,
        'embeddings_freq': 0,
        'embeddings_layer_names': None,
        'embeddings_metadata': None,
        'embeddings_data': None,
    }
    board = TensorBoard(**board_settings)

    # Fixed schedule: 128 epochs, batches of 11, sample order preserved.
    holdout = (x_test, y_test)
    network.fit(
        x_train,
        y_train,
        epochs=128,
        validation_data=holdout,
        batch_size=11,
        callbacks=[board],
        shuffle=False,
    )
    return network
def trainLSTM():
    """Fit the LSTM on the training file and pickle its decoded predictions.

    Reads the module-level ``trainFilePath``, ``testFilePath`` and
    ``corpus``. Predictions are computed on the training inputs and written
    to ``<metricHistory.saveDir>LSTM.pkl``.
    """
    # The module-level names are only read, never rebound, so no ``global``
    # declaration is needed.
    train_inputs, train_labels = corpus.loadFile(filePath=trainFilePath)
    test_inputs, test_labels = corpus.loadFile(filePath=testFilePath)

    network = lstm(corpus)
    history = MetricHistory(test_inputs, test_labels)
    network.fit(
        train_inputs,
        train_labels,
        epochs=10,
        batch_size=128,
        validation_split=0.1,
        shuffle=True,
        callbacks=[history],
    )

    # Probabilities -> thresholded labels -> decoded predictions.
    probabilities = network.predict(train_inputs)
    decoded = MyCorpus.oneHotDecode(
        MyCorpus.hybrid(probabilities, threshold=0.5))

    target_path = history.saveDir + 'LSTM.pkl'
    with open(target_path, 'wb') as sink:
        pickle.dump(decoded, sink, pickle.HIGHEST_PROTOCOL)
def create_net(num_classes, dnn='resnet20', **kwargs):
    """Build the network selected by ``dnn``.

    Args:
        num_classes: Passed as ``num_classes`` to the constructors that take
            it (ResNets, Inception, ``'vgg16i'``, ``'vgg19'``). The other
            architectures are built with their own defaults.
        dnn: Name of the architecture to build.
        **kwargs: Network-specific arguments: ``datapath`` for ``'lstman4'``;
            ``vocab_size`` and ``batch_size`` for ``'lstm'``.

    Returns:
        A ``(net, ext)`` tuple. ``ext`` is ``None`` unless ``dnn`` is
        ``'lstman4'``, in which case it is the second value returned by
        ``models.LSTMAN4``.

    Raises:
        ValueError: If ``dnn`` is not one of the supported names.
        KeyError: If a keyword argument required by the chosen network is
            missing from ``kwargs``.
    """
    ext = None
    if dnn in ['resnet20', 'resnet56', 'resnet110']:
        net = models.__dict__[dnn](num_classes=num_classes)
    elif dnn == 'resnet50':
        net = torchvision.models.resnet50(num_classes=num_classes)
    elif dnn == 'inceptionv4':
        net = models.inceptionv4(num_classes=num_classes)
    elif dnn == 'inceptionv3':
        net = torchvision.models.inception_v3(num_classes=num_classes)
    elif dnn == 'vgg16i':  # vgg16 for imagenet
        net = torchvision.models.vgg16(num_classes=num_classes)
    elif dnn == 'vgg19':  # vgg19 for imagenet
        net = torchvision.models.vgg19(num_classes=num_classes)
    elif dnn == 'googlenet':
        net = models.googlenet()
    elif dnn == 'mnistnet':
        net = MnistNet()
    elif dnn == 'fcn5net':
        net = models.FCN5Net()
    elif dnn == 'lenet':
        net = models.LeNet()
    elif dnn == 'lr':
        net = models.LinearRegression()
    elif dnn == 'vgg16':
        net = models.VGG(dnn.upper())
    elif dnn == 'alexnet':
        net = torchvision.models.alexnet()
    elif dnn == 'lstman4':
        net, ext = models.LSTMAN4(datapath=kwargs['datapath'])
    elif dnn == 'lstm':
        net = lstmpy.lstm(vocab_size=kwargs['vocab_size'],
                          batch_size=kwargs['batch_size'])
    else:
        errstr = 'Unsupport neural network %s' % dnn
        logger.error(errstr)
        # A bare string cannot be raised in Python 3 (it produces an
        # unrelated TypeError), so wrap the message in a real exception.
        raise ValueError(errstr)
    return net, ext
def load_model(f, model='', is_train=1):
    """Restore a pickled character-level RNN.

    Args:
        f: Path of the pickle file holding the saved parameter dict.
        model: An existing model to load the weights into. When left as the
            empty string, a new ``lstm`` or ``gru`` model is built from the
            sizes stored in the file.
        is_train: Forwarded to the model constructor when a new model is
            built.

    Returns:
        The model with ``char_to_ix``, ``ix_to_char`` and weights restored.

    Raises:
        SystemExit: With status 1 if the file cannot be opened or unpickled.
        ValueError: If a new model is requested and the stored ``type`` is
            neither ``'lstm'`` nor ``'gru'``.
    """
    try:
        # The context manager closes the handle even when unpickling fails.
        with open(f, 'rb') as handle:
            # NOTE(security): pickle can execute arbitrary code; only load
            # model files from trusted sources.
            ps = pickle.load(handle)
    except Exception as err:
        # Report the actual cause: the file may exist but be unreadable or
        # corrupt. ``Exception`` (not a bare except) lets Ctrl-C through.
        print('could not load model from %s: %s' % (f, err))
        sys.exit(1)

    if model == '':
        kind = ps['type']
        if kind == 'lstm':
            model = lstm.lstm(ps['in_size'], ps['rnn_size'], ps['out_size'],
                              ps['layers'], is_train=is_train)
        elif kind == 'gru':
            model = gru.gru(ps['in_size'], ps['rnn_size'], ps['out_size'],
                            ps['layers'], is_train=is_train)
        else:
            # Previously this fell through and failed on ''.char_to_ix.
            raise ValueError('Unknown model type in %s: %r' % (f, kind))

    model.char_to_ix = ps['char_to_ix']
    model.ix_to_char = ps['ix_to_char']
    model.load_weights(ps)
    return model
def CreatNet(name):
    """Build the 5-class, single-input-channel classifier called ``name``.

    Args:
        name: Architecture name.

    Returns:
        The constructed network.

    Raises:
        ValueError: If ``name`` does not match any supported architecture
            (including unsupported ``densenet*`` variants).
    """
    net = None

    if name == 'lstm':
        net = lstm.lstm(100, 27, num_classes=5)
    elif name == 'cnn_1d':
        net = cnn_1d.cnn(1, num_classes=5)
    elif name == 'resnet18_1d':
        net = resnet_1d.resnet18()
        net.conv1 = nn.Conv1d(1, 64, 7, 2, 3, bias=False)
        net.fc = nn.Linear(512, 5)
    elif name == 'multi_scale_resnet_1d':
        net = multi_scale_resnet_1d.Multi_Scale_ResNet(inchannel=1,
                                                       num_classes=5)
    elif name == 'multi_scale_resnet':
        net = multi_scale_resnet.Multi_Scale_ResNet(inchannel=1,
                                                    num_classes=5)
    elif name == 'dfcnn':
        net = dfcnn.dfcnn(num_classes=5)
    elif name in ['resnet101', 'resnet50', 'resnet18']:
        if name == 'resnet101':
            net = resnet.resnet101(pretrained=False)
            net.fc = nn.Linear(2048, 5)
        elif name == 'resnet50':
            net = resnet.resnet50(pretrained=False)
            net.fc = nn.Linear(2048, 5)
        elif name == 'resnet18':
            net = resnet.resnet18(pretrained=False)
            net.fc = nn.Linear(512, 5)
        # All 2-D ResNets get a single-channel stem.
        net.conv1 = nn.Conv2d(1, 64, 7, 2, 3, bias=False)
    elif 'densenet' in name:
        if name == 'densenet121':
            net = densenet.densenet121(pretrained=False, num_classes=5)
        elif name == 'densenet201':
            net = densenet.densenet201(pretrained=False, num_classes=5)
    elif name == 'squeezenet':
        net = squeezenet.squeezenet1_1(pretrained=False, num_classes=5,
                                       inchannel=1)

    if net is None:
        # Without this guard an unknown name surfaced as an
        # UnboundLocalError on the return statement.
        raise ValueError('Unsupported model name: %r' % (name,))
    return net
def plot_val(name, width=64, lstm_width=49, nu=1, ny=2, layers=2,
             train_batches=50, nl=None, T=1000, plot_str='k', x0=None,
             LSTM=False):
    """Load a saved pendulum model and plot its initial-condition response.

    Args:
        name: Base name of the stored experiment (``<name>.mat`` and
            ``p_<name>`` under the pendulum results directory).
        width: State width used when building the diRNN.
        lstm_width: State width used when building the LSTM.
        nu: Input size passed to the model constructor.
        ny: Output size passed to the model constructor.
        layers: Layer count passed to the model constructor.
        train_batches: ``nBatches`` passed to the diRNN constructor.
        nl: Non-linearity for the diRNN; ``torch.relu`` when ``None``.
        T: Horizon passed to ``get_ic_response``.
        plot_str: Matplotlib format string for the trajectory.
        x0: Initial condition passed to ``get_ic_response``.
        LSTM: Build an LSTM instead of a diRNN when true.
    """
    results_dir = "./experimental_results/pendulum/"
    activation = nl if nl is not None else torch.relu
    # The .mat file is loaded (and must exist) although its contents are not
    # used further in this function.
    data = io.loadmat(results_dir + name + ".mat")
    weights_file = results_dir + "p_" + name

    if not LSTM:
        model = diRNN.diRNN(nu, width, ny, layers, nBatches=train_batches,
                            nl=activation, learn_init_state=False)
        model.load_state_dict(torch.load(weights_file))
    else:
        model = lstm.lstm(nu, lstm_width, ny, layers)
        model.load_state_dict(torch.load(weights_file))
        # Alias added only after the weights are loaded.
        model.output_layer = model.output

    u, yest = get_ic_response(model, x0, T=T, batches=1000)

    first_output = yest[:, 0].detach().numpy().T
    second_output = yest[:, 1].detach().numpy().T
    plt.plot(first_output, second_output, plot_str)
    plt.pause(0.01)
train_data, test_data = utils.load_dataset(opt) train_loader = DataLoader(train_data, num_workers=opt.data_threads, batch_size=opt.batch_size, shuffle=True, drop_last=True, pin_memory=True) test_loader = DataLoader(test_data, num_workers=opt.data_threads, batch_size=opt.batch_size, shuffle=True, drop_last=True, pin_memory=True) lstm = models.lstm(opt.pose_dim + opt.content_dim, opt.pose_dim, opt.rnn_size, opt.rnn_layers, opt.batch_size, opt.normalize) lstm_dict = torch.load('pretrained_models/kth128x128_model.pth', map_location='cpu') new_state_dict = OrderedDict() # print(lstm_dict) for k, v in lstm_dict.items(): name = k[7:] # remove `module.` new_state_dict[name] = v print(new_state_dict) lstm.load_state_dict(new_state_dict) def get_testing_batch(dtype=torch.cuda.FloatTensor): while True: for sequence in test_loader: batch = utils.normalize_data(opt, dtype, sequence)
def train(**kwargs):
    """Train the LSTM language model, keeping the best checkpoint on disk.

    Keyword arguments override attributes of the module-level ``opt``
    object. After every epoch the model is evaluated on the validation
    iterator; whenever the validation loss improves, the weights are saved
    to ``<opt.model_prefix><opt.model>.pth``. After training, the best saved
    weights are reloaded and evaluated on the test iterator.

    Args:
        **kwargs: Attribute overrides applied to ``opt`` via ``setattr``.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)

    vis = Visualizer(env=opt.env)  # visdom environment

    # Load the data.
    train_iter, valid_iter, test_iter, field = load_data()
    word2ix = field.vocab.stoi

    # Model definition.
    model = lstm(len(word2ix), 300, 150)
    best_valid_loss = float("inf")
    optimizer = t.optim.Adam(model.parameters(), lr=opt.lr,
                             weight_decay=1e-6)
    criterion = nn.CrossEntropyLoss()
    if opt.model_path:
        model.load_state_dict(t.load(opt.model_path))
    if opt.use_gpu:
        model.cuda()
        criterion.cuda()

    loss_meter = meter.AverageValueMeter()
    checkpoint_path = '%s%s.pth' % (opt.model_prefix, opt.model)
    saved_best = False

    for epoch in range(opt.epoch):
        epoch_no = epoch + 1
        model.train()
        loss_meter.reset()
        logging.info("这是第{0}次epoch".format(epoch_no))

        # tqdm wraps the iterator to show a progress bar.
        for cnt, batch in enumerate(tqdm.tqdm(train_iter)):
            data = batch.text
            if opt.use_gpu:
                data = data.cuda()
            optimizer.zero_grad()

            # Input and target are offset by one step (CharRNN style).
            input_, target = Variable(data[:-1, :]), Variable(data[1:, :])
            output, _ = model(input_)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())

            # Visualisation.
            if (1 + cnt) % opt.plot_every == 0:
                vis.plot('loss', loss_meter.value()[0])

        valid_loss = evaluate(model, valid_iter, criterion)
        logging.info("第%d次验证集的loss为: %f" % (epoch_no, valid_loss))
        if valid_loss < best_valid_loss:
            # Remove the previous checkpoint without going through a shell:
            # portable, and safe for paths with spaces or shell characters.
            try:
                os.remove(checkpoint_path)
            except FileNotFoundError:
                pass
            best_valid_loss = valid_loss
            t.save(model.state_dict(), checkpoint_path)
            saved_best = True

    # The model object keeps training after its best epoch, so restore the
    # best saved weights before the final test evaluation.
    if saved_best:
        model.load_state_dict(t.load(checkpoint_path))
    test_loss = evaluate(model, test_iter, criterion)
    logging.info("测试集的loss为: %f" % test_loss)
trainFilePath = 'data/{}/train.json'.format(dataSetName) testFilePath = 'data/{}/test.json'.format(dataSetName) corpus = MyCorpus(filePathList=[trainFilePath, testFilePath]) X_train, y_train = corpus.loadFile(filePath=trainFilePath) X_test, y_test = corpus.loadFile(filePath=testFilePath) modelName = dataSetName + '_bilstm.h5' if len(sys.argv) > 1 and sys.argv[1] == 'eval': model = load_model(modelName) else: if os.path.exists(modelName): model = load_model(modelName) else: model = lstm(corpus) metricHistory = MetricHistory(X_test, y_test) model.fit(X_train, y_train, epochs=20, batch_size=128, validation_split=0.1, shuffle=True, callbacks=[metricHistory]) model.save(modelName) logging.info(str(metricHistory.history)) y_prob = model.predict(X_test) y_predict = corpus.hybrid(y_prob, threshold=0.5)
def generate_model(nu, ny, batches, args, loader=None, solver="SCS"):
    """Create a model and its training constraints from ``args``.

    Convenience factory for re-generating models when training on different
    data sets.

    Args:
        nu: Input size passed to the model constructor.
        ny: Output size passed to the model constructor.
        batches: ``nBatches`` passed to the model constructor.
        args: Options object. ``args.model`` selects the model class and
            ``args.width`` its width; the ``"cirnn"`` branch also reads
            ``args.depth``; the ``"RobustRnn"`` branch also reads
            ``res_size``, ``multiplier``, ``supply_rate``, ``init_type``,
            ``init_var`` and (for ``"dl2_gain"``) ``gamma``.
        loader: Data loader used by the ``'n4sid'`` initialisations.
        solver: Solver name passed to ``init_lipschitz_ss``.

    Returns:
        A ``(model, constraints)`` tuple, where ``constraints`` is a dict
        with the keys ``"lmi"`` and ``"inequality"``.

    Raises:
        ValueError: If ``args.model`` is not supported, or if
            ``args.supply_rate`` is not supported for ``"RobustRnn"``.
    """
    print('Creating model', args.model, ' width = ', args.width)

    if args.model == "cirnn":
        model = ciRNN.ciRNN(nu, args.width, ny, args.depth, nBatches=batches)
        model.init_l2(0.0, 1E-3)
        constraints = {
            "lmi": model.contraction_lmi(0, 1E-5),
            "inequality": None
        }

    elif args.model == "RobustRnn":
        model = RobustRnn.RobustRnn(nu, args.width, ny, args.res_size,
                                    nBatches=batches, method=args.multiplier,
                                    supply_rate=args.supply_rate)

        if args.supply_rate == "dl2_gain":
            print('\t supply rate: dl2 gamma = ', args.gamma)
            if args.init_type == 'n4sid':
                model.init_lipschitz_ss(gamma=args.gamma, loader=loader,
                                        solver=solver)
            else:
                model.initialize_lipschitz_LMI(gamma=args.gamma, eps=1E-3,
                                               init_var=args.init_var)
            constraints = {
                "lmi": model.lipschitz_LMI(gamma=args.gamma, eps=1E-5),
                "inequality": [model.multiplier_barrier]
            }

        elif args.supply_rate == "stable":
            print('\t supply rate: stable')
            if args.init_type == 'n4sid':
                model.init_stable_ss(loader)
            else:
                model.initialize_stable_LMI(eps=1E-3, init_var=args.init_var,
                                            obj=args.init_type)
            constraints = {
                "lmi": model.stable_LMI(eps=1E-5),
                "inequality": [model.multiplier_barrier]
            }

        else:
            # Previously left ``constraints`` unbound and failed on return.
            raise ValueError('Unknown supply rate: %r' % (args.supply_rate,))

    elif args.model == "rnn":
        model = rnn.rnn(nu, args.width, ny, nBatches=batches)
        constraints = {"lmi": None, "inequality": None}

    elif args.model == "lstm":
        # only constraint is invertible E
        model = lstm.lstm(nu, args.width, ny, nBatches=batches)
        constraints = {"lmi": None, "inequality": None}

    elif args.model == 'dnb':
        model = dnb.dnbRNN(nu, args.width, ny, nBatches=batches)
        constraints = {
            "lmi": model.norm_ball_lmi(eps=0.001),
            "inequality": None
        }

    else:
        # Previously left ``model`` unbound and failed on return.
        raise ValueError('Unknown model: %r' % (args.model,))

    return model, constraints
import models.lstm as lstm_models

# Pick the encoder/decoder module matching the backbone and image width.
# NOTE(review): a known model with an image width other than 64 or 128
# leaves ``model`` unimported; only an unknown model name raises here.
if opt.model == 'dcgan':
    if opt.image_width == 64:
        import models.dcgan_64 as model
    elif opt.image_width == 128:
        import models.dcgan_128 as model
elif opt.model == 'vgg':
    if opt.image_width == 64:
        import models.vgg_64 as model
    elif opt.image_width == 128:
        import models.vgg_128 as model
else:
    raise ValueError('Unknown model: %s' % opt.model)

# define
# Every recurrent module receives the batch size divided by the number of
# GPU ids, truncated to an int.
frame_predictor = lstm_models.lstm((opt.factor+1)*opt.z_dim, opt.g_dim, opt.rnn_size, opt.predictor_rnn_layers, int(opt.batch_size/len(opt.gpu_ids)))
posterior_pose = lstm_models.gaussian_lstm(opt.g_dim+opt.factor*opt.z_dim, opt.z_dim, opt.rnn_size, opt.posterior_rnn_layers, int(opt.batch_size/len(opt.gpu_ids)))
prior = lstm_models.gaussian_lstm(opt.g_dim+opt.factor*opt.z_dim, opt.z_dim, opt.rnn_size, opt.prior_rnn_layers, int(opt.batch_size/len(opt.gpu_ids)))

# The content encoder takes opt.channels * opt.n_past input channels; the
# pose encoder and decoder take opt.channels.
cont_encoder = model.cont_encoder(opt.z_dim*opt.factor, opt.channels*opt.n_past)  #g_dim = 64 or 128
pose_encoder = model.pose_encoder(opt.g_dim, opt.channels)
decoder = model.decoder(opt.g_dim, opt.channels)

# init
# Same initialisation settings for every network; init_net also receives
# the GPU ids.
frame_predictor = utils.init_net(frame_predictor, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
posterior_pose = utils.init_net(posterior_pose, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
prior = utils.init_net(prior, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
cont_encoder = utils.init_net(cont_encoder, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
pose_encoder = utils.init_net(pose_encoder, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
decoder = utils.init_net(decoder, init_type='normal', init_gain=0.02, gpu_ids=opt.gpu_ids)
model_load_file = args.load model_save_file = args.save if model_load_file != '': rnn = util.load_model(model_load_file) rnn.dropout = dropout rnn.adadelta_params = adadelta_params rnn.alpha = alpha char_to_ix = rnn.char_to_ix ix_to_char = rnn.ix_to_char elif model_type == 'lstm': rnn = lstm.lstm(in_size, rnn_size, out_size, rnn_layers, dropout=dropout, adadelta_params=adadelta_params, alpha=alpha) elif model_type == 'gru': rnn = gru.gru(in_size, rnn_size, out_size, rnn_layers, dropout=dropout, adadelta_params=adadelta_params, alpha=alpha) for e in range(epochs): p = 0 costs = []
ar, d, ma, prediction = arima_model.get_output() params = "ar=" + str(ar) + ";d=" + str(d) + ";ma=" + str( ma) + ";training_ratio=" + str(training_ratio) elif (m == "lstm"): #params lstmCells = 10 DL1units = 20 DL2units = 5 DL3units = 1 lstm_model = lstm.lstm(data=value, epochs=epochs, batch_size=batch_size, training_ratio=training_ratio, sequance_length=sequance_length, lstmCells=lstmCells, DL1units=DL1units, DL2units=DL2units, DL3units=DL3units) lstm_model.train() params = "lstmCells=" + str(lstmCells) + ";DL1units=" + str( DL1units) + ";DL2units=" + str(DL2units) + ";DL3units=" + str( DL3units) + ";epochs=" + str( epochs) + ";batch_size=" + str( batch_size) + ";training_ratio=" + str( training_ratio) + ";sequance_length=" + str( sequance_length) prediction = lstm_model.get_output() elif (m == "cnn"): #params
# Each section below follows the same recipe: build the model, load its
# stored parameters from ``path + name + ".params"``, run vary_amplitude()
# and save the result as ``amp_<name>.mat``.
name = 'iqc-rnn_w10_gamma15.0_n4'
model = RobustRnn.RobustRnn(nu, width, ny, width, nBatches=batches, method='Neuron')
model.load_state_dict(torch.load(path + name + ".params"))
res = vary_amplitude(model)
io.savemat('./results/msd/generalization/amp_' + name + '.mat', res)

# # lstm
print("Running tests on LSTM")
name = 'lstm_w10_gamma0.0_n4'
model = lstm.lstm(nu, width, ny, layers=1, nBatches=batches)
model.load_state_dict(torch.load(path + name + ".params"))
res = vary_amplitude(model)
io.savemat('./results/msd/generalization/amp_' + name + '.mat', res)

# rnn
print("Running tests on RNN")
name = 'rnn_w10_gamma0.0_n4'
model = rnn.rnn(nu, width, ny, 1, nBatches=batches)
model.load_state_dict(torch.load(path + name + ".params"))
res = vary_amplitude(model)
io.savemat('./results/msd/generalization/amp_' + name + '.mat', res)

# cirnn
print("Running tests on cirnn")
name = 'cirnn_w10_gamma0.0_n4'
opt.data_root = drnet_opt.data_root
print(opt)

# ---------------- optimizers ----------------
# Replace the optimizer name stored on ``opt`` with the optimizer class.
if opt.optimizer == 'adam':
    opt.optimizer = optim.Adam
elif opt.optimizer == 'rmsprop':
    opt.optimizer = optim.RMSprop
elif opt.optimizer == 'sgd':
    opt.optimizer = optim.SGD
else:
    raise ValueError('Unknown optimizer: %s' % opt.optimizer)

import models.lstm as models
# The LSTM input width is pose_dim + content_dim and its output width is
# pose_dim.
lstm = models.lstm(opt.pose_dim+opt.content_dim, opt.pose_dim, opt.rnn_size, opt.rnn_layers, opt.batch_size, opt.normalize)
lstm.apply(utils.init_weights)

# NOTE(review): ``betas`` is passed whichever optimizer class was selected
# above -- confirm that the 'rmsprop' and 'sgd' choices accept it.
optimizer = opt.optimizer(lstm.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))

# --------- loss functions ------------------------------------
mse_criterion = nn.MSELoss()

# --------- transfer to gpu ------------------------------------
if has_cuda:
    lstm.cuda()
    netEP.cuda()
    netEC.cuda()
    netD.cuda()
    mse_criterion.cuda()
def Bleu(**kwargs):
    """Compute corpus BLEU-1..4 of the model's predictions on the test set.

    Keyword arguments override attributes of the module-level ``opt``
    object. For every test batch the target and the model's top-1
    prediction are printed token by token, cut after the first stop token,
    and collected as references and hypotheses.

    Args:
        **kwargs: Attribute overrides applied to ``opt`` via ``setattr``.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)

    print('Loading model from {}'.format(opt.model_path))

    # Load the vocabulary.
    # NOTE(review): when the pickle file exists, ``test_iter`` is not
    # assigned in this function, so the loop below depends on it being
    # available from an enclosing scope -- confirm.
    if os.path.exists(opt.pickle_path):
        data = np.load(opt.pickle_path)
        word2ix, ix2word = data['word2ix'].item(), data['ix2word']
    else:
        train_iter, valid_iter, test_iter, field = load_data()
        word2ix = field.vocab.stoi
        ix2word = field.vocab.itos

    # Load the model.
    if opt.model == 'lstm':
        model = lstm(len(word2ix), 300, 150)
    elif opt.model == 'lstm_twin':
        model = lstm_twin(len(word2ix), 300, 150)
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)

    if opt.use_gpu:
        model.cuda()
    print("加载完毕")

    # Index after which a sequence is cut.
    # NOTE(review): presumably the end-of-sequence token id -- confirm
    # against the vocabulary.
    stop_ix = 3

    hypothesis = []
    references = []
    for batch in tqdm.tqdm(test_iter):
        data = batch.text
        if opt.model == 'lstm_twin':
            model.batch_size = data.size(1)
            hidden = model.init_hidden()
        if opt.use_gpu:
            data = data.cuda()
        # Input and target are offset by one step.
        input_, target = Variable(data[:-1, :]), Variable(data[1:, :])

        tmp = target.transpose(0, 1).cpu().numpy()
        print('===========输入==========')
        for ii in tmp:
            ii_ = list(ii)
            for i in ii_:
                print(ix2word[i], end='')
            print('')
            # Raises ValueError if a target has no stop token (unchanged).
            ii_ = ii_[:ii_.index(stop_ix) + 1]
            references.append([ii_])

        print('===========输出==========')
        if opt.model == 'lstm':
            output, _ = model(input_)
            output = output.view(data.size(0) - 1, data.size(1), -1)
        elif opt.model == 'lstm_twin':
            output = model.work(input_, hidden)
            output = output[0].view(data.size(0) - 1, data.size(1), -1)

        top = output.topk(1, dim=2)[1].squeeze().transpose(0, 1)
        top = top.cpu().numpy()
        for ii in top:
            ii_ = list(ii)
            for i in ii_:
                print(ix2word[i], end='')
            print('')
            haha = ii_.index(stop_ix) if stop_ix in ii_ else None
            # Compare against None: a stop token at position 0 is a valid
            # hit, but a plain truthiness test would skip the truncation.
            if haha is not None:
                ii_ = ii_[:haha + 1]
            hypothesis.append(ii_)

    bleu1 = corpus_bleu(references, hypothesis, weights=(1, 0, 0, 0))
    bleu2 = corpus_bleu(references, hypothesis,
                        weights=(1. / 2., 1. / 2., 0, 0))
    bleu3 = corpus_bleu(references, hypothesis,
                        weights=(1. / 3., 1. / 3., 1. / 3., 0))
    bleu4 = corpus_bleu(references, hypothesis)
    print("bleu1: ", bleu1, "bleu2: ", bleu2, "bleu3: ", bleu3, "bleu4: ",
          bleu4)
ar_max=ar_max, d_max=d_max, ma_max=ma_max) arima_model.train() ar, d, ma, prediction = arima_model.get_output() params = "ar=" + str(ar) + ";d=" + str(d) + ";ma=" + str( ma) + ";training_ratio=" + str(training_ratio) elif (m == "lstm"): #params # lstmCells = 10 lstm_model = lstm.lstm(data=value, epochs=epochs, batch_size=batch_size, training_ratio=training_ratio, sequance_length=sequance_length, lstmCells=lstmCells, learningRate=learningRate) lstm_model.train() params = "lstmCells=" + str( lstmCells ) + ";learningRate=" + str(learningRate) + ";epochs=" + str( epochs) + ";batch_size=" + str( batch_size) + ";training_ratio=" + str( training_ratio) + ";sequance_length=" + str( sequance_length) prediction = lstm_model.get_output() elif (m == "cnn"): #params # CL1filters = 1
test_Y = y[train_size:] # STEP.6:调整数据形状 train_X = np.reshape(train_X, (-1, 1, 10)) test_X = np.reshape(test_X, (-1, 1, 10)) train_Y = np.reshape(train_Y, (-1, 1, 1)) #test_Y = np.reshape(test_X, (1, 218, 1)) print(train_X.shape, train_Y.shape, test_X.shape, test_Y.shape) #print(train_X[0], train_Y[0], test_X[0], test_Y[0]) s, b, h = torch.from_numpy(train_X).size() # STEP.7:建立损失函数和优化器 model = lstm() criterion = nn.MSELoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-2) # STEP.8:训练 print('start train') for e in range(200): var_x = Variable(torch.from_numpy(train_X)).float() var_y = Variable(torch.from_numpy(train_Y)).type(torch.float32) out = model(var_x) loss = criterion(out, var_y) optimizer.zero_grad() loss.backward() optimizer.step()
def train(X=None,
          Y=None,
          data=None,
          epoch=5,
          flatten='mean',
          validation_split=.33,
          dropout=.2,
          layers=3,
          loss='binary_crossentropy',
          optimizer='adam',
          activation='relu',
          activation_out='sigmoid',
          save_model=False,
          neuron_max='auto',
          neuron_last=1,
          batch_size=10,
          verbose=0,
          shape='funnel',
          double_check=False,
          validation=False,
          model='mlp',
          seq_len=50,
          prediction_len='auto',
          dense_neurons=100,
          normalize_window=True,
          reg_mode='linear',
          hyperscan='False',
          w_regularizer='auto',
          w_reg_values=[0, 0]):
    '''Train a new model.

    NOTE: to see the training / test plots in a notebook, remember to run
    ``%matplotlib inline`` first.

    INPUT: data ingestion is very flexible. You can input text (also
    unicode) or labels and things will work; no transformation is needed
    outside of Autonomio. See more details below.

    PARAMETERS:

    X = The input can be indicated in several ways:

        'label' = single column label
        ['a','b'] = multiple column labels
        [1,12] = a range of columns
        [1,2,12] = columns by index

        The data can be of multiple dtypes:

        'int' = any integer values
        'float' = any float value
        'string' = raw text or category labels*

        * use commands.utils wrangler() to process your data first!

    Y = This can be of multiple dtypes:

        'int' = any integer values
        'float' = any float value
        'string' = category labels

        See more about the prediction variable in the 'flatten' entry.

    data = A pandas dataframe with at least one column for the predictor
           variable and one column for the prediction variable.

    epoch = How many epochs will be run for training. More epochs take
            more time.

    flatten = For transforming the y (outcome) variable. For example, if
              the y input is continuous but the prediction is binary, a
              flattening of some sort should be used.

              OPTIONS: 'mean', 'median', 'mode', int, float, 'cat_string',
              'cat_numeric', and 'none'

    validation_split = Stored in the parameters passed to the trainer.

    dropout = The fraction of learning that will be "forgotten" on each
              learning event.

    layers = The number of dense layers the model will have. Note that
             each dense layer is followed by a dropout layer.

    loss = The loss to be used with the model. All the Keras losses are
           available > https://keras.io/losses/

    optimizer = The optimizer to use with the model. All the Keras
                optimizers are available > https://keras.io/optimizers/

    activation = Activation for the hidden (non-output) layers.

    activation_out = Same as 'activation', but for the output layer only.

    save_model = An option to save the model configuration, weights and
                 parameters.

                 OPTIONS: default is 'False'; if 'True' the model is saved
                 with the default name ('model'); if a string, the model
                 name will be that string, e.g. 'titanic'.

    neuron_max = The maximum number of neurons on any layer.

    neuron_last = How many neurons there are in the last layer.

    batch_size = The number of samples that are propagated through the
                 network at one given point in time. The smaller the
                 batch_size, the longer the training will take.

    verbose = This is set to '0' by default. The other options are '1' and
              '2' and change the amount of information you are getting.

    shape = Used for automatically creating a network shape. Currently
            there are 8 options available:

            'funnel', 'rhombus', 'long_funnel', 'brick', 'hexagon',
            'diamon', 'triangle', 'stairs'

    double_check = Makes a 'manual' check of the results provided by the
                   Keras backend and compares the two. This is good when
                   you have doubts about the results.

    validation = Validates in a more robust way than the usual train/test
                 split by initially splitting the dataset in half, where
                 the first half becomes train and test, and the second
                 half becomes the validation data set.

                 OPTIONS: default is 'false'; with 'true' 50% of the data
                 is separated for validation.

    model = Switch for choosing which kind of model is used. The options
            are 'mlp' for multi layer perceptron, 'regression' for
            regression, and 'lstm', which hands X and the parameters to
            lstm() and returns None.

    seq_len, prediction_len, dense_neurons, normalize_window, reg_mode =
            Stored in the parameters passed on to the selected model.

    hyperscan = Enables a mode where a hyperscan function can be run for
                hyperparameter optimization purposes.

    w_regularizer = Adds a weight regularizer to a model. 'auto' mode adds
                    the regularizer to the last layer. Options are the
                    string with the number of layers starting from 0.

    w_reg_values = Two values, for l1 and l2.

    RETURNS: the result of trainer() for non-lstm models; None for the
    'lstm' model or when the required input is missing.
    '''

    parameters = {
        'epoch': epoch,
        'batch_size': batch_size,
        'activation': activation,
        'validation_split': validation_split,
        'loss': loss,
        'optimizer': optimizer,
        'dropout': dropout,
        'layers': layers,
        'neuron_last': neuron_last,
        'activation_out': activation_out,
        'verbose': verbose,
        'flatten': flatten,
        'save_model': save_model,
        'shape': shape,
        'double_check': double_check,
        'validation': validation,
        'neuron_max': neuron_max,
        'model': model,
        'reg_mode': reg_mode,
        'hyperscan': hyperscan,
        'w_regularizer': w_regularizer,
        'w_reg_values': w_reg_values,
        'prediction_len': prediction_len,
        'seq_len': seq_len,
        'dense_neurons': dense_neurons,
        'normalize_window': normalize_window
    }

    # Compare by value: ``is`` tests object identity, which is only
    # accidentally true for equal strings.
    if model == 'lstm':
        if X is None:
            print('Please input data to use lstm model')
            return
        lstm(X, parameters)
        return

    # Only complain when nothing at all was supplied.
    if X is None and Y is None and data is None:
        print('X, Y or data is missing')
        return

    out = trainer(X, Y, data, parameters)
    return out
options=solver_options) test_and_save_model(name, best_model, train_loader, val_loader, test_loader, log, params=scaling_factors) log, best_model = train.train_model_ipm(model, train_loader=train_loader, val_loader=val_loader, test_loader=test_loader, options=solver_options) # Train Contracting model name = "contracting_sub{:d}_val{:d}".format(subject, val_set) model = diRNN.diRNN(nu, width, ny, layers, nBatches=9, nl=torch.tanh) model.init_l2(mu=mu, epsilon=eps+init_offset, init_var=init_var, custom_seed=this_seed+val_set) log, best_model = train.train_model_ipm(model, train_loader=train_loader, val_loader=val_loader, test_loader=test_loader, options=solver_options, LMIs=model.contraction_lmi(mu=mu, epsilon=eps)) test_and_save_model(name, best_model, train_loader, val_loader, test_loader, log, params=scaling_factors) run_fgsa(name, best_model, train_loader, val_loader, test_loader) # Train an LSTM network name = "LSTM_sub{:d}_val{:d}".format(subject, val_set) model = lstm.lstm(nu, lstm_width, ny, layers=layers) log, best_model = train.train_model_ipm(model, train_loader=train_loader, val_loader=val_loader, test_loader=test_loader, options=solver_options) test_and_save_model(name, best_model, train_loader, val_loader, test_loader, log, params=scaling_factors) run_fgsa(name, best_model, train_loader, val_loader, test_loader)
def lstm_forecast(self, epoch, dropout_rate, learning_rate, simulation_size, test_size):
    """Run repeated LSTM forecasts for every column of ``self.fil_stock_prices`` and plot them.

    For each column a ``MinMaxScaler`` is fitted, a fresh ``lstm.lstm`` model is
    built and ``lstm.lstm.forecast`` is called ``simulation_size`` times.  Runs
    whose last ``test_size`` values are all non-negative and never exceed six
    times the maximum of the last ``test_size`` observed values are "accepted".
    The accepted run closest (squared error) to the mean of the accepted runs
    is stored as the best forecast and plotted with the mean and a 1.96-sigma
    band; if that fails, every accepted run is plotted individually instead.

    Args:
        epoch: Passed through to ``lstm.lstm.forecast``.
        dropout_rate: Passed to the ``lstm.lstm`` constructor.
        learning_rate: Passed to the ``lstm.lstm`` constructor.
        simulation_size: Number of forecast runs per column.
        test_size: Length of the tail window used by the acceptance filter and
            by ``self.calculate_accuracy``; also passed to ``forecast``.

    Side effects:
        Sets ``self.results`` and ``self.accepted_results`` (they hold the
        values of the last processed column once the loop ends), writes the
        ``'Dates'`` column and one column per key into
        ``self.lstm_forecast_price``, shows one figure per column and, when
        ``self.save_output`` is truthy, saves each figure as a PNG and writes
        ``self.stock_prices`` to CSV under ``self.output_path``.
    """
    df = self.fil_stock_prices
    sns.set()
    tf.compat.v1.random.set_random_seed(111)

    # Fixed network hyper-parameters.
    num_layers = 1
    size_layer = 128
    timestamp = 5

    for s, key in enumerate(df.keys()):
        print(key)
        print("forecast progress : " + str(s * 100 / df.shape[1]) + " %")

        # Scale the current column; the same slice is used to fit and transform.
        column = df.iloc[:, s:s + 1].astype('float32')
        minmax = MinMaxScaler().fit(column)
        df_log = pd.DataFrame(minmax.transform(column)).dropna()
        # The whole scaled series is used for training (no hold-out split).
        df_train = df_log

        # Use the compat.v1 entry point, matching the seed call above;
        # the bare tf.reset_default_graph does not exist under TF2.
        tf.compat.v1.reset_default_graph()
        modelnn = lstm.lstm(learning_rate, num_layers, df_log.shape[1],
                            size_layer, df_log.shape[1], dropout_rate)

        results = []
        for i in range(simulation_size):
            print('simulation %d' % (i + 1))
            results.append(
                lstm.lstm.forecast(
                    self,
                    modelnn,
                    epoch,
                    num_layers,
                    size_layer,
                    df,
                    df_train,
                    timestamp,
                    test_size,
                    minmax,
                ))
        self.results = results

        # Keep only runs whose tail is non-negative and below 6x the observed
        # maximum of the same tail window.
        actual_tail = df[key].iloc[-test_size:].values
        upper_bound = np.max(actual_tail) * 6
        accepted_results = []
        for r in results:
            tail = np.array(r[-test_size:])
            if (tail < 0).sum() == 0 and (tail > upper_bound).sum() == 0:
                accepted_results.append(r)
        self.accepted_results = accepted_results

        accuracies = [self.calculate_accuracy(actual_tail, r) for r in results]

        plt.figure(figsize=(15, 5))
        stacked = np.array(accepted_results)
        std_dev = stacked.std(axis=0)
        average = stacked.mean(axis=0)
        ci = 1.96 * std_dev
        dates_dt = self.get_forecast_dates(results)

        # Squared distance of every accepted run to the mean run, as plain
        # floats so that the arg-min below is well defined.
        errors = [
            float(np.sum((np.asarray(res) - average) ** 2))
            for res in accepted_results
        ]

        self.lstm_forecast_price['Dates'] = pd.to_datetime(dates_dt)
        # NOTE(review): set_index returns a new frame and the result is
        # discarded, so this line has no effect -- confirm whether the index
        # was meant to be replaced.
        self.lstm_forecast_price.set_index('Dates')

        axis = plt.gca()
        dates = matplotlib.dates.date2num(dates_dt)
        axis.xaxis.set_minor_locator(matplotlib.dates.MonthLocator())
        axis.xaxis.set_minor_formatter(matplotlib.dates.DateFormatter('%m'))
        axis.xaxis.set_major_locator(matplotlib.dates.YearLocator())
        axis.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%Y'))

        try:
            # np.argmin raises ValueError when no run was accepted, which
            # sends us to the per-run fallback below.
            best_forecast = stacked[int(np.argmin(errors))]
            self.lstm_forecast_price[key] = best_forecast
            plt.plot(dates, best_forecast, label="best forecast", color='r')
            plt.plot(dates, average, label="avg forecast", color='b')
            plt.fill_between(dates, (average - ci), (average + ci),
                             label='95% confidence',
                             color='b',
                             alpha=.5)
        except Exception as exc:
            # Best-effort fallback: plot the accepted runs one by one, but let
            # KeyboardInterrupt/SystemExit propagate and say why we fell back.
            print('aggregated forecast plot failed for %s (%s); '
                  'plotting accepted runs individually' % (key, exc))
            for no, r in enumerate(accepted_results):
                plt.plot(dates, r, label='forecast %d' % (no + 1))

        plt.plot(dates[:len(df[key])],
                 df[key],
                 label='true trend (train)',
                 c='k')
        plt.legend()
        plt.title(
            key.split('.')[0] + ' average accuracy: %.2f' %
            (np.mean(accuracies)))

        if self.save_output:
            plt.savefig(os.path.join(
                self.output_path,
                "forecast_LSTM_" + key.split('.')[0] + '.png'),
                        dpi=300)
        plt.show()

    if self.save_output:
        # NOTE(review): this writes self.stock_prices, not the forecast frame
        # filled above -- confirm that is the intended output.
        self.stock_prices.to_csv(
            os.path.join(self.output_path,
                         self.name + '_lstm_stock_prices.csv'))
def task3(return_dict, config, randomize_length, n_steps, epochs, log_dir="logs"):
    """Train and evaluate an LSTM learning-curve predictor with 3-fold cross-validation.

    Configurations and learning curves are loaded from ``./data``.  For every
    fold the training part is split again into training/validation (30 %
    validation), a fresh model is built with ``lstm(...)`` and trained for
    ``epochs`` epochs with ``train_on_batch``.  After each epoch the model is
    scored by how well it predicts the final value of each validation curve
    from the first 5, 10, 20 and 30 steps; the model with the lowest mean of
    those four losses is kept and scored the same way on the fold's test data.

    Args:
        return_dict: Mapping that receives the result under key ``"results"``:
            a dict ``{5: mean, 10: mean, 20: mean, 30: mean}`` of the test loss
            averaged over folds.
        config: Mapping read for the keys ``"config_step"``,
            ``"repeat_config"``, ``"lr"``, ``"batchsize"``, ``"lr_decay"``,
            ``"weight_decay"`` and ``"augment"``; additionally ``"decay"``,
            ``"alpha"``, ``"add_perturbed"`` and ``"add_nontraining"`` are
            read only when the corresponding switch is truthy.
        randomize_length: When truthy, the number of time steps used for each
            training batch is redrawn as ``int(np.random.uniform(5, 21))``
            (plus one when ``config_step`` is set) instead of derived from
            ``n_steps``.
        n_steps: Number of curve steps fed to the model for training and for
            the per-epoch validation loss.
        epochs: Number of training epochs per fold.
        log_dir: Passed to ``tensorboard_log_values``.

    Returns:
        None; results are delivered through ``return_dict``.

    NOTE(review): relies on module-level ``np`` and ``current_time_str``.
    NOTE(review): ``best_model`` is only bound inside the epoch loop, so
    ``epochs == 0`` would fail at the test evaluation with a NameError.
    """
    # Imports are local to the function; matplotlib's backend is switched to
    # 'Agg' before pyplot is imported.
    from sklearn.model_selection import KFold
    from keras.models import clone_model
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from util.loader import load_data
    from util.time_series_data import get_time_series, reshape_X, reshape_y
    from models.lstm import lstm
    from util.common import loss
    from util.tensorboard import tensorboard_log_values
    from preprocessing.standard_scaler import StandardScaler
    from preprocessing.augmentation import add_nontraining_time_series, add_perturbed_time_series

    n_steps_valid = n_steps
    n_steps_test = n_steps
    use_configs = True
    config_step = config["config_step"]
    repeat_config = config["repeat_config"]
    scale_configs = True
    validation_split = 0.3
    evaluate_each = 1
    lr = config["lr"]
    batchsize = config["batchsize"]
    lr_decay = config["lr_decay"]
    decay = 0 if not lr_decay else config["decay"]
    regularize = config["weight_decay"]
    alpha = 0 if not regularize else config["alpha"]
    remove_nonlearning = False
    augment = config["augment"]
    add_perturbed = 0 if not augment else config["add_perturbed"]
    add_nontraining = 0 if not augment else config["add_nontraining"]

    # title of current run
    # (timestamp followed by a suffix for every active hyper-parameter)
    run_name = current_time_str()
    if not randomize_length:
        run_name += "_%is" % n_steps
    else:
        run_name += "_rnd"
    run_name += "_lr%f" % lr
    run_name += "_bs%i" % batchsize
    if lr_decay:
        run_name += "_dc%f" % decay
    if regularize:
        run_name += "_a%f" % alpha
    run_name += "_cstp" if config_step else ""
    run_name += "_rptcnfg" if repeat_config else ""
    if augment:
        run_name += "_augm_%i_%i" % (add_perturbed, add_nontraining)
    print(run_name)

    # functions
    # The helpers below are closures: they read ``config_step``,
    # ``repeat_config``, ``learning_curves`` and ``n_steps_test`` from the
    # enclosing scope at call time.
    def plot_predicted_curves(model, X_test, test_indices, filename=None):
        """Plot the first 20 true test curves (green) with their predicted continuation (red).

        The figure is saved to ``filename`` when one is given and is always
        closed afterwards.  Only referenced from commented-out code in this
        function.
        """
        plt.figure(figsize=(20, 10))
        n_plots = 20
        pred = predict_whole_sequences(model, X_test[:n_plots, :n_steps_test, :])
        for i in range(n_plots):
            plt.subplot(4, 5, i + 1)
            plt.plot(learning_curves[test_indices[i]], "g")
            if config_step:
                plt.plot(range(40), pred[i, :, :], "r")
            else:
                plt.plot(range(1, 40), pred[i, :, :], "r")
        if filename != None:
            plt.savefig(filename)
        plt.close()

    def predict_whole_sequences(model, X):
        """Extend ``X`` step by step with the model's own predictions.

        ``X`` is copied into a zero buffer with ``final_step`` time steps (41
        with ``config_step``, otherwise 40).  For every missing step the model
        is run on the prefix and the last prediction is written into the last
        feature of that step; with ``repeat_config`` the remaining features are
        copied from the previous step.

        Returns the output of the last ``model.predict`` call.

        NOTE(review): if ``X`` already has ``final_step`` or more steps the
        loop body never runs and ``pred`` is unbound at the return.
        """
        n = X.shape[0]
        true_steps = X.shape[1]
        d = X.shape[2]
        final_step = 41 if config_step else 40
        XX = np.zeros((n, final_step, d))
        XX[:, :true_steps, :] = X
        for j in range(true_steps, final_step):
            pred = model.predict(XX[:, :j, :])
            XX[:, j, -1] = pred[:, -1, 0]
            if repeat_config:
                XX[:, j, :-1] = XX[:, j - 1, :-1]
        return pred

    def evaluate_step40_loss(model, X_test, test_indices, n_steps_test):
        """Return ``loss`` between the true final curve values and those predicted from a prefix.

        ``n_steps_test`` is a parameter here and shadows the outer variable of
        the same name; it is increased by one when ``config_step`` is set.
        """
        if config_step:
            n_steps_test += 1
        final_y = [learning_curves[index][-1] for index in test_indices]
        pred = predict_whole_sequences(model, X_test[:, :n_steps_test, :])
        final_y_hat = pred[:, -1, 0]
        return loss(np.array(final_y), final_y_hat)

    # file name for plots
    tmp_file_name = "tmp/model_%s" % run_name

    # With a config step the input sequences carry one extra leading step, so
    # validation/test lengths grow by one; without it training uses one step
    # less than n_steps.
    if config_step:
        n_steps_train = n_steps
        n_steps_valid += 1
        n_steps_test += 1
    else:
        n_steps_train = n_steps - 1

    # read data
    configs, learning_curves = load_data(source_dir='./data')
    # Disabled by the constant above: would drop curves whose final value is
    # 0.8 or higher.
    if remove_nonlearning:
        keep_indices = [
            i for i in range(len(learning_curves))
            if learning_curves[i][-1] < 0.8
        ]
        configs = [configs[i] for i in keep_indices]
        learning_curves = [learning_curves[i] for i in keep_indices]
    n_params = len(configs[0]) if use_configs else 0
    # Input feature count passed to lstm(): config parameters plus one.
    d = n_params + 1

    # 3 fold CV:
    n_folds = 3
    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=42)
    fold = 0
    fold_test_errors = []
    for training_indices, test_indices in k_fold.split(learning_curves):
        fold = fold + 1

        # split into training and validation
        training_indices = np.random.permutation(training_indices)
        valid_split_index = int(validation_split * len(training_indices))
        validation_indices = training_indices[:valid_split_index]
        training_indices = training_indices[valid_split_index:]

        # prepare training data:
        configs_train = [configs[index] for index in training_indices]
        learning_curves_train = [
            learning_curves[index] for index in training_indices
        ]
        # The scaler is fitted on the training configs only and re-used for
        # validation and test below.
        if scale_configs:
            scaler = StandardScaler()
            configs_train = scaler.fit_transform(configs_train)
        # Optional augmentation of the training set only.
        if add_perturbed > 0:
            configs_train, learning_curves_train = add_perturbed_time_series(
                configs_train, learning_curves_train, add_perturbed)
        if add_nontraining > 0:
            configs_train, learning_curves_train = add_nontraining_time_series(
                configs_train, learning_curves_train, add_nontraining)
        n_train = len(configs_train)
        X_train = get_time_series(configs_train,
                                  learning_curves_train,
                                  use_configs=use_configs,
                                  repeat_config=repeat_config,
                                  config_step=config_step)
        X_train = reshape_X(X_train)
        Y_train = learning_curves_train

        # prepare validation data:
        configs_valid = [configs[index] for index in validation_indices]
        learning_curves_valid = [
            learning_curves[index] for index in validation_indices
        ]
        if scale_configs:
            configs_valid = scaler.transform(configs_valid)
        X_valid = get_time_series(configs_valid,
                                  learning_curves_valid,
                                  use_configs=use_configs,
                                  repeat_config=repeat_config,
                                  config_step=config_step)
        X_valid = reshape_X(X_valid)

        # prepare test data:
        configs_test = [configs[index] for index in test_indices]
        learning_curves_test = [
            learning_curves[index] for index in test_indices
        ]
        if scale_configs:
            configs_test = scaler.transform(configs_test)
        X_test = get_time_series(configs_test,
                                 learning_curves_test,
                                 use_configs=use_configs,
                                 repeat_config=repeat_config,
                                 config_step=config_step)
        X_test = reshape_X(X_test)

        n_valid = len(validation_indices)
        n_test = len(test_indices)
        Y_train = reshape_y(Y_train)
        # Targets are the curve values shifted by one step relative to the
        # inputs (slice starts at index 1).
        Y_valid = [
            learning_curves_valid[i][1:(n_steps_valid + 1)]
            for i in range(n_valid)
        ]
        Y_test = [
            learning_curves_test[i][1:(n_steps_test + 1)]
            for i in range(n_test)
        ]
        n_batches = int(np.ceil(n_train / batchsize))

        # A fresh model per fold.
        model = lstm(d,
                     lr,
                     decay=decay,
                     many2many=True,
                     regularize=regularize,
                     alpha=alpha,
                     batchsize=None)

        # Best validation scores seen so far in this fold, keyed by the number
        # of input steps.
        best_valid_e40 = {}
        for k in [5, 10, 20, 30]:
            best_valid_e40[k] = float("inf")
        best_mean_valid_e40 = float("inf")
        best_valid_e40_epoch = -1

        for epoch in range(epochs):
            print("epoch = %i" % epoch)

            # random permutation of training data
            permutation = np.random.permutation(range(n_train))
            X_train_permuted = X_train[permutation, :, :]
            Y_train_permuted = Y_train[permutation, :, :]
            training_losses = []
            for batch in range(n_batches):
                if randomize_length:
                    n_steps_train = int(np.random.uniform(5, 21))
                    if config_step:
                        n_steps_train += 1
                batch_begin = batch * batchsize
                batch_end = batch_begin + batchsize
                x = X_train_permuted[batch_begin:batch_end, :n_steps_train, :]
                y = Y_train_permuted[batch_begin:batch_end,
                                     1:(n_steps_train + 1)]
                # The prediction is taken before the weight update, so the
                # reported training loss is the pre-update loss of each batch.
                y_hat = model.predict(x)
                model.train_on_batch(x, y)
                training_losses.append(loss(y, y_hat))
            training_loss = np.mean(training_losses)
            print("training loss = %f" % training_loss)

            # validation
            # (the modulo-1 condition is always true, so this runs every epoch)
            if (epoch + 1) % 1 == 0:
                y_hat = model.predict(X_valid[:, :n_steps_valid, :])[:, :, 0]
                validation_loss = np.mean(loss(Y_valid, y_hat))
                print("validation loss = %f" % validation_loss)

            # Final-step evaluation from 5/10/20/30 input steps, logging and
            # best-model tracking.
            if (epoch + 1) % evaluate_each == 0:
                print(lr, decay, batchsize)
                print("best[:5] = %f @ %i" %
                      (best_valid_e40[5], best_valid_e40_epoch))
                print("best[:10] = %f @ %i" %
                      (best_valid_e40[10], best_valid_e40_epoch))
                print("best[:20] = %f @ %i" %
                      (best_valid_e40[20], best_valid_e40_epoch))
                print("best[:30] = %f @ %i" %
                      (best_valid_e40[30], best_valid_e40_epoch))
                valid_e40_5 = evaluate_step40_loss(model, X_valid,
                                                   validation_indices, 5)
                print("validation MSE[:5]@40 = %f" % valid_e40_5)
                valid_e40_10 = evaluate_step40_loss(model, X_valid,
                                                    validation_indices, 10)
                print("validation MSE[:10]@40 = %f" % valid_e40_10)
                valid_e40_20 = evaluate_step40_loss(model, X_valid,
                                                    validation_indices, 20)
                print("validation MSE[:20]@40 = %f" % valid_e40_20)
                valid_e40_30 = evaluate_step40_loss(model, X_valid,
                                                    validation_indices, 30)
                print("validation MSE[:30]@40 = %f" % valid_e40_30)
                mean_valid_e40 = np.mean(
                    [valid_e40_5, valid_e40_10, valid_e40_20, valid_e40_30])
                prefix = "losses_f%i/" % fold
                tensorboard_log_values(
                    log_dir, run_name, epoch, {
                        prefix + "training": training_loss,
                        prefix + "validation": validation_loss,
                        prefix + "validation_E40_5": valid_e40_5,
                        prefix + "validation_E40_10": valid_e40_10,
                        prefix + "validation_E40_20": valid_e40_20,
                        prefix + "validation_E40_30": valid_e40_30,
                        prefix + "validation_E40_mean": mean_valid_e40
                    })
                if mean_valid_e40 < best_mean_valid_e40:
                    print("* new best model *")
                    best_valid_e40_epoch = epoch
                    best_valid_e40[5] = valid_e40_5
                    best_valid_e40[10] = valid_e40_10
                    best_valid_e40[20] = valid_e40_20
                    best_valid_e40[30] = valid_e40_30
                    best_mean_valid_e40 = mean_valid_e40
                    # Snapshot: clone the architecture, then copy the current
                    # weights into the clone.
                    best_model = clone_model(model)
                    best_model.set_weights(model.get_weights())
                # The string literal below is disabled plotting code kept as a
                # no-op expression statement.
                """if (epoch + 1) % 10 == 0: filename = tmp_file_name + "_f%i_e%i.png" % (fold, epoch) print(filename) plot_predicted_curves(model, X_test, test_indices, filename = filename)"""

        # evaluation on test data
        # (uses the best snapshot of this fold, not the final weights)
        test_e40 = {}
        test_e40[5] = evaluate_step40_loss(best_model, X_test, test_indices, 5)
        test_e40[10] = evaluate_step40_loss(best_model, X_test, test_indices,
                                            10)
        test_e40[20] = evaluate_step40_loss(best_model, X_test, test_indices,
                                            20)
        test_e40[30] = evaluate_step40_loss(best_model, X_test, test_indices,
                                            30)
        fold_test_errors.append(test_e40)
        print(test_e40)
        #filename = tmp_file_name + "_f%i_best.png" % fold
        #print(filename)
        #plot_predicted_curves(best_model, X_test, test_indices, filename = filename)

    # Average the per-fold test losses for every input length and hand them
    # back through return_dict.
    means_e40 = {}
    for steps in [5, 10, 20, 30]:
        print("MSE@40 for %i input steps:" % steps)
        e40_folds = [fold_res[steps] for fold_res in fold_test_errors]
        print(e40_folds)
        mean_e40 = np.mean(e40_folds)
        print("mean = %f" % mean_e40)
        means_e40[steps] = mean_e40
    return_dict["results"] = means_e40
opt.image_width = drnet_opt.image_width print(opt) # ---------------- optimizers ---------------- if opt.optimizer == 'adam': opt.optimizer = optim.Adam elif opt.optimizer == 'rmsprop': opt.optimizer = optim.RMSprop elif opt.optimizer == 'sgd': opt.optimizer = optim.SGD else: raise ValueError('Unknown optimizer: %s' % opt.optimizer) import models.lstm as models lstm = models.lstm(opt.pose_dim, opt.rnn_size, opt.batch_size) optimizer = opt.optimizer(lstm.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999)) # --------- loss functions ------------------------------------ mse_criterion = nn.MSELoss() # --------- transfer to gpu ------------------------------------ lstm.cuda() mse_criterion.cuda() # --------- load a dataset ------------------------------------ train_data, test_data, load_workers = utils.load_dataset(opt) train_loader = DataLoader(train_data, num_workers=load_workers,
opt.optimizer = optim.Adam elif opt.optimizer == 'rmsprop': opt.optimizer = optim.RMSprop elif opt.optimizer == 'sgd': opt.optimizer = optim.SGD else: raise ValueError('Unknown optimizer: %s' % opt.optimizer) import models.lstm as lstm_models if opt.model_dir != '': frame_predictor = saved_model['frame_predictor'] posterior = saved_model['posterior'] prior = saved_model['prior'] else: frame_predictor = lstm_models.lstm(opt.g_dim + opt.z_dim, opt.g_dim, opt.rnn_size, opt.predictor_rnn_layers, opt.batch_size) posterior = lstm_models.gaussian_lstm(opt.g_dim, opt.z_dim, opt.rnn_size, opt.posterior_rnn_layers, opt.batch_size) prior = lstm_models.gaussian_lstm(opt.g_dim, opt.z_dim, opt.rnn_size, opt.prior_rnn_layers, opt.batch_size) frame_predictor.apply(utils.init_weights) posterior.apply(utils.init_weights) prior.apply(utils.init_weights) if opt.model == 'dcgan': if opt.image_width == 64: import models.dcgan_64 as model elif opt.image_width == 128: import models.dcgan_128 as model
def gen_comment(**kwargs):
    """Generate one comment per built-in start phrase with a trained model and print them.

    Every keyword argument is written onto the module-level ``opt`` object
    first.  The vocabulary is read from ``opt.pickle_path`` when that file
    exists, otherwise it is taken from the field returned by ``load_data()``.
    The network selected by ``opt.model`` (``'lstm'`` or ``'lstm_twin'``) is
    built, its weights are loaded from ``opt.model_path`` and, for each start
    phrase, the phrase's characters are fed in first and then the top-1
    prediction is fed back until ``'<eos>'`` is produced or
    ``opt.max_gen_len`` steps have run.

    Args:
        **kwargs: Attribute overrides applied to ``opt`` via ``setattr``.

    Returns:
        None; the generated comments are printed.

    Raises:
        ValueError: If ``opt.model`` is neither ``'lstm'`` nor ``'lstm_twin'``.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)  # setattr(object, name, value): set the attribute value

    print('Loading model from {}'.format(opt.model_path))

    # Load the vocabulary.
    if os.path.exists(opt.pickle_path):
        # 'word2ix' is stored as an object array (hence .item()), which NumPy
        # >= 1.16.3 refuses to load unless allow_pickle=True is passed.
        # SECURITY: unpickling executes arbitrary code -- only point
        # opt.pickle_path at files you trust.
        data = np.load(opt.pickle_path, allow_pickle=True)
        word2ix, ix2word = data['word2ix'].item(), data['ix2word']
    else:
        _, _, _, field = load_data()
        word2ix = field.vocab.stoi
        ix2word = field.vocab.itos

    # Load the model.
    if opt.model == 'lstm':
        model = lstm(len(word2ix), 300, 150)
    elif opt.model == 'lstm_twin':
        model = lstm_twin(len(word2ix), 300, 150)
    else:
        # Fail early with a clear message instead of hitting an unbound
        # ``model`` further down.
        raise ValueError('Unknown model: {}'.format(opt.model))

    # Keep tensors on the storage they are deserialized to (no remapping).
    state_dict = t.load(opt.model_path, map_location=lambda s, l: s)
    model.load_state_dict(state_dict)
    print("加载完毕")

    start_word = [
        '用这个牌子好多年了', '篮球手感', '蛋白质粉', '价格', '什么也不想说了', '有用过这个牌子',
        '箱子外观很漂亮', '家里人', '店家', '比我预料中的好'
    ]
    if opt.use_gpu:
        model.cuda()

    comments = []
    for prefix in start_word:
        result = list(prefix)
        token = Variable(t.Tensor([word2ix['<start>']]).view(1, 1).long())
        if opt.use_gpu:
            token = token.cuda()

        # Start every comment from a fresh recurrent state.  The twin model
        # already did so; the plain LSTM used to carry the state of the
        # previous comment over into the next one.
        if opt.model == 'lstm_twin':
            model.batch_size = 1
            hidden = model.init_hidden()
        else:
            hidden = None

        for i in range(opt.max_gen_len):
            if opt.model == 'lstm':
                output, hidden = model(token, hidden)
            else:
                output = model.work(token, hidden)
                hidden = output[2]
                output = output[0]

            if i < len(prefix):
                # Still consuming the given prefix: feed its next character.
                w = result[i]
                token = Variable(token.data.new([word2ix[w]])).view(1, 1)
            else:
                # Free generation: feed back the most likely next token.
                top_index = output.data[0].topk(1)[1][0]
                w = ix2word[top_index]
                result.append(w)
                token = Variable(token.data.new([top_index])).view(1, 1)

            if w == '<eos>':
                # Drop the end marker itself from the output.
                del result[-1]
                break
        comments.append(result)

    print("打印评论:")
    for comment in comments:
        print(''.join(comment))