def main():
    train_dataset, train_label, test_dataset = get_data(True)
    print(train_dataset[:, 1, :])
    print(train_dataset[:, 1000, :])
    # hold out the first 1000 samples for validation
    validation_dataset, validation_label = train_dataset[:, :1000, :], train_label[:1000]
    train_dataset, train_label = train_dataset[:, 1000:, :], train_label[1000:]
    net = SentimentAnalysisModule(300, 1000, 2).to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
    batch = 32
    for epoch in range(1000):
        avg_loss = 0
        cnt = 0
        net.train()
        for i in range(0, train_dataset.size()[1], batch):
            cnt += 1
            input_data = Variable(train_dataset[:, i:i+batch, :]).to(device)
            label = Variable(train_label[i:i+batch].long()).to(device)
            output = net(input_data)[-1, :, :]
            optimizer.zero_grad()
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
        avg_loss /= cnt
        print(f'epoch {epoch}: loss = {avg_loss}')
        net.eval()
        calc_accuracy(net, validation_dataset, validation_label, epoch)
        if epoch % 5 == 0 and epoch > 0:
            torch.save(net.state_dict(), f'./checkpoints/sentiment_{epoch}.pt')
def check_model(path=MODEL_PATH, file=SAMPLE_CSV_FILE, nsamples=2):
    '''see predictions generated for the training dataset'''
    # load model
    model = load_model(path)
    # load data
    data, dic = get_data(file)
    rows, questions, true_answers = encode_data(data, dic)
    # visualize model graph
    # plot_model(model, to_file='tableqa_model.png')
    # predict answers
    prediction = model.predict([rows[:nsamples], questions[:nsamples]])
    print prediction
    predicted_answers = [[np.argmax(character) for character in sample] for sample in prediction]
    print predicted_answers
    print true_answers[:nsamples]
    # one hot encode answers
    # true_answers = [to_categorical(answer, num_classes=len(dic)) for answer in answers[:nsamples]]
    # decode chars from char ids
    inv_dic = {v: k for k, v in dic.iteritems()}
    for i in xrange(nsamples):
        print '\n'
        # print 'Predicted answer: ' + ''.join([dic[char] for char in sample])
        print 'Table: ' + ''.join([inv_dic[char_id] for char_id in rows[i] if char_id != 0])
        print 'Question: ' + ''.join([inv_dic[char_id] for char_id in questions[i] if char_id != 0])
        print 'Answer(correct): ' + ''.join([inv_dic[char_id] for char_id in true_answers[i] if char_id != 0])
        print 'Answer(predicted): ' + ''.join([inv_dic[char_id] for char_id in predicted_answers[i] if char_id != 0])
def run(epochs, splits_path, splits_name, goal="classification", load_model=None):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(f"Running on {device}.")

    # Build dataset
    t0 = time.time()
    train_loader, test_loader, train_files, test_files = load_data.get_data(
        dataset_path=dataset_path, batch_size=32, splits_path=splits_path,
        splits_name=splits_name, num_workers=0, goal=goal)
    print(f"Dataset loaded in {time.time()-t0:.3f}s")

    # Build aggregating dataset for evaluation
    aggregator = aggregate.AggregatorDataset(test_files)

    # Build or load model
    model = models.ResNet50(pretrained=True, goal=goal)
    models_path = os.path.join('models', 'resnet50')
    if load_model:
        model.load_state_dict(torch.load(os.path.join(models_path, load_model)))
        print("Loaded model", load_model)
    model.to(device)  # puts model on GPU / CPU

    modelManager = ModelManager(model, models_path, device, train_loader,
                                test_loader, aggregator, goal)
    modelManager.train(epochs, verbose=False)
def main(name, device='cpu', samples=1500):
    '''First program to execute. It fixes the data format, so the following
    parameters should not be modified afterwards:
        - backcast and forecast length
        - iteration: determines the number of samples
        - signal: characteristics of the signal that will be analyzed.
    Data are stored in easily reusable .txt files, following the format: xtrain_name.txt'''
    # create the directories that will be useful afterwards
    datapath = './data/{}/datas'.format(name)
    os.makedirs(datapath)
    os.makedirs('./data/{}/predictions'.format(name))
    os.makedirs('./data/{}/out'.format(name))
    # generate the signal which will be analyzed
    length_seconds, sampling_rate = 1000, 150  # that makes 150000 pts
    freq_list = [0.5]
    print('----creating the signal, please wait------')
    sig = gs.generate_signal(length_seconds, sampling_rate, freq_list)
    print('finished: we start storing it in a csv file')
    gs.register_signal(sig[0], './data/{}/signal'.format(name))
    print('----we got it: time to create the ndarray-----')
    xtrain, ytrain, xtest, ytest = get_data(
        backcast_length, forecast_length, limit,
        './data/{}/signal.csv'.format(name), copy=samples)
    np.savetxt(datapath + '/xtrain.txt', xtrain)
    np.savetxt(datapath + '/ytrain.txt', ytrain)
    np.savetxt(datapath + '/xtest.txt', xtest)
    np.savetxt(datapath + '/ytest.txt', ytest)
    print('--------- name of the file you used: {} ---------'.format(name))
def set_conf_get_df(self):
    root_temp = self.sRoot.text()
    file_key_temp = self.sFileKey.text().split(",")
    if not len(root_temp) < 2:
        self.root = root_temp
    if not len(file_key_temp[0]) == 0:
        self.file_key = file_key_temp
    self.data_base = load_data.get_data(
        load_data.get_valid_files(self.root, self.file_key))
def make_class(idx=0, crop=False, problem='hf'):
    if problem == "hf":
        if crop:
            pass
        else:
            X, y = get_data(id=idx+1, event_code=[6,10,14], filter=[0.5, 45], t=[0., 4])
    if problem == "lr":
        if crop:
            pass
        else:
            X, y = get_data(id=idx+1, event_code=[4,8,12], filter=[0.5, 45], t=[0., 4])
    if problem == "lh":
        if crop:
            pass
        else:
            Xl, yl = get_data_one_class(id=idx+1, event_code=[4,8,12], filter=[0.5, 45], t=[0, 4], classid=2)
            Xh, yh = get_data_one_class(id=idx+1, event_code=[6,10,14], filter=[0.5, 45], t=[0, 4], classid=2)
            X = np.vstack((Xl, Xh))
            y = np.hstack((yl, yh+1))
    if problem == "rh":
        if crop:
            pass
        else:
            Xr, yr = get_data_one_class(id=idx+1, event_code=[4,8,12], filter=[0.5, 45], t=[0, 4], classid=3)
            Xh, yh = get_data_one_class(id=idx+1, event_code=[6,10,14], filter=[0.5, 45], t=[0, 4], classid=2)
            X = np.vstack((Xr, Xh))
            y = np.hstack((yr-1, yh+1))
    if problem == "lf":
        if crop:
            pass
        else:
            Xl, yl = get_data_one_class(id=idx+1, event_code=[4,8,12], filter=[0.5, 45], t=[0, 4], classid=2)
            Xf, yf = get_data_one_class(id=idx+1, event_code=[6,10,14], filter=[0.5, 45], t=[0, 4], classid=3)
            X = np.vstack((Xl, Xf))
            y = np.hstack((yl, yf))
    if problem == "rf":
        if crop:
            pass
        else:
            Xr, yr = get_data_one_class(id=idx+1, event_code=[4,8,12], filter=[0.5, 45], t=[0, 4], classid=3)
            Xf, yf = get_data_one_class(id=idx+1, event_code=[6,10,14], filter=[0.5, 45], t=[0, 4], classid=3)
            X = np.vstack((Xr, Xf))
            y = np.hstack((yr-1, yf))
    return X, y
def save_test_data(path1, path2):
    X, y = ld.get_data(path1, path2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1, random_state=42)
    X_test = np.array([img/255. for img in X_test]).astype('float32')
    Y_test = np_utils.to_categorical(y_test, 2)
    test_data = {'X_test': X_test, 'Y_test': Y_test}
    pickle.dump(test_data, open('test_data.p', 'wb'))
def main():
    parser = build_parser()
    options = parser.parse_args()
    ######
    # print(parser)
    from load_data import get_data
    from parameterfile import (parameter_file, parameter_file_non_linear_coupling)

    var_num = 42
    data_name = options.data_name
    X_train, y_train, X_test, y_test = get_data(data_name, var_num)

    try:
        # fixed: the closing parenthesis belongs after X_train / X_test, not after the comparison
        assert np.min(X_train) >= 0 and np.max(X_train) <= 1
    except AssertionError:
        logging.error(
            "Train Data is NOT normalized. Hint: Go to get_data() function and normalize the data to lie in the range [0, 1]",
            exc_info=True)
    try:
        assert np.min(X_test) >= 0.0 and np.max(X_test) <= 1.0
    except AssertionError:
        logging.error(
            "Test Data is NOT normalized. Hint: Go to get_data() function and normalize the data to lie in the range [0, 1]",
            exc_info=True)

    ### The following parameters are different for different datasets
    ### IRIS
    q2 = 0.56
    coeff0 = 0.999 / 2.0
    coeff1 = 0.999 / 2.0
    coeff2 = 1 - (coeff0 + coeff1)
    # --------------------------------------
    a, b, c, q, length, num_classes, samples_per_class, check, details, var, method, epsilon = parameter_file_non_linear_coupling(
        data_name)

    from Codes import (skew_tent, iterations, firingtime_calculation,
                       probability_calculation, class_avg_distance,
                       cosine_similar_measure, class_wise_data,
                       test_split_generator, chaos_method, CHAOSNET)
    from layer_2_non_linear_coupling import (chaos_second_layer, CHAOSNETWORKS)

    y_pred_val, avg_class_dist_1, ACC, PRECISION, RECALL, F1SCORE, avg_total_class_prob, test_proba = CHAOSNETWORKS(
        X_train, y_train, X_test, y_test, num_classes, samples_per_class,
        check, q, a, b, c, length, var, details, method, epsilon, data_name,
        q2, coeff0, coeff1, coeff2)
def compute_test():
    d_test = data.get_data(test_fname, label, sample, replicate, incl_curvature,
                           load_attn1, load_attn2, modelpkl_fname1, modelpkl_fname2,
                           preloadn2v, out_channels=8, heads=8, negative_slope=0.2,
                           dropout=0.4)
    if False:
        # batch it to keep on GPU
        model.to(device)  # assumes cuda specified
        cd_test = data.ClusterData(d_test, num_parts)
        cl_test = data.ClusterLoader(cd_test, batch_size, shuffle=True)
        batch_loss_test = []
        batch_acc_test = []
        for batch_test in cl_test:
            batch_test = batch_test.to(device)
            model.eval()
            if 'transformer' in model_name or 'set' in model_name:
                output = model(
                    batch_test,
                    utils.edge_set_reshape(batch_test).float().to(device))
            else:
                output = model(batch_test)
            loss_test = F.nll_loss(output, batch_test.y)
            batch_acc_test.append(utils.accuracy(output, batch_test.y).item())
            batch_loss_test.append(loss_test.item())
        print('Test set results:')
        print('  <loss>_batch={:.4f}'.format(np.mean(batch_loss_test)))
        print('  <acc>_batch ={:.4f}'.format(np.mean(batch_acc_test)))
    else:
        # keep on cpu
        model.eval()
        if 'transformer' in model_name or 'set' in model_name:
            output = model(d_test, utils.edge_set_reshape(d_test).float())
        else:
            output = model(d_test)
        loss_test = F.nll_loss(output, d_test.y).item()
        acc_test = utils.accuracy(output, d_test.y).item()
        print('Test set results:')
        print('  loss: {:.4f}'.format(loss_test))
        print('  accuracy: {:.4f}'.format(acc_test))
def import_data(path1, path2):
    X, y = ld.get_data(path1, path2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.1, random_state=42)
    # reshaping not working - so using this hack
    X_train = np.array([img/255. for img in X_train]).astype('float32')
    X_test = np.array([img/255. for img in X_test]).astype('float32')
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)
    return X_train, X_test, Y_train, Y_test
def test_train_model(file=SAMPLE_CSV_FILE):
    data, dic = get_data(file)
    rows, questions, answers = encode_data(data, dic)
    # rows, questions, answers = one_hot_encode_data(data, dic)
    print '#samples:', len(rows)
    # training_data = (rows[:split], questions[:split], answers[:split])
    # print '#samples for training:', len(training_data[0])
    # validation_data = (rows[split:], questions[split:], answers[split:])
    # print '#samples for validation:', len(validation_data[0])
    model = train_model((rows, questions, answers), dic)
    model.save(MODEL_PATH)
def __init__(self, parent=None):
    self.root = os.getcwd()
    self.file_key = [".txt"]
    self.topics = []
    self.tags = []
    self.content = []
    self.all = []
    self.data_base = load_data.get_data(
        load_data.get_valid_files(self.root, self.file_key))
    self.shower = PrettyShower()
    QtWidgets.QWidget.__init__(self, parent=None)
    vLayout = QtWidgets.QVBoxLayout(self)
    # first line of buttons
    hLayout0 = QtWidgets.QHBoxLayout()
    # path button
    self.sRoot = QtWidgets.QLineEdit(self)
    self.sRoot.setPlaceholderText(os.getcwd())
    # file key
    self.sFileKey = QtWidgets.QLineEdit(self)
    self.sFileKey.setPlaceholderText("[.txt]")
    # or search
    self.sAll = QtWidgets.QLineEdit(self)
    self.sAll.setPlaceholderText("Tags or Content")
    # set button
    self.loadBtn0 = QtWidgets.QPushButton("Set", self)
    # set button function, load data function call
    self.loadBtn0.clicked.connect(self.set_conf_get_df)
    hLayout0.addWidget(self.sRoot)
    hLayout0.addWidget(self.sFileKey)
    hLayout0.addWidget(self.loadBtn0)
    hLayout0.addWidget(self.sAll)
    vLayout.addLayout(hLayout0)
    # second line of buttons
    hLayout = QtWidgets.QHBoxLayout()
    self.sTopics = QtWidgets.QLineEdit(self)
    self.sTopics.setPlaceholderText("Topics")
    self.sTags = QtWidgets.QLineEdit(self)
    self.sTags.setPlaceholderText("Tags")
    self.sContent = QtWidgets.QLineEdit(self)
    self.sContent.setPlaceholderText("Content")
    self.loadBtn = QtWidgets.QPushButton("Search", self)
    hLayout.addWidget(self.sTopics)
    hLayout.addWidget(self.sTags)
    hLayout.addWidget(self.sContent)
    hLayout.addWidget(self.loadBtn)
    vLayout.addLayout(hLayout)
    self.loadBtn.clicked.connect(self.search_df)
def get_test_data(model_name, lang, max_seq_len=256, batch_size=64, data_dir=DATA_DIR):
    if model_name == "han":
        as_heirarchy = True
    else:
        as_heirarchy = False
    titles_vocab_size = 0
    if model_name == "clstm":
        if lang == "en":
            codes_titles_file = os.path.join(BASE_DIR, "codes_and_titles_en.txt")
        else:
            codes_titles_file = os.path.join(BASE_DIR, "codes_and_titles_de.txt")
        T, titles_word2index = get_titles_T(codes_titles_file)
        titles_vocab_size = len(titles_word2index)
    else:
        T = None
    train_file = os.path.join(data_dir, "train_data.pkl")
    dev_file = os.path.join(data_dir, "dev_data.pkl")
    test_file = os.path.join(data_dir, "test_data.pkl")
    _, dev_data, test_data, word2index = get_data(train_file, dev_file,
                                                  use_data=lang,
                                                  max_seq_len=max_seq_len,
                                                  as_heirarchy=as_heirarchy,
                                                  max_sents_in_doc=10,
                                                  max_words_in_sent=40,
                                                  test_file=test_file)
    # dev data
    Xdev, ydev, ids_dev = dev_data
    vocab_size = len(word2index)
    num_classes = ydev[0].shape[0]
    # test data
    Xtest, ids_test = test_data
    dev_dataloader = batched_data(Xdev, ids_dev, batch_size=batch_size)
    test_dataloader = batched_data(Xtest, ids_test, batch_size=batch_size)
    return test_dataloader, dev_dataloader, vocab_size, titles_vocab_size, num_classes, T
def rolling(self, i=0, batch_size=2048):
    x_train, y_train, x_test, y_test = get_data(i)
    early_stopping = EarlyStopping('loss', 0.0001, 5)
    # use the batch_size argument instead of a hard-coded 2048
    self.model.fit(x_train, y_train, batch_size=batch_size, epochs=100,
                   callbacks=[early_stopping])
    y_pred = self.model.predict(x_test, batch_size=500)
    r = pd.DataFrame({
        'change': y_test.flatten(),
        'pred': y_pred.flatten()
    })
    self.save_result(i, y_pred, r)
    if i % 12 == 0 and i > 0:
        self.save_model()
def m_minimize_bynetwork(x, val_x, train_label, val_label, batch_size, self_made_m,
                         prefix, iteration, num_epoch, learning_rate, k, a):
    # use the saved model for the second initialization
    model_loaded = mx.model.FeedForward.load(prefix, iteration)
    # load the initial parameters
    params = model_loaded.get_params()  # get model parameters
    arg_params = params['arg_params']
    # build the training set
    train, test = load_data.get_data(x, val_x, train_label, val_label, batch_size, self_made_m)
    # load the network that optimizes M
    net = mlp_model.modelM(k, a)
    model = mx.model.FeedForward(
        symbol=net,  # network structure
        num_epoch=num_epoch + 100,  # number of data passes for training
        learning_rate=learning_rate,  # learning rate of SGD
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        # optimizer=SelfOptimizer,
        # optimizer=optimizer,
        arg_params=arg_params)
    metric = load_data.Auc()
    print "network loaded, start training"
    model.fit(
        X=train,  # training data
        eval_metric=metric,
        # eval_data=test,  # validation data
        batch_end_callback=mx.callback.Speedometer(batch_size, 600 * 600 / batch_size,
                                                   iteration=iteration, minwhich='m-')
        # output progress for each 200 data batches
    )
    model.save(prefix + '-M', iteration)
    model_loaded = mx.model.FeedForward.load(prefix + '-M', iteration)
    params = model_loaded.get_params()  # get model parameters
    arg_params = params['arg_params']
    m = arg_params['M'].asnumpy()
    return m
def load_split_data(data_size, test_p):
    # Load data and split into train set, test set randomly.
    # data_size is either "100k", "1m", "10m" or "20m".
    # test_p is a float between 0 - 1 indicating the portion of data held out as test set
    print("split data randomly")
    # Load ratings, data is already permuted in get_data
    ratings = get_data(data_size)
    nb_users = int(np.max(ratings[:, 0]))
    nb_movies = int(np.max(ratings[:, 1]))
    # split test/train set
    test_size = int(len(ratings) * test_p)
    test_ratings = ratings[:test_size]
    train_ratings = ratings[test_size:]
    # train_ratings is sorted by user index
    train_ratings = train_ratings[train_ratings[:, 0].argsort()]
    # save test and train data in case more training is needed on this split
    np.save("Data/" + data_size + "_" + str(int(test_p * 100)) + "percent_test.npy",
            test_ratings)
    np.save("Data/" + data_size + "_" + str(int(test_p * 100)) + "percent_train.npy",
            train_ratings)
    # test_ratings and train_ratings are numpy arrays of user id | item id | rating
    return test_ratings, train_ratings, nb_users, nb_movies, len(train_ratings)
def train(file=SAMPLE_CSV_FILE):
    # load data
    data, dic = get_data(file)
    # tables_train, questions_train, answers_train = encode_data(data, dic)
    tables_train, questions_train, answers_train = one_hot_encode_data(data, dic)
    # compute data stats
    print '#samples:', len(tables_train)
    len_dic = len(dic) + 1
    print 'Vocabulary size:', len_dic
    row_maxlen = tables_train.shape[1]
    question_maxlen = questions_train.shape[1]
    answer_maxlen = len(answers_train[0])
    print 'Max length of a row:', row_maxlen
    print 'Max length of a question:', question_maxlen
    print 'Max length of an answer:', answer_maxlen
    # compile model
    model = Seq2SeqtableQA(row_maxlen, question_maxlen, answer_maxlen,
                           len_dic, HIDDEN_SIZE, BATCH_SIZE)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.summary()
    # train
    # for tables_batch, questions_batch, answers_batch in batch_data(data, dic, BATCH_SIZE):
    #     nn_model.fit([tables_batch, questions_batch], answers_batch, batch_size=BATCH_SIZE, nb_epoch=1, show_accuracy=True, verbose=1)
    # print tables_train
    model.fit([tables_train, questions_train], answers_train,
              batch_size=BATCH_SIZE, epochs=2, shuffle=True, verbose=2,
              validation_split=0.2)
from torchvision import datasets, transforms
from load_data import get_data
# from baseline_model import BS_Net
# from train_utils import train, test
from qlnet_model_quantized import BS_Net
from train_utils_quantized import train, test
from training_parameters import get_params

args = get_params()
args.cuda = not args.no_cuda and torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

train_loader = get_data(args, dataset='mnist', ifTrain=True)
test_loader = get_data(args, dataset='mnist', ifTrain=False)

model = BS_Net()
model.to(device)
args.out_name = 'mnist_baseline.pth'
train(model, train_loader, test_loader, args, device)

# model = BS_Net()
# model.load_state_dict(torch.load('mnist_baseline.pth'))
# model.to(device).eval()
# test(model, test_loader, device, 0, None)
f.close()

# Number of pixels in the input and output images
IMG_SHAPE = (400, 400)

theano.config.floatX = 'float32'
srng = RandomStreams()

f = open(os.path.join(OUT_DIR, "costs.txt"), 'w')
f.write("Starting...\n")
f.close()

print("Loading image data...")
write("Loading image data...")

channels = 3
trX, trY, teX, teY, teReal = load_data.get_data(img_shape=IMG_SHAPE)
img_x = IMG_SHAPE[0]
img_y = IMG_SHAPE[1]

s1 = str(trX.shape[0]) + " synthetic training images.\n"
s2 = str(teX.shape[0]) + " synthetic validation images.\n"
s3 = str(teReal.shape[0]) + " real images.\n"
print(s1 + s2 + s3)
write(s1 + s2 + s3)

X = T.ftensor4()
Y = T.ftensor4()

# Network architecture
f1 = (5, channels, 3, 3)  # 5 filters of shape 3 x 3
filters = [f1]
def main(subject_id, cv_splits, batch_size, epochs, model_class, crop, lr,
         map2d=False, conv3d=False, conv1d=False, name=None, taskname=None):
    all_accs_list = []
    all_mean_list = []
    all_var_list = []
    count = 0
    for idx in subject_id:
        # if crop:
        #     X, y, _, _ = get_crops(id=idx+1, event_code=[6,10,14], filter=bpfilter, t=[0., 4],
        #                            time_window=1.0, time_step=0.5)
        # else:
        #     X, y = get_data(id=idx+1, event_code=[6,10,14], filter=bpfilter, t=[0., 4])
        if crop:
            X1, y1, _, _ = get_crops(id=idx+1, event_code=[4,8,12], filter=[0.5, 45], t=[0., 4],
                                     time_window=1.0, time_step=0.5)
            X2, y2, _, _ = get_crops(id=idx+1, event_code=[6,10,14], filter=[0.5, 45], t=[0., 4],
                                     time_window=1.0, time_step=0.5)
            X = np.vstack((X1, X2))
            y = np.hstack((y1, y2+2))
        else:
            X1, y1 = get_data(id=idx+1, event_code=[4,8,12], filter=[0.5, 45], t=[0., 4])
            X2, y2 = get_data(id=idx+1, event_code=[6,10,14], filter=[0.5, 45], t=[0., 4])
            X = np.vstack((X1, X2))
            y = np.hstack((y1, y2+2))
        if map2d:
            X = elec_map2d(X)
            print(X.shape)
            if conv3d:
                X = X.transpose(0, 2, 1, 3, 4)
                print(X.shape)
        else:
            X = X.reshape(X.shape[0], 1, X.shape[1], X.shape[2]).transpose(0, 1, 3, 2)
        acc = cv_train(model_class, torch.nn.CrossEntropyLoss, torch.optim.Adam, X, y,
                       epoch=epochs, lr=lr, num_of_cv=cv_splits, batch_size=batch_size,
                       shuffle=True)
        mean = np.mean(acc)
        var = np.var(acc)
        print('subject{} mean_acc:{}, var_acc:{}'.format(idx+1, mean, var))
        all_accs_list.append(acc)
        all_mean_list.append(mean)
        all_var_list.append(var)
        if count % 5 == 0:
            accs = np.array(all_mean_list)
            accs = accs.reshape(-1)
            np.save('accuracies/task({})model({})_acc'.format(taskname, name)+'.npy', accs)
        count += 1
    all_mean = np.mean(all_accs_list)
    all_var = np.var(all_accs_list)
    sub_mean = np.mean(all_mean_list)
    sub_var = np.var(all_mean_list)
    print('********************result**********************')
    print('model name:'+name)
    print('taskname:'+taskname)
    print('all validation mean_acc:{}, var_acc:{}'.format(all_mean, all_var))
    print('all subject mean_acc:{}, var_acc:{}'.format(sub_mean, sub_var))
    accs = np.array(all_mean_list)
    accs = accs.reshape(-1)
    np.save('accuracies/task({})model({})_acc'.format(taskname, name)+'.npy', accs)
def inference():
    data, label = ld.get_data(ld.dataset_path)
    global_step = tf.Variable(0, trainable=False)
    # shape: (batch, input width i.e. number of columns, input channels)
    x = tf.placeholder(shape=[None, data.shape[1], 1], name="input", dtype=tf.float32)
    y = tf.placeholder(shape=[None, 3], dtype=tf.float32)
    # first convolutional layer
    with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE):
        conv1_weight = weight_variable(name='weight', shape=(CONV1_SIZE, 1, CONV1_DEEP))
        conv1_bias = bias_variable(name='bis', shape=[CONV1_DEEP])
        conv1_output = tf.nn.relu(conv1d(x, conv1_weight) + conv1_bias)
    # first pooling layer
    with tf.variable_scope('pool1', reuse=tf.AUTO_REUSE):
        pool1_output = pool(conv1_output, pool_win_shape)
    # second convolutional layer
    with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE):
        conv2_weight = weight_variable(name='weight', shape=(CONV2_SIZE, CONV1_DEEP, CONV2_DEEP))
        conv2_bias = bias_variable(name='bis', shape=[CONV2_DEEP])
        conv2_output = tf.nn.relu(conv1d(pool1_output, conv2_weight) + conv2_bias)
    # second pooling layer
    with tf.variable_scope('pool2', reuse=tf.AUTO_REUSE):
        pool2_output = pool(conv2_output, pool_win_shape)
    # flatten the output into a vector
    pool2_shape = pool2_output.get_shape().as_list()
    nodes = pool2_shape[1] * pool2_shape[2]
    reshaped = tf.reshape(pool2_output, (-1, nodes))
    # LSTM layers
    with tf.variable_scope('lstm', reuse=tf.AUTO_REUSE):
        lstm_layers = rnn.MultiRNNCell([rnn.LSTMCell(num_units=num) for num in [LSTM_NUMBER, LSTM_NUMBER]],
                                       state_is_tuple=True)
        reshaped = tf.expand_dims(reshaped, axis=2)
        outputs, h_ = tf.nn.dynamic_rnn(lstm_layers, reshaped, dtype=tf.float32)
        lstm_output = outputs[:, -1, :]
    # fully connected layer 1
    with tf.variable_scope("full1", reuse=tf.AUTO_REUSE):
        full1_weight = weight_variable([LSTM_NUMBER, FULL_SIZE], "weight")
        full1_bias = bias_variable([FULL_SIZE], name='bias')
        full1_output = tf.nn.relu(tf.matmul(lstm_output, full1_weight) + full1_bias)
    # fully connected layer 2
    with tf.variable_scope("full2", reuse=tf.AUTO_REUSE):
        full2_weight = weight_variable([FULL_SIZE, CLASS_NUMBER], "weight")
        full2_bias = bias_variable([CLASS_NUMBER], name='bias')
        full2_output = tf.nn.relu(tf.matmul(full1_output, full2_weight) + full2_bias)
    y_pred = tf.nn.softmax(logits=full2_output)
    # loss function
    with tf.name_scope('Loss'):
        # cross-entropy loss
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=full2_output,
                                                                name='cross_entropy')
        # average over the batch
        loss = tf.reduce_mean(cross_entropy, name='loss')
    # training algorithm
    with tf.name_scope('Optimization'):
        train = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # evaluation nodes
    with tf.name_scope('Evaluate'):
        # booleans indicating whether each validation/test prediction is correct
        correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
        # cast booleans to floats and average to get the accuracy
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    with tf.Session() as sess:
        # initialize variables
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            start = (i * BATCH_SIZE) % data.shape[0]
            end = min(start + BATCH_SIZE, data.shape[0])
            _, loss_value, step = sess.run([train, loss, global_step],
                                           feed_dict={x: data[start:end], y: label[start:end]})
            if i % 1000 == 0:
                print("after %d training step(s), loss on training batch is %g." % (step, loss_value))
# choose the GPU if one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('We will use the GPU:', torch.cuda.get_device_name(0))
# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

model = DistilBertForSequenceClassification.from_pretrained('/app/incivility_project/models/distilbert_5000_03-06-20')
# config = BertConfig.from_json_file('../models/bert_classifier_2epoch_256size/config.json')
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
model.to(device)  # move the model to the selected device (works with or without a GPU)

# load comments and labels from the input tsv
comments, labels = load_data.get_data(sys.argv[1])

# encode inputs using the DistilBERT tokenizer
input_ids = []
for comment in comments:
    encoded_comment = tokenizer.encode(comment, add_special_tokens=True,
                                       max_length=256, pad_to_max_length=True)
    input_ids.append(encoded_comment)

# define attention masks: if a token id is 0 it's a PAD, set the mask to 0; else set it to 1
attention_masks = []
for sent in input_ids:
    att_mask = [int(token_id > 0) for token_id in sent]
    attention_masks.append(att_mask)
                valid_loss += batch_loss.item()
                ps = torch.exp(log_ps)
                top_p, top_class = ps.topk(1, dim=1)
                equals = top_class == labels.view(*top_class.shape)
                accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
            model.train()
            print(f'Epoch: {e + 1}/{epochs}',
                  f'Training Loss: {train_loss/print_every:.3f}',
                  f'Valid Loss: {valid_loss/len(validloader):.3f}',
                  f'Valid Accuracy: {accuracy/len(validloader):.3f}')
            train_loss = 0
    return model


device = get_device(args.gpu)
data_loaders, img_datasets = get_data(args.data_directory)
print('Training new model ...')
model = load_model(args.arch, device, CLASS_NUM, args.hidden_units)
optimizer = get_optimizer(model, args.learning_rate)
model = train_model(model, data_loaders[TRAIN], data_loaders[VALID], device,
                    optimizer, args.epochs)
save_checkpoint(model, img_datasets[TRAIN], optimizer, args.arch, CLASS_NUM,
                args.learning_rate, args.epochs, args.hidden_units)
print("Testing Model accuracy ...")
test_model_accuracy(model, data_loaders[TEST], device)
# Printing results of classification
print('Final results')
mean_aucs = {k: (v.mean(), v.std()) for k, v in auc_dict.items()}
for k, v in mean_aucs.items():
    print('classification=%s, Mean_AUC = %f, std = %f'
          % (k, v[0], v[1]))
cluster_max_key = max(mean_aucs, key=mean_aucs.get)
print('\nMAX RESULT:CLUSTER, classification=%s, Mean_AUC = %f, std = %f\n'
      % (cluster_max_key, mean_aucs[cluster_max_key][0], mean_aucs[cluster_max_key][1]))
return mean_aucs


if __name__ == '__main__':
    exp_num = sys.argv[1]
    path = join('..', 'meg_data1', exp_num)
    # Loading data
    target_grad_data, nontarget_grad_data = get_data(path, 'MEG GRAD')
    # mean(intertrial) oscillates near 10^(-12);
    # variance ranges from 10^(-21) to 10^(-22)
    # (mean variance between time x space points ~10^(-21))
    target_mag_data, nontarget_mag_data = get_data(path, 'MEG MAG')
    # mean(intertrial) oscillates near 10^(-13) and 10^(-14);
    # variance ranges from 10^(-23) to 10^(-24)
    # (mean variance between time x space points ~10^(-24))
    # Run cross-validation
    cv_score(target_grad_data, nontarget_grad_data, target_mag_data, nontarget_mag_data)
        layers.Dense(1, activation='sigmoid')
    ])
    optimizer = tf.keras.optimizers.RMSprop(0.0005)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[auc])
    return model


def norm(x):
    return (x - train_stats['mean']) / train_stats['std']


train_data, test_data = load_data.get_data()
train_data = train_data.sample(frac=1)

if TEST_LOCAL:
    dataset = train_data.copy()
    train_data = dataset.sample(frac=1)
    test_data = dataset.drop(train_data.index)
    test_labels = test_data.pop('Delay.Indicator')

train_labels = train_data.pop('Delay.Indicator')
train_stats = train_data.describe()
train_stats = train_stats.transpose()

if not TEST_LOCAL:
    submission_id = test_data.pop('ID')

model = build_model()
normed_train_data = norm(train_data)
# imports this script relies on (ktf is the Keras TensorFlow backend)
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.backend import tensorflow_backend as ktf
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from load_data import get_data

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)
ktf.set_session(session)

model = Sequential()
model.add(LSTM(256, return_sequences=True, input_shape=(735, 10)))  # fixed missing closing parenthesis
model.add(LSTM(256))
model.add(Dense(1))
early_stopping = EarlyStopping('loss', 0.0001, 5)
model.compile(loss='mse', optimizer=Adam(1e-4))

p = []
for i in range(0, 66, 5):
    x_train, y_train, x_test, y_test = get_data(i)
    model.fit(x_train, y_train, batch_size=256, epochs=100, callbacks=[early_stopping])
    y_pred = model.predict(x_test, batch_size=500)
    r = pd.DataFrame({'change': y_test.flatten(), 'pred': y_pred.flatten()})
    for j in range(3):
        p.append(r[j::3].corr().values[0, 1])

df = pd.DataFrame({'p': np.array(p)})
df.to_csv('result.csv')
model = model_from_json(open('my_model_arch.json').read())
print 'foooo'
model.load_weights('model_model_weights.h5')

TRAIN_NUM = 51853
TEST_NUM = 22218
DIM = 10000
label_sizes = 73


def gen_label(prob_vector):
    label = np.argmax(prob_vector)
    return label


(X_train, y_train), (X_test, y_test) = load_data.get_data()
X_test = np.asarray(X_test)
X_train = np.asarray(X_train)
X_train = X_train.reshape(TRAIN_NUM, DIM)
X_test = X_test.reshape(TEST_NUM, DIM)

preds = model.predict_proba(X_test[:10], batch_size=1, verbose=2)
preds2 = model.predict_proba(X_test[-10:], batch_size=1, verbose=2)
preds = [gen_label(i) for i in preds]
preds2 = [gen_label(i) for i in preds2]
print preds, preds2
def fx_minimize(x, val_x, train_label, val_label, self_made_m, M, k, a, batch_size,
                prefix, iteration, num_epoch, learning_rate, train_data_count):
    logging.getLogger().setLevel(logging.DEBUG)
    train, test = load_data.get_data(x, val_x, train_label, val_label, batch_size, self_made_m)
    print "training and validation sets generated"
    # load the training network
    net = mlp_model.model_main(k, a)
    internals = net.get_internals()
    arg_names = internals.list_arguments()
    lr_dict = dict()
    for arg_name in arg_names:
        if arg_name == 'M':
            lr_dict[arg_name] = 0
    if iteration == 100:
        # train the model
        t7 = load_data.SelfOptimizer(learning_rate=0.02, rescale_grad=(1.0 / batch_size))
        sgd = mx.optimizer.create('sgd', learning_rate=0.02)
        optimizer = mx.optimizer.create('sgd', learning_rate=0.02, rescale_grad=(1.0 / batch_size))
        optimizer.set_lr_mult(lr_dict)
        model = mx.model.FeedForward(
            symbol=net,  # network structure
            num_epoch=num_epoch,  # number of data passes for training
            learning_rate=learning_rate,  # learning rate of SGD
            initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
            # optimizer=SelfOptimizer,
            # optimizer=optimizer,
            arg_params={'M': M})
        metric = load_data.Auc()
        print "network loaded, start training"
        model.fit(
            X=train,  # training data
            eval_metric=metric,
            # eval_data=test,  # validation data
            batch_end_callback=mx.callback.Speedometer(
                batch_size, train_data_count * train_data_count / batch_size,
                iteration=iteration)
            # output progress for each 200 data batches
        )
        model.save(prefix, iteration)
    else:
        # use the model from the previous round
        model_loaded = mx.model.FeedForward.load(prefix, iteration - 100)
        # load the initial parameters
        params = model_loaded.get_params()  # get model parameters
        arg_params = params['arg_params']
        model = mx.model.FeedForward(
            symbol=net,  # network structure
            num_epoch=num_epoch,  # number of data passes for training
            learning_rate=learning_rate,  # learning rate of SGD
            initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
            # optimizer=SelfOptimizer,
            # optimizer=optimizer,
            arg_params=arg_params)
        metric = load_data.Auc()
        print "network loaded, start training"
        model.fit(
            X=train,  # training data
            eval_metric=metric,
            # eval_data=test,  # validation data
            batch_end_callback=mx.callback.Speedometer(
                batch_size, train_data_count * train_data_count / batch_size,
                iteration=iteration, minwhich='fx-')
            # output progress for each 200 data batches
        )
        model.save(prefix, iteration)
import load_data

if __name__ == "__main__":
    qso_dict = load_data.get_data("data/I_Paris_plate_mjd_fiber.dat")
def feedforward(self, input_image, biases):
    # input_image is a member of train_images or another image list
    input_dot_weights = np.dot(np.asarray(input_image), self.weights)
    # use the biases passed by the caller (previously this argument was ignored
    # in favour of self.biases, which is what the caller passes anyway)
    x = np.add(input_dot_weights, biases)
    return sigmoid_f(x)


def sigmoid_f(x):
    return 1.0 / (1.0 + np.exp(-x))


net = Network((784, 10))
correct_count = 0
incorrect_count = 0
(train_images, train_labels, validation_images, validation_labels,
 test_images, test_labels) = ld.get_data()
for image, label in zip(train_images, train_labels):
    # image data imported from load_data.py
    output = list(net.feedforward(image, net.biases))
    mx = max(output)
    mx_index = output.index(mx)
    if mx_index == label:
        correct_count += 1
    else:
        incorrect_count += 1
percent_correct = 100 * (float(correct_count) / (float(correct_count) + float(incorrect_count)))
print "Percent correct is %f%%" % percent_correct
    max_change_up = max_change_up_sum / doclist_count
    max_change_down = max_change_down_sum / doclist_count
    tendency = tendency_sum / doclist_count
    count_rank_up = count_rank_up_sum / doclist_count
    count_rank_down = count_rank_down_sum / doclist_count
    changes_per_docs = changes_per_docs_sum / doclist_count
    with open('evaluation_results\\corpus_evaluations.txt', 'a') as file:
        file.write(
            f'Corpus of {doclist_count} documentlists (queries)\nFirst {first_x_docs} documents of each list\nAverage of documents ranked up: {count_rank_up}\nAverage of documents ranked down: {count_rank_down}\nAverage rank changes per doc of documentlist: {changes_per_docs:.2f}\nAverage rank change by: {average_changes:.2f} ranks\nAverage maximum rank jump up: {max_change_up:.2f}\nAverage maximum rank jump down: {max_change_down:.2f}\nAverage tendency of direction: {tendency:.2f}\n\n'
        )


filepath_query = 'search_results\\query_topic25.json'
filepath_results = 'search_results\\results_custom_topic25.json'
data_tuple = get_data(filepath_query, filepath_results)
doclist = data_tuple[1]
jump_threshold = 5
listo = [5, 10, 20, 50, 100]
for nr in listo:
    print_average_changes(data_tuple, nr, jump_threshold)

"""
location = 'search_results\\'
filepaths = []
for root, directories, files in os.walk( location ):
    for item in files:
        if 'results' in item:
            filepaths.append( os.path.join( root, item ) )
import load_data
from transformers import LayoutLMTokenizerFast, TrainingArguments, Trainer
import torch
import finetuning_utils
import sroie
import numpy as np
import os
import pandas as pd

df = load_data.get_data()
tokenizer = LayoutLMTokenizerFast.from_pretrained(
    "microsoft/layoutlm-base-uncased")
test_data = sroie.SROIE_Dataset(df.iloc[-200:].reset_index(drop=True),
                                tokenizer, augmentation=None)

for epoch in [3, 5, 7]:
    for n in [25, 50, 75, 100, 200, 300, 400]:
        current_df = df.iloc[:n].reset_index(drop=True)
        # loop for augmentation parameter
        for k in [[0, 0, 0], [2, 0.8, 0.8], [3, 0.8, 0.8]]:
            print("augmentation parameters: ", k)
            aug_params = {
                "copies": k[0],
                "p_lines": k[1],  # 100%
                "p_char": k[2],  # 100%
import numpy as np
from calculator.models import Athlete
import sys
sys.path.append('/Users/duncanblythe/work/repo/running/python_code/')
import load_data

if __name__ == '__main__':
    conf = {}
    conf['no_events_tried'] = 3
    conf['percentiles'] = [0, 25]
    conf['gender'] = 'Male'
    conf['outlier_threshold'] = 0.05
    x = load_data.get_data(conf)
    x[np.isnan(x)] = 0
    for i in range(x.shape[0]):
        print i
        a = Athlete()
        a.best100 = x[i, 0]
        a.best200 = x[i, 1]
        a.best400 = x[i, 2]
        a.best800 = x[i, 3]
        a.best1500 = x[i, 4]
        a.bestMile = x[i, 5]
        a.best5k = x[i, 6]
        a.best10k = x[i, 7]
        a.bestHM = x[i, 8]
        a.bestMar = x[i, 9]
        a.save()
    conf = {}
    conf['no_events_tried'] = 3
    conf['percentiles'] = [0, 25]
def prepare_data(file):
    X, Y = load_data.get_data(file)
    # print('total images:{0}'.format(len(X)))
    return X, Y
from __future__ import print_function
from load_data import get_data, analyze_data, train_data_generation  # process_train_data
from keras.models import Model
from keras.layers import Dense, Dropout, Input, LSTM, Bidirectional, Masking, Embedding, concatenate
from keras.layers import BatchNormalization, Activation
from keras.optimizers import Adam
from attention_model import AttentionLayer
import numpy as np

max_features = 20000
batch_size = 16
epo = 100

# loading data
print('Loading data...')
train_audio_data, train_text_data, train_label, test_audio_data, test_text_data, test_label, test_label_o, embed_matrix, dic = get_data()
print('train_audio shape:', train_audio_data.shape)
print('train_text shape:', train_text_data.shape)
print('test_audio shape:', test_audio_data.shape)
print('test_text shape:', test_text_data.shape)
print('train_label shape:', train_label.shape)
print('test_label shape:', test_label.shape)
"""
final_train_audio, final_train_text, final_train_label = process_train_data(train_audio_data, train_text_data, train_label)
final_train_audio = np.array(final_train_audio)
print('train_audio shape:', final_train_audio.shape)
print('train_text shape:', final_train_text.shape)
print('test_audio shape:', test_audio_data.shape)
print('test_text shape:', test_text_data.shape)
print('train_label shape:', final_train_label.shape)
def calc_metricts(data_path, epoch_start_time, result_path, sensor_type, freqs, save_tft=False, load_existing_tft=False):
    # Loading data
    # epoch_start_time takes into account that the fixation start is zero (0 ms)
    erase_dir(result_path)
    target_data, nontarget_data = get_data(data_path, sensor_type)  # trials x channels x times
    sensor_type = sensor_type.split(' ')[-1]
    first_target = get_tft_data(target_data, 'target', data_path, sensor_type, freqs, save_tft, load_existing_tft)  # trials x channels x freqs x times
    first_nontarget = get_tft_data(nontarget_data, 'nontarget', data_path, sensor_type, freqs, save_tft, load_existing_tft)  # trials x channels x freqs x times

    # Calc mean for UNCORRECTED data
    third_target = first_target.mean(axis=0)
    third_nontarget = first_nontarget.mean(axis=0)
    save_results(third_target, 'third_target_%s' % sensor_type, exp_num)
    save_results(third_nontarget, 'third_nontarget_%s' % sensor_type, exp_num)
    del third_target, third_nontarget

    # Calc t-stat for UNCORRECTED data
    fivth = ttest_ind(first_target, first_nontarget, axis=0, equal_var=False)
    save_results(fivth.statistic, 'fivth_%s' % sensor_type, exp_num)
    del fivth

    # Calc averaged t-stats for the mean value of the interval [200:500] ms
    start_window = 200 - epoch_start_time
    end_window = 500 - epoch_start_time
    seventh = ttest_ind(first_target[:, :, :, start_window:end_window].mean(axis=3),
                        first_nontarget[:, :, :, start_window:end_window].mean(axis=3),
                        axis=0, equal_var=True)
    save_results(seventh.statistic, 'seventh_t_%s' % sensor_type, result_path, need_image=False)
    save_results(seventh.pvalue, 'seventh_p_%s' % sensor_type, result_path, need_image=False)
    title = 'T-stat_mean_200_500ms_uncorrected'
    fig = vis_space_freq(seventh.statistic, title, freqs)
    plt.savefig(os.path.join(result_path, title + '_' + sensor_type + '.png'))
    plt.close(fig)
    heads_path = os.path.join(result_path, 'seventh_heads')
    save_heads(heads_path, seventh.statistic, seventh.pvalue, sensor_type.lower(), freqs)  # convert 'MEG GRAD' to 'grad' and 'MEG MAG' to 'mag'
    del seventh

    # CORRECTED data
    second_target = baseline_correction(first_target, epoch_start_time)
    second_nontarget = baseline_correction(first_nontarget, epoch_start_time)
    del first_target, first_nontarget

    # # Calc mean for CORRECTED data
    # fourth_target = second_target.mean(axis=0)
    # fourth_nontarget = second_nontarget.mean(axis=0)
    # save_results(fourth_target,'fourth_target_%s' %sensor_type,exp_num)
    # del fourth_target,fourth_nontarget
    #
    # # Calc t-stat for CORRECTED data
    # sixth = ttest_ind(second_target,second_nontarget,axis=0,equal_var=False)
    # save_results(sixth.statistic,'sixth_%s' %sensor_type,exp_num)
    # del sixth

    # Calc averaged t-stats for the mean value of the interval [200:500] ms
    start_window = 200 - epoch_start_time
    end_window = 500 - epoch_start_time
    eighth = ttest_ind(second_target[:, :, :, start_window:end_window].mean(axis=3),
                       second_nontarget[:, :, :, start_window:end_window].mean(axis=3),
                       axis=0, equal_var=True)
    save_results(eighth.statistic, 'eighth_t_%s' % sensor_type, result_path, need_image=False)
    save_results(eighth.pvalue, 'eighth_p_%s' % sensor_type, result_path, need_image=False)
    title = 'T-stat_mean_200_500ms_corrected'
    fig = vis_space_freq(eighth.statistic, title, freqs)
    plt.savefig(os.path.join(result_path, title + '_' + sensor_type + '.png'))
    plt.close(fig)
    heads_path = os.path.join(result_path, 'eighth_heads')
    save_heads(heads_path, eighth.statistic, eighth.pvalue, sensor_type.lower(), freqs)
    del eighth