def train(model, Parse_parameters, opts, dictionaries):
    train_data = load_dataset(Parse_parameters, opts.train, dictionaries)
    dev_data = load_dataset(Parse_parameters, opts.dev, dictionaries)
    optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-4)

    # Number of epochs; it needs 10 epochs to converge
    n_epochs = 1

    #eval_epoch(model, dev_data, dictionaries, opts)
    for epoch in range(n_epochs):
        print("Train epoch: %d" % epoch)
        train_epoch(model, train_data, opts, optimizer)
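# The snippet above delegates the per-epoch work to train_epoch. A minimal
# sketch of such a loop follows; the per-example iteration and the get_loss
# helper are assumptions made for illustration, not this repository's API.
def train_epoch(model, train_data, opts, optimizer):
    model.train()
    total_loss = 0.0
    for sentence, tags in train_data:          # one example per update step
        model.zero_grad()
        loss = model.get_loss(sentence, tags)  # hypothetical loss helper
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print("  mean loss: %.4f" % (total_loss / len(train_data)))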
def load_dataset():
    """
    Loads the data using the DataLoader implementation in loader.py.

    Hard coding: for this project the data is Mindboggle only, so the
    values are hard coded.
    """
    data_dir = '../data/Mindboggle'
    data = loader.load_dataset(data_dir, dataset='Mindboggle', goal='segment')
    return data
def run(**kwargs):
    # load graph
    graph_nx, dump_folder = loader.load_dataset(kwargs['dataset'])

    # initialize tNodeEmbed
    task = kwargs['task']
    test_size = kwargs['test_size']
    tnodeembed = models.tNodeEmbed(graph_nx, task=task, dump_folder=dump_folder,
                                   test_size=test_size, **kwargs['n2vargs'])

    # load dataset
    X, y = tnodeembed.get_dataset(train_skip=kwargs['train_skip'])

    # fit
    keras_args = kwargs['keras_args']
    batch_size = keras_args.pop('batch_size', 32)
    steps_per_epoch = ceil(len(X['train']) / batch_size)

    # tNodeEmbed
    generator = loader.dataset_generator(X['train'], y['train'], tnodeembed.graph_nx,
                                         tnodeembed.train_time_steps,
                                         batch_size=batch_size)
    tnodeembed.fit_generator(generator, steps_per_epoch, **keras_args)

    # node2vec
    static_model = models.StaticModel(task=task)
    generator = loader.dataset_generator(X['train'], y['train'], tnodeembed.graph_nx,
                                         [max(tnodeembed.train_time_steps)],
                                         batch_size=batch_size)
    static_model.fit_generator(generator, steps_per_epoch, **keras_args)

    # predict
    steps = ceil(len(X['test']) / batch_size)
    generator = loader.dataset_generator(X['test'], y['test'], tnodeembed.graph_nx,
                                         tnodeembed.train_time_steps,
                                         batch_size=batch_size, shuffle=False)
    tnodeembed_metrics = get_metrics(y['test'],
                                     tnodeembed.predict_generator(generator, steps))
    generator = loader.dataset_generator(X['test'], y['test'], tnodeembed.graph_nx,
                                         [max(tnodeembed.train_time_steps)],
                                         batch_size=batch_size, shuffle=False)
    node2vec_metrics = get_metrics(y['test'],
                                   static_model.predict_generator(generator, steps))

    print(f'tnodeembed: {tnodeembed_metrics}')
    print(f'node2vec: {node2vec_metrics}')
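# run() reports whatever get_metrics returns. A plausible sketch, assuming
# binary scores in [0, 1] and sklearn metrics; the repo's actual metric set
# and output shape may differ.
from sklearn.metrics import roc_auc_score, f1_score

def get_metrics(y_true, y_scores):
    y_pred = (y_scores > 0.5).astype(int)  # assumes binary probability scores
    return {'auc': roc_auc_score(y_true, y_scores),
            'f1': f1_score(y_true, y_pred)}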
def training(modelName, dataAugmentationFlag, pretrainedFlag, epochsNumber):
    print("Model: ", modelName)

    # Load the dataset
    train_loader, valid_loader, test_loader = load_dataset(dataAugmentationFlag)

    # Define the model
    num_class = 4
    if modelName == 'SqueezeNet':
        model = squeezenet1_0(pretrained=True)
        model.classifier[1] = nn.Conv2d(512, num_class, kernel_size=(1, 1),
                                        stride=(1, 1))
        model.num_classes = num_class
    else:
        model = vgg16(pretrained=True)
        model.classifier[6] = nn.Linear(4096, num_class)

    model = trainval_classifier(model, pretrainedFlag, modelName, train_loader,
                                valid_loader, lr=0.001, exp_name=modelName,
                                momentum=0.99, epochs=epochsNumber)

    # Test phase: predictions of the model at the last epoch
    predictions_test, labels_test = test_classifier(model, test_loader)
    accuracy = accuracy_score(labels_test, predictions_test) * 100
    print("Accuracy of ", modelName, ": ", accuracy)
    return accuracy, model
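# Example invocation (the argument values here are illustrative only):
accuracy, model = training('SqueezeNet', dataAugmentationFlag=True,
                           pretrainedFlag=True, epochsNumber=10)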
Parse_parameters['train'] = opts.train
Parse_parameters['development'] = opts.dev
Parse_parameters['vocab_size'] = opts.vocab_size

# Check parameter validity
assert os.path.isfile(opts.train)
assert os.path.isfile(opts.dev)
if opts.pre_emb:
    assert opts.embedding_dim in [50, 100, 200, 300]
    assert opts.lower == 1

# load datasets
dictionaries = prepare_dictionaries(Parse_parameters)
tagset_size = len(dictionaries['tag_to_id'])

train_data = load_dataset(Parse_parameters, opts.train, dictionaries)
dev_data = load_dataset(Parse_parameters, opts.dev, dictionaries)

# Model parameters
Model_parameters = OrderedDict()
Model_parameters['vocab_size'] = opts.vocab_size
Model_parameters['embedding_dim'] = opts.embedding_dim
Model_parameters['hidden_dim'] = opts.hidden_dim
Model_parameters['tagset_size'] = tagset_size
Model_parameters['lower'] = opts.lower == 1
Model_parameters['decode_method'] = opts.decode_method
Model_parameters['loss_function'] = opts.loss_function

model = LstmModel.LSTMTagger(Model_parameters)
#model = LstmCrfModel.BiLSTM_CRF(Model_parameters)
optimizer = optim.Adam(model.parameters(), lr=0.01)
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import accuracy_score

device_name = tf.test.gpu_device_name()
device = torch.device("cuda")
batch_size = 1

print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

data_dir = "/home1/kchandu/research1/persona_movies/data/categorical_data"
test_inputs, test_masks, test_labels, test_categories = load_dataset(
    os.path.join(data_dir, "test.all"), tokenizer)
test_data = TensorDataset(test_inputs, test_masks, test_labels, test_categories)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)

#model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
model = BertForSequenceClassification.from_pretrained("./model_save/")

# Tell pytorch to run this model on the GPU.
model.cuda()
model.eval()

# Tracking variables
predictions, true_labels, pred_labels = [], [], []
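# A minimal sketch of the evaluation loop those tracking variables feed,
# assuming the classic (pre-return_dict) HuggingFace interface where the
# model returns a tuple with logits first; the batch layout matches the
# TensorDataset built above.
import numpy as np

for batch in test_dataloader:
    b_input_ids, b_input_mask, b_labels, b_categories = tuple(t.to(device) for t in batch)
    with torch.no_grad():
        outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    logits = outputs[0].detach().cpu().numpy()
    predictions.append(logits)
    pred_labels.append(np.argmax(logits, axis=1))
    true_labels.append(b_labels.cpu().numpy())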
import numpy as np
from neural_network import NeuralNetwork
from layers import Layer
import loader
import csv
import multiprocessing
import time
import matplotlib.pyplot as plt

np.random.seed()

# This is the configuration of the present experiment.
X, y, X_test, y_test = loader.load_dataset("CasosArtrite2.csv",
                                           input_dimension=17,
                                           test_ratio=0.3)
epochs = 10000


def single_run(hidden_neurons, learning_rate, hidden_activation, output_activation):
    model_data = np.vstack([np.arange(0, epochs), np.zeros([4, epochs])])

    nn = NeuralNetwork(learning_rate=learning_rate)
    nn.add_layer(Layer(hidden_neurons, activation=hidden_activation))
    nn.initialize(X.shape[1], y.shape[1], output_activation=output_activation)

    epoch_values = model_data[0, :]
    start = time.time()
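    # A hypothetical continuation of single_run: train for `epochs` steps and
    # record train/test error per epoch. train_step and error are invented
    # names for illustration, not the real NeuralNetwork interface.
    for epoch in range(epochs):
        nn.train_step(X, y)
        model_data[1, epoch] = nn.error(X, y)
        model_data[2, epoch] = nn.error(X_test, y_test)
    return model_data, time.time() - start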
def assign(self, point):
    # update centroid position as a running mean over assigned points
    self.centroid_buffer = (self.centroid_buffer * self.pt_count / (self.pt_count + 1)
                            + point[:-1] / (self.pt_count + 1))
    self.pt_count += 1
    self.labels[int(point[-1])] += 1
    self.ss_loss += self.dist(point)  # add squared distance to loss

def update_centroid(self):
    self.centroid = self.centroid_buffer


if __name__ == "__main__":
    assert len(sys.argv) == 3

    # load iris dataset
    iris_set = load_dataset()

    # initialize clusters
    seed_examples = random.sample(iris_set, int(sys.argv[1]))
    clusters = [Cluster(seed) for seed in seed_examples]

    # iterate
    for i in range(int(sys.argv[2])):
        for cluster in clusters:
            cluster.clear_points()  # reset points in cluster, keep centroid
        for point in iris_set:
            nearest_cluster = np.argmin([cluster.dist(point) for cluster in clusters])
            clusters[nearest_cluster].assign(point)  # reassign points to clusters
        for cluster in clusters:
            cluster.update_centroid()  # commit buffered centroids for the next pass
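# For context, a minimal sketch of the Cluster members the loop above relies
# on (assign and update_centroid appear in the snippet); the seeding and
# label bookkeeping details here are assumptions.
class Cluster:
    def __init__(self, seed):
        self.centroid = seed[:-1]                 # last column holds the class label
        self.clear_points()

    def clear_points(self):
        self.centroid_buffer = np.zeros_like(self.centroid)
        self.pt_count = 0
        self.labels = [0, 0, 0]                   # iris has three classes
        self.ss_loss = 0.0

    def dist(self, point):
        return np.sum((point[:-1] - self.centroid) ** 2)  # squared Euclidean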
import compute
import loader
import plotter

dataset = loader.load_dataset('patient_1')
data = dataset["data"]
mask = dataset["mask"]
threshold = 95.0

scaled = compute.to_hounsfield_units(data)
masked = list(map(lambda i: compute.apply_mask(i, mask, threshold), scaled))
gradients = compute.compute_gradients(masked)
masked_gradients = list(map(lambda i: compute.apply_mask(i, mask, threshold), gradients))


def slicer(arr):
    return arr[30, :, :]


scaled_slices = list(map(slicer, scaled))
masked_slices = list(map(slicer, masked))
gradient_slices = list(map(slicer, masked_gradients))

plotter.plot(scaled_slices, masked_slices, gradient_slices)
plotter.show()
01: 576 580 700 iterations (595 in 100 iterations)
02: 473 491 (200 iterations)
03: 641 673 (300 iterations)
04: 1001 1092 (1100 after 100 iterations)
05: 749 801 400 iterations
06: 876 113 200 iterations
07: 885
08: 4437 5362

801 with 5 10 20 population profile on problem 05. 500 iterations.
839 with 5 population profile on problem 05. 500 iterations.
002 with 5 population profile on problem 05. 500 iterations. local optimum..
'''

depots, customers, durations, n_paths_per_depot = loader.load_dataset(filename)
conf = configparser.parse_config('configs/default.conf')

if len(plt.get_fignums()) > 0:
    ax0, ax1 = plt.gcf().get_axes()
else:
    _, (ax0, ax1) = plt.subplots(1, 2)

model = MDVRPModel(customers, depots, n_paths_per_depot, conf)
optimal_solution = utils.visualize_solution(model, solution_file)
model.evolve(3)
one = model.population[0]  # debug
L = [each.fitness_score() for each in model.population]
best = model.population[np.argmin(L)]
    'C': [0.001, 1, 1000],
    'penalty': ['l2'],
    'solver': ['newton-cg', 'lbfgs']
}]

param_object = ParameterGrid(tuned_parameters)
skf = StratifiedKFold(n_splits=NUMBER_OF_FOLDS,
                      random_state=SEED)  # splits the data into 5 stratified folds

# Word level: Unigrams, Bigrams, Trigrams, UniBiTri_combined | Character level: Trigrams, 4grams, 5grams, Tri45_combined
map_analyzer_ngram_options = {
    'word': [(1, 1), (2, 2), (3, 3), (1, 3)],
    'char': [(3, 3), (4, 4), (5, 5), (3, 5)]
}

data_path = '/data/annotations_UQ.csv'
results_path = '/path/unpalatable-questions/results/model-results/LogReg_results.tsv'
results_file = open(results_path, "w")
results_file.write("Agreement\tContext\tModel\tF1-score\tAUROC\tWeighted F1\tPrecision\tRecall\tAccuracy\tAUPRC\n")

for conf in [0.6, 1.0]:  # 3/5 and 5/5 agreement
    question_sentences, reply_sentences, comment_sentences, y = loader.load_dataset(
        data_path, conf=conf)
    random_baseline(y)
    for analyzer in ['word', 'char']:
        run_experiments(analyzer)
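# A sketch of what run_experiments plausibly does with the grid and n-gram
# options defined above; the vectorizer, text field, scoring choice, and the
# abbreviated results row are assumptions, not the project's exact pipeline.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def run_experiments(analyzer):
    for ngram_range in map_analyzer_ngram_options[analyzer]:
        X = TfidfVectorizer(analyzer=analyzer,
                            ngram_range=ngram_range).fit_transform(question_sentences)
        for params in param_object:
            scores = cross_val_score(LogisticRegression(**params), X, y,
                                     cv=skf, scoring='f1')
            results_file.write("%s\t%s\t%s\t%.3f\n"
                               % (conf, analyzer, ngram_range, scores.mean()))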
import numpy as np  # required by np.random.seed below; import was cut off in the excerpt
import loader
import csv
import multiprocessing
import time
import matplotlib.pyplot as plt


def adjust_output(data):
    return data[:, :]


np.random.seed()

# This is the configuration of the present experiment.
X, y, r, X_test, y_test, r_test = loader.load_dataset("~/anti_stationary.csv",
                                                      input_dimension=9,
                                                      test_ratio=0.3)
y = adjust_output(y)
y_test = adjust_output(y_test)

print(X_test)
print(y_test)
print(r_test)

epochs = 5000


def single_run(hidden_neurons, learning_rate, hidden_activation, output_activation):
from helper import flat_accuracy, format_time

device = torch.device("cuda")
epochs = 3
batch_size = 32

# Tokenization
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

data_dir = "/home1/kchandu/research1/persona_movies/data/categorical_data"

# Data loading
train_inputs, train_masks, train_labels, train_categories = load_dataset(
    os.path.join(data_dir, "train.all"), tokenizer)
train_data = TensorDataset(train_inputs, train_masks, train_labels, train_categories)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

validation_inputs, validation_masks, validation_labels, validation_categories = load_dataset(
    os.path.join(data_dir, "dev.all"), tokenizer)
validation_data = TensorDataset(validation_inputs, validation_masks,
                                validation_labels, validation_categories)
validation_sampler = RandomSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler,
                                   batch_size=batch_size)
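# What typically follows this data loading in BERT fine-tuning scripts: an
# AdamW optimizer plus a linear warmup schedule. A sketch assuming a model has
# already been instantiated (as in the evaluation snippet above); the
# hyperparameter values are illustrative, not this repo's settings.
from transformers import AdamW, get_linear_schedule_with_warmup

optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0,
                                            num_training_steps=total_steps)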
assert opts.embedding_dim in [50, 100, 200, 300]

# load datasets
if not opts.load:
    dictionaries = prepare_dictionaries(Parse_parameters)
else:
    # load dictionaries
    with open(opts.load + '/dictionaries.dic', 'rb') as f:
        dictionaries = cPickle.load(f)
    # load parameters
    opts = load_parameters(opts.load, opts)

tagset_size = len(dictionaries['tag_to_id'])
train_data = load_dataset(Parse_parameters, opts.train, dictionaries)
dev_data = load_dataset(Parse_parameters, opts.dev, dictionaries)

# Model parameters
Model_parameters = OrderedDict()
Model_parameters['vocab_size'] = opts.vocab_size
Model_parameters['embedding_dim'] = opts.embedding_dim
Model_parameters['hidden_dim'] = opts.hidden_dim
Model_parameters['tagset_size'] = tagset_size
Model_parameters['decode_method'] = opts.decode_method
Model_parameters['loss_function'] = opts.loss_function
Model_parameters['freeze'] = opts.freeze

#model = LstmModel.LSTMTagger(Model_parameters)