def test_data(model, data_dir, device):
    correct = 0
    total = 0
    step = 0
    device = torch.device(
        "cuda" if torch.cuda.is_available() and device == 'cuda' else "cpu")
    model.to(device)
    train_datasets, trainloader, validloader, testloader = process_data(data_dir)
    with torch.no_grad():  # turn off gradient tracking to save computation time and memory
        model.eval()
        for images, labels in testloader:
            step += 1
            images, labels = images.to(device), labels.to(device)
            outputs = model.forward(images)
            ps = torch.exp(outputs)  # convert log-probabilities to probabilities (0 to 1) for each image in the batch
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy = torch.mean(equals.type(torch.FloatTensor))
            print('Accuracy for batch', step, ': {:.3f}%'.format(accuracy * 100))
            correct += equals.sum().item()
            total += labels.size(0)
    print('Number of correctly classified images:', correct)
    print('Number of images in test set:', total)
    print('Accuracy of test set: {:.3f}%'.format(100 * correct / total))
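# --- Hypothetical helper sketch (not from the original files) ---------------
# The image-classifier snippets in this collection call process_data(data_dir)
# and expect (train_datasets, trainloader, validloader, testloader) back. The
# real helper module is not shown here; this is only a minimal torchvision-based
# sketch of what such a helper could look like, assuming data_dir contains
# train/, valid/ and test/ sub-folders in ImageFolder layout.
import torch
from torchvision import datasets, transforms

def process_data(data_dir):
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    train_transforms = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])
    eval_transforms = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize])
    train_datasets = datasets.ImageFolder(data_dir + '/train', transform=train_transforms)
    valid_datasets = datasets.ImageFolder(data_dir + '/valid', transform=eval_transforms)
    test_datasets = datasets.ImageFolder(data_dir + '/test', transform=eval_transforms)
    trainloader = torch.utils.data.DataLoader(train_datasets, batch_size=64, shuffle=True)
    validloader = torch.utils.data.DataLoader(valid_datasets, batch_size=64)
    testloader = torch.utils.data.DataLoader(test_datasets, batch_size=64)
    return train_datasets, trainloader, validloader, testloader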
def main():
    try:
        learning_rate = float(sys.argv[1])
    except:
        learning_rate = config.learning_rate
    # number of hidden layers
    hidden = int(sys.argv[2])
    L = hidden + 2  # L is the total number of layers
    data_file = sys.argv[3]  # dataset filename
    # regularization parameter
    try:
        lambd = float(sys.argv[4])
    except:
        lambd = config.lambd
    # layers
    try:
        k = int(sys.argv[5])  # number of output nodes, i.e. nodes in the Lth layer
    except:
        k = config.k
    try:
        normal_file = sys.argv[6]  # normalized dataset name
    except:
        normal_file = config.normal_file
    # the number of nodes per layer excluding the bias unit;
    # this will be used to build the theta array
    nodes_per = []
    for _ in range(2, hidden + 2):
        print("Nodes in layer", _, end=" : ")
        nodes_per.append(int(input()))
    m, dataset, statistics = helper.process_data(data_file, normal_file, k)
    print(dataset)
    input()
    nodes_per.append(k)
    nodes_per.insert(0, len(dataset[0][0]))  # the number of inputs
    L, nodes_per, inital_thetas = design_thetas(nodes_per)
    # final_thetas, theta_history, total_runs, final_rate, regular_param \
    #     = learn_thetas(inital_thetas, dataset, learning_rate, lambd)
    theta_history = learn_thetas(inital_thetas, dataset, learning_rate, lambd)
    final_thetas = theta_history[-1]
    cont = True
    while cont:
        query_y(final_thetas)
        print("Calculate another (Y/n) : ", end="")
        ans, cont = str(input()), False
        if ans == "" or ans[0] == "Y" or ans[0] == "y":
            cont = True
def _text2vector(self, texts):
    '''Given a list of strings, convert them to a 3D numpy array in which each
    token in the text is represented as a vector from self.word2vec_model.
    See semeval.helper.process_data for more details.

    list of strings -> 3D numpy array of shape
    (len(texts), max_number_tokens, self.word2vec_model.vector_size)
    '''
    if self._max_length == 0:
        raise Exception('Your model requires training first')
    return helper.process_data(texts, self._word2vec_model, self._max_length)
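# --- Hypothetical helper sketch (not from the original files) ---------------
# semeval.helper.process_data itself is not shown above. As a rough, assumed
# illustration only: each text is tokenised, each token is looked up in a
# gensim KeyedVectors-style word2vec_model, and the sequences are zero-padded
# or truncated to max_length so the result has a fixed 3D shape.
import numpy as np

def process_data(texts, word2vec_model, max_length):
    vectors = np.zeros((len(texts), max_length, word2vec_model.vector_size))
    for i, text in enumerate(texts):
        tokens = text.split()[:max_length]
        for j, token in enumerate(tokens):
            if token in word2vec_model:  # skip out-of-vocabulary tokens
                vectors[i, j] = word2vec_model[token]
    return vectors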
def run(options):
    # Checking if all required options are specified
    if options.ensembl is None:
        print('\nError: no Ensembl release specified. Use option -h to get help!\n')
        quit()
    try:
        options.ensembl = int(options.ensembl)
    except:
        print('\nError: Ensembl release specified is not an integer. Use option -h to get help!\n')
        quit()
    if options.output is None:
        print('\nError: no output file name specified. Use option -h to get help!\n')
        quit()

    # Must use Ensembl release >= 70 or v65
    if not (options.ensembl >= 70 or options.ensembl == 65):
        print('\nError: This version works with Ensembl v65 or >= v70.\n')
        quit()

    # Genome build
    genome_build = 'GRCh37' if options.ensembl <= 75 else 'GRCh38'

    # Print info
    print('Ensembl version: ' + str(options.ensembl))
    print('Reference genome: ' + genome_build)

    # Creating compressed output file
    Nretrieved = helper.process_data(options, genome_build)
    print('\nA total of ' + str(Nretrieved) + ' transcripts have been included\n')

    # Indexing output file with Tabix
    helper.indexFile(options)

    # Removing uncompressed output file
    os.remove(options.output)

    # Printing out summary information
    print('')
    print('---------------------')
    print('Output files created:')
    print('---------------------')
    print(options.output + '.gz (transcript database)')
    print(options.output + '.gz.tbi (index file)')
    print(options.output + '.txt (list of transcripts)')
def main():
    filename = rd[1]
    # number of features to represent each "thing";
    # more is better, but more expensive
    n = int(rd[2])
    learning_rate = float(rd[3])
    # regularization parameter
    try:
        regular = float(rd[4])
    except:
        # regular = False
        regular = config.regular
    dataset = np.matrix(helper.process_data(filename))
    # this contains the i, j and value of the filled elements
    global fill_array
    fill_array = getFilled(dataset)
    n_m, n_u = dataset.shape
    # learn the defining features as well as preferences for users
    final_xi_s, final_thetas = grad_descent(n_m, n_u, n, learning_rate, regular)
    print("FINAL THETAS", final_thetas, "FINAL XIS", final_xi_s, sep="\n")
    # fill in the approximate blanks now that the parameters have been learnt
    new_dataset = fill_data(dataset, final_xi_s, final_thetas)
def recall(labels, pred):
    recall = recall_score(labels, pred)
    return recall


def extract_weighted_columns(data):
    data = data[:, [1, 2, 4, 6, 8, 9, 10, 15, 17, 18, 21, 25, 27, 28, 29]]
    return data


if __name__ == "__main__":
    training_data = genfromtxt('training.csv', dtype=str, delimiter=',')
    testing_data = genfromtxt('testing.csv', dtype=str, delimiter=',')
    train_data, train_labels, train_weights = helper.process_data(training_data)
    test_data, test_labels, test_weights = helper.process_data(testing_data)
    train_data_weighted = helper.normalize_data(
        helper.replace_missing_values(extract_weighted_columns(train_data)))
    test_data_weighted = helper.normalize_data(
        helper.replace_missing_values(extract_weighted_columns(test_data)))
    models.run_lr(train_data_weighted, train_labels, test_data_weighted,
                  test_labels, test_weights)
    models.run_gnb(train_data_weighted, train_labels, test_data_weighted,
                   test_labels, test_weights)
    models.run_gradient_boosting(train_data_weighted, train_labels,
                                 test_data_weighted, test_labels, test_weights)
    models.run_decision_tree(train_data_weighted, train_labels,
                             test_data_weighted, test_labels, test_weights)
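# --- Hypothetical helper sketch (not from the original files) ---------------
# The models module used above is not shown. As an assumed illustration only
# (assuming process_data returns numeric features, binary 0/1 labels and
# numeric per-event weights), a run_lr-style helper could fit a logistic
# regression and report recall weighted by the test weights.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import recall_score

def run_lr(train_data, train_labels, test_data, test_labels, test_weights):
    clf = LogisticRegression(max_iter=1000)
    clf.fit(train_data, train_labels)
    pred = clf.predict(test_data)
    print('Logistic regression weighted recall:',
          recall_score(test_labels, pred, sample_weight=test_weights))
    return clf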
import nltk
from helper import process_data

train = process_data('train')
x = train['question'][0]
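# --- Hypothetical follow-up example (not from the original files) -----------
# nltk is imported above but not yet used; a likely next step is tokenising the
# question text. This assumes the 'punkt' tokenizer data is available and that
# train['question'] holds raw strings.
nltk.download('punkt', quiet=True)
tokens = nltk.word_tokenize(x)
print(tokens)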
parser.add_argument("-e", "--epochs",
                    dest='epochs',
                    action="store",
                    type=int,
                    default=5,
                    help="number of epochs for training, default 5")
parser.add_argument("-d", "--device",
                    dest='device',
                    action="store",
                    type=str,
                    default="cuda",
                    help="device for training, default cuda")
args = parser.parse_args()

train_datasets, trainloader, validloader, testloader = process_data(args.data_dir)
model = build_model(args.arch, args.hidden_units, args.output_units)
running_losses, running_valid_losses, trained_model = train_model(
    args.data_dir, model, args.learning_rate, args.epochs, args.device)
test_data(trained_model, args.data_dir, args.device)
trained_model.class_to_idx = train_datasets.class_to_idx
# device = torch.device("cuda" if torch.cuda.is_available() and args.device == 'cuda' else "cpu")
# trained_model.to(device)
torch.save(
    {
        'arch': args.arch,
        'hidden_units': args.hidden_units,
        'output_units': args.output_units,
        'state_dict': trained_model.state_dict(),
        'class_to_idx': trained_model.class_to_idx
    },
    'checkpoint.pth')  # save path assumed; the original snippet is truncated at this point
def train_model(data_dir, model, learning_rate, epochs, device):
    device = torch.device(
        "cuda" if torch.cuda.is_available() and device == 'cuda' else "cpu")
    model.to(device)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    train_datasets, trainloader, validloader, testloader = process_data(data_dir)
    running_losses, running_valid_losses = [], []
    for e in range(epochs):
        running_loss = 0
        corrects = 0
        for images, labels in trainloader:
            # make sure the training step runs on the selected device (GPU if available)
            images, labels = images.to(device), labels.to(device)
            # clear the gradients accumulated from the previous batch
            optimizer.zero_grad()
            # forward and backward pass
            log_ps = model.forward(images)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()
            # add the batch loss to the running loss
            running_loss += loss.item()
        else:
            # after the training loop completes, use the validation dataset
            # to compare training and validation loss
            running_valid_loss = 0
            running_accuracy = 0
            with torch.no_grad():
                model.eval()  # set model to evaluation mode to disable dropout
                for images, labels in validloader:
                    images, labels = images.to(device), labels.to(device)
                    log_ps = model.forward(images)
                    valid_loss = criterion(log_ps, labels)
                    running_valid_loss += valid_loss.item()
                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy = torch.mean(equals.type(torch.FloatTensor))
                    running_accuracy += accuracy.item()
            model.train()
            print("Epoch:", e + 1,
                  "Training loss: {:.3f}..".format(running_loss / len(trainloader)),
                  "Validation loss: {:.3f}..".format(running_valid_loss / len(validloader)),
                  # "Running accuracy: {:.3f}..".format(running_accuracy),
                  "Validation accuracy: {:.3f}%..".format(running_accuracy * 100 / len(validloader)))
            running_losses.append(running_loss / len(trainloader))
            running_valid_losses.append(running_valid_loss / len(validloader))
    print('Train losses:', running_losses, 'Validation losses:', running_valid_losses)
    return running_losses, running_valid_losses, model
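# --- Hypothetical follow-up example (not from the original files) -----------
# A quick way to inspect the two loss curves returned by train_model. The
# data_dir 'flowers', the build_model arguments, and the matplotlib plotting
# below are assumptions for illustration only.
import matplotlib.pyplot as plt

model = build_model('vgg16', 512, 102)  # assumed architecture and layer sizes
train_losses, valid_losses, trained_model = train_model(
    'flowers', model, learning_rate=0.003, epochs=5, device='cuda')
plt.plot(train_losses, label='training loss')
plt.plot(valid_losses, label='validation loss')
plt.xlabel('epoch')
plt.ylabel('negative log-likelihood loss')
plt.legend()
plt.show()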