def __main__():
    """Train five classifiers with different seeds, reporting dev results for each."""
    train = data.Dataset('train_data.txt')
    dev = data.Dataset('dev_data.txt')
    accs = []
    for seed in range(5):
        print("Seed", seed)
        classifier, acc = models.train_classifier(train, dev, seed=seed)
        accs.append(acc)
        reporter.report_results(classifier, dev)
        print()
def train(config_path):
    """Train a model from a JSON configuration file.

    Args:
        config_path: string, path to a config.json file
    """
    # Bail out early when the config file is missing.
    if not os.path.exists(config_path):
        print('Error: No configuration file present at specified path.')
        return
    config = util.load_config(config_path)
    print('Loaded configuration from: %s' % config_path)
    training_cfg = config['training']
    # Start a fresh session directory when none is configured or the
    # configured one already exists on disk.
    if 'session_dir' not in training_cfg or os.path.exists(training_cfg['session_dir']):
        create_new_session(config)
    model = sfun.SFUN(config)
    dataset = data.Dataset(config).load_dataset()
    train_batches = dataset.get_random_batch_generator('train')
    val_batches = dataset.get_random_batch_generator('val')
    model.fit_model(train_batches, training_cfg['num_steps_train'],
                    val_batches, training_cfg['num_steps_val'],
                    training_cfg['num_epochs'])
def bn_update(tf_config, logger):
    """Recompute batch-norm running statistics over the full training set.

    Rebuilds the model graph with BN in training mode, restores the latest
    checkpoint from cfg.OUTPUT_DIR, runs only the BN update ops over a single
    un-augmented pass of the training data, and saves the refreshed weights
    as a 'bn-model' checkpoint.

    Args:
        tf_config: tf.ConfigProto used for the session.
        logger: logger used for restore/save progress messages.
    """
    dataset = data.Dataset(cfg.DATASET, cfg.RNG_SEED)
    # Momentum 0 makes a single BN update replace the running stats with the
    # statistics of the current (whole-dataset) batch.
    cfg.MODEL.BN_MOMENTUM = 0.
    assert cfg.MODEL.BN_MOMENTUM == 0., 'BN_MOMENTUM should be 0. for update step'
    # One epoch over the entire training set in a single batch, no augmentation.
    imgs, _ = dataset.preprocessing(training=True, augment=False,
                                    batch_size=dataset.train_num, num_epochs=1)
    net, _ = model.unet(imgs, bn_training=True, dropout_training=False,
                        dataset=cfg.DATASET)
    # Classification head is built only so the graph matches the checkpoint;
    # its output is unused here.
    with tf.variable_scope('cls'):
        _ = tf.layers.conv2d(net, 1, 1, activation=tf.nn.relu)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    saver = tf.train.Saver(max_to_keep=1000)
    with tf.Session(config=tf_config) as sess:
        weights_path = tf.train.latest_checkpoint(cfg.OUTPUT_DIR)
        logger.info('Restoring weights from {}'.format(weights_path))
        saver.restore(sess, weights_path)
        # Running the UPDATE_OPS collection refreshes the BN moving averages.
        sess.run(update_ops)
        # Preserve the original checkpoint's global step in the new file name.
        weights_path = saver.save(sess,
                                  os.path.join(cfg.OUTPUT_DIR, 'bn-model'),
                                  global_step=int(weights_path.split('-')[-1]))
        logger.info('Updating weights to {}'.format(weights_path))
    tf.reset_default_graph()
def __init__(self, train=True, common_params=None, solver_params=None, net_params=None, dataset_params=None): self.device_id = int(common_params['gpus']) # 0 self.image_size = int(common_params['image_size']) #256 self.batch_size = int(common_params['batch_size']) self.num_gpus = 1 self.learning_rate = float(solver_params['learning_rate']) self.moment = float(solver_params['moment']) #? self.max_steps = int(solver_params['max_iterators']) self.train_dir = str(solver_params['train_dir']) # don't know self.lr_decay = float(solver_params['lr_decay']) self.decay_steps = int(solver_params['decay_steps']) self.train = train # ? self.cnn = cnn.Model(train=train, common_params=common_params, net_params=net_params) self.dataset = data.Dataset(common_params=common_params, dataset_params=dataset_params)
def initialize_dataset():
    """Build a small test dataset over the MEGAMAN sequence with four peptides."""
    dataset = data.Dataset("Test", data.Conditions(), sequence="MEGAMAN")
    for fragment, start in (("MEGA", 1), ("MEGAMA", 1), ("GAMAN", 3), ("AMAN", 4)):
        dataset.create_peptide(fragment, start_residue=start)
    return dataset
def evaluate_policy_docs():
    """Generate questions for policy-document paragraphs and write them to a file."""
    opt = make_options()
    feeder = data.Feeder(data.Dataset())
    model, _ = models.load_or_create_models(opt, False)
    translator = Translator(model, opt.beam_size, opt.min_length, opt.max_length)
    docs = data.load_policy_documents()
    for doc in docs:
        data.parse_paragraphs(doc)
    lines = []
    for doc in docs:
        # Keep only reasonably sized paragraphs.
        paras = [p for p in doc.paragraphs if 50 <= len(p) <= 400]
        if not paras:
            continue
        lines.append('=================================')
        lines.append(doc.title)
        # Cap the batch at 16 paragraphs, ordered longest first.
        if len(paras) > 16:
            paras = random.sample(paras, 16)
        paras = sorted(paras, key=lambda x: -len(x))
        batch_ids = data.align2d([feeder.sent_to_ids(p) for p in paras])
        src = nu.tensor(batch_ids)
        lengths = (src != data.NULL_ID).sum(-1)
        tgt = translator.translate(src.transpose(0, 1), lengths,
                                   opt.best_k_questions)
        questions = [[feeder.ids_to_sent(t) for t in qs] for qs in tgt]
        for para, qs in zip(paras, questions):
            lines.append('--------------------------------')
            lines.append(para)
            for k, q in enumerate(qs):
                lines.append('predict {}: {}'.format(k, q))
    utils.write_all_lines(opt.output_file, lines)
def evaluate():
    """Evaluate accuracy of the loaded (or newly created) model on the dataset."""
    opt = make_options()
    dataset = data.Dataset()
    model, _ = models.load_or_create_models(opt, False)
    evaluate_accuracy(model, dataset, opt.batch_size, opt.beam_size,
                      opt.min_length, opt.max_length, opt.best_k_questions,
                      None, opt.output_file)
def _init(self):
    """Set up device, augmentation, dataset/loader, BGRL model and optimizer."""
    args = self._args
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.device)
    if torch.cuda.is_available():
        self._device = f'cuda:{args.device}'
    else:
        self._device = "cpu"
    # Four float augmentation strengths, in the order given on the CLI.
    self._aug = utils.Augmentation(*(float(v) for v in args.aug_params[:4]))
    self._dataset = data.Dataset(root=args.root, name=args.name,
                                 num_parts=args.init_parts,
                                 final_parts=args.final_parts,
                                 augumentation=self._aug)
    # Single-item loader: the dataset yields [self._dataset.data].
    self._loader = DataLoader(dataset=self._dataset)
    print(f"Data: {self._dataset.data}")
    hidden_layers = [int(width) for width in args.layers]
    layer_config = [self._dataset.data.x1.shape[1]] + hidden_layers
    self._model = models.BGRL(layer_config=layer_config,
                              pred_hid=args.pred_hid,
                              dropout=args.dropout,
                              epochs=args.epochs).to(self._device)
    print(self._model)
    self._optimizer = torch.optim.Adam(params=self._model.parameters(),
                                       lr=args.lr, weight_decay=1e-5)
def main(opt):
    """Adversarially train discriminator and generator, reporting accuracy per epoch.

    Args:
        opt: options namespace with dataset, pool_size, sample_size, lr,
            batch_size and epochs attributes.
    """
    dataset = data.Dataset(dataset=opt.dataset, pool_size=opt.pool_size,
                           sample_size=opt.sample_size)
    dataset.show_inf()
    feature_size, att_size = dataset.feature_size, dataset.att_size
    discriminator = model.Discriminator(feature_size, att_size).cuda()
    generator = model.Generator(feature_size, att_size).cuda()
    for epoch in range(opt.epochs):
        d_loss, g_loss = train.train_together(discriminator, generator, dataset,
                                              opt.lr, opt.batch_size, epoch)
        print("Epoch {}/{}...".format(epoch + 1, opt.epochs))
        # The D/G reporting logic was duplicated verbatim; share one helper.
        _report_accuracy("D_Loss", d_loss, discriminator, dataset)
        _report_accuracy("G_Loss", g_loss, generator, dataset)


def _report_accuracy(loss_name, loss, net, dataset):
    """Compute and print ZSL/GZSL accuracies and their harmonic mean for `net`."""
    zsl_acc = test.compute_acc(net, dataset, opt1='zsl', opt2='test_unseen')
    seen_acc = test.compute_acc(net, dataset, opt1='gzsl', opt2='test_seen')
    unseen_acc = test.compute_acc(net, dataset, opt1='gzsl', opt2='test_unseen')
    denom = seen_acc + unseen_acc
    # Guard the 0/0 case when both GZSL accuracies are zero (original raised
    # ZeroDivisionError); report 0.0 instead.
    harmonic_mean = (2 * seen_acc * unseen_acc) / denom if denom else 0.0
    print("{}: {:.4f}".format(loss_name, loss),
          "zsl_acc: {:.4f}".format(zsl_acc),
          "seen_acc: {:.4f}".format(seen_acc),
          "unseen_acc: {:.4f}".format(unseen_acc),
          "harmonic_mean: {:.4f}".format(harmonic_mean))
def parsePlinkBfile(prefix, noPheno=False, params=defaultParams, options=None):
    """Parse a plink binary fileset (.fam/.bim/.bed) into a data.Dataset.

    Args:
        prefix: path prefix shared by the .fam/.bim/.bed files.
        noPheno: unused in this function; kept for interface compatibility.
        params: parser parameters (options currently not implemented).
        options: optional object whose chr/posleft/posright/other_map
            attributes restrict the SNP set to a genomic window.

    Returns:
        dict with 'dataset' (the filled data.Dataset) and 'map' (SNP map).
    """
    famFile = prefix + '.fam'
    bimFile = prefix + '.bim'
    bedFile = prefix + '.bed'
    # Individual data (parsing options not implemented).
    idata = parseFamFile(famFile)
    ni = len(idata)
    # SNP data, optionally restricted to a chromosome window.
    sdata = parseBimFile(bimFile)
    if options and options.chr:
        mySnpIdx = _get_snpIdx(sdata['map'], options.chr, options.posleft,
                               options.posright, options.other_map)
        ns = len(mySnpIdx)
    else:
        ns = len(sdata['snps'])
        mySnpIdx = range(ns)
    dataset = data.Dataset(prefix, nsnp=ns, nindiv=ni)
    # Register SNPs and their two alleles.
    for idx in mySnpIdx:
        snp = sdata['snps'][idx]
        dataset.addSnp(snp.name)
        dataset.snp[snp.name].initAlleles(snp.alleles[0], snp.alleles[1])
    # Register individuals from the .fam records.
    for ind in idata:
        dataset.addIndividual(pop=ind[0], ID=ind[1], fatherID=ind[2],
                              motherID=ind[3], sex=ind[4], phenotype=ind[5])
    # Genotype matrix comes from the .bed file.
    fillBedData(bedFile, dataset.Data, mySnpIdx)
    return {'dataset': dataset, 'map': sdata['map']}
def test(self):
    """Evaluate the saved RumourDetect checkpoint on the test set.

    Builds the model in a fresh Graph/Session, restores the latest checkpoint
    found next to self.config['ckpt'], and prints accuracy for both the SDQC
    and veracity tasks over full batches of the test file (any trailing
    partial batch is dropped).
    """
    # Test data is consumed in file order (no shuffling).
    test_data = data.Dataset(self.config['test_data_file'], shuffle=False)
    test_graph = tf.Graph()
    with tf.Session(graph=test_graph) as sess:
        model = models.RumourDetectModel(
            embed_dim=self.config['embed_dim'],
            vocab_size=self.config['vocab_size'],
            sent_hidden_dims=self.config['sent_hidden_dims'],
            branch_hidden_dims=self.config['branch_hidden_dims'],
            sdqc_attn_dim=self.config['sdqc_attn_dim'],
            veracity_attn_dim=self.config['veracity_attn_dim'],
            sdqc_hidden_dim=self.config['sdqc_hidden_dim'],
            veracity_hidden_dim=self.config['veracity_hidden_dim'],
            embed_pret_file=self.config['embed_pret_file'],
            dicts_file=self.config['dicts_file'],
            keep_prob=1.0,  # no dropout at test time
            reuse=None)
        model(is_train=False)
        sess.run(tf.global_variables_initializer())
        # Load pretrained embeddings after init so they overwrite random values.
        if self.config['embed_pret_file']:
            model.embedder.init_pretrained_emb(sess)
        saver = tf.train.Saver(max_to_keep=self.config['max_ckpts'])
        ckpt_dir = os.path.dirname(self.config['ckpt'])
        ckpt = tf.train.latest_checkpoint(ckpt_dir)
        saver.restore(sess, ckpt)
        utils.print_log('Testing ...')
        # Only full batches are evaluated; the remainder is dropped.
        batch_num = int(
            math.floor(len(test_data.records) / self.config['batch_size']))
        sdqc_corr, sdqc_total, veracity_corr, veracity_total = 0, 0, 0, 0
        for _ in range(batch_num):
            X, X_pret, Y_sdqc, Y_veracity, sent_length, branch_length = \
                test_data.get_next(self.config['batch_size'])
            c1, t1, c2, t2 = sess.run(
                [
                    model.sdqc_correct_count, model.sdqc_total_count,
                    model.veracity_correct_count, model.veracity_total_count
                ],
                feed_dict={
                    model.word_ids: X,
                    model.word_ids_pret: X_pret,
                    model.sdqc_labels: Y_sdqc,
                    model.veracity_labels: Y_veracity,
                    model.sent_length: sent_length,
                    model.branch_length: branch_length,
                })
            # Accumulate correct/total counts across batches.
            sdqc_corr += c1
            sdqc_total += t1
            veracity_corr += c2
            veracity_total += t2
        utils.print_log(
            'SDQC Task Acc = {}, Veracity Task Acc = {}'.format(
                float(sdqc_corr) / sdqc_total,
                float(veracity_corr) / veracity_total))
def create_testing():
    """Create the CIFAR-10 testing dataset.

    Loads the pickled test batch and wraps each image/label pair in
    data.ImageClassData, printing a running item count to stdout.

    Returns:
        data.Dataset over all test examples.
    """
    print('Loading cifar 10 testing data...')
    batch_dic = unpickle('cifar-10-batches-py/test_batch')
    dataset = []
    # zip keeps the loop index-free and works on Python 2 and 3 alike
    # (the original indexed with xrange, which Python 3 lacks).
    for image, label in zip(batch_dic['data'], batch_dic['labels']):
        dataset.append(data.ImageClassData(image, label))
        sys.stdout.write("\r{:7d}".format(len(dataset)))
        sys.stdout.flush()
    return data.Dataset(dataset)
def exhaustive_search(model, full_dataset, country_code):
    """Plot exhaustive-search results for one country on shared vs. own-data models.

    Uses (and increments) the module-level `fig` counter so each call draws on
    a new matplotlib figure.
    """
    global fig
    plt.figure(fig)
    data.set_full_dataset(full_dataset)
    # Train a model restricted to this country's own data.
    country_model = data.Dataset(country_code).to_model()
    country_model.set_pca(4)
    country_model.set_epochs(300)
    country_model.train()
    all_data_title = country_code + ' - all data'
    own_data_title = country_code + ' - own data'
    plt.subplot(3, 2, 1)
    plt.title(all_data_title)
    _search(model, country_code, plot_revenue=True, with_index=True)
    plt.subplot(3, 2, 2)
    plt.title(all_data_title)
    _search(model, country_code, with_index=True)
    plt.subplot(3, 2, 3)
    country_model.plot_history(False)
    plt.subplot(3, 2, 5)
    plt.title(own_data_title)
    _search(country_model, country_code, plot_revenue=True)
    plt.subplot(3, 2, 6)
    plt.title(own_data_title)
    _search(country_model, country_code)
    fig += 1
def process(code, div):
    """Align faces for the shard of S3 images whose key hashes to `code` (mod `div`).

    Streams images from BUCKET_NAME, detects the largest face in each, writes
    affine-aligned 224x224 crops to output/, and on shard 0 periodically moves
    finished crops to S3.

    Args:
        code: shard index handled by this worker.
        div: total number of shards.
    """
    import openface
    import openface.helper
    import dlib
    from openface.alignment import NaiveDlib  # Depends on dlib.
    code = int(code)
    div = int(div)
    dlibModelDir = os.path.join(fileDir, "./openface/models/dlib")
    dlibFaceMean = os.path.join(dlibModelDir, "mean.csv")
    dlibFacePredictor = os.path.join(dlibModelDir,
                                     "shape_predictor_68_face_landmarks.dat")
    align = NaiveDlib(dlibFaceMean, dlibFacePredictor)
    dataset = data.Dataset()
    last = time.time()
    count = 0
    for model, key, img in dataset.get_images(BUCKET_NAME):
        # Hash-based sharding across workers.
        # NOTE(review): Python 3 randomizes str hashes per process, so this
        # partition is only stable within a single run — confirm intent.
        if hash(key) % div != code:
            continue
        bb = align.getLargestFaceBoundingBox(img)
        aligned = align.alignImg("affine", 224, img, bb)
        last = time.time()
        count += 1
        # Fixed: original used `not aligned is None` (PEP 8 E714).
        if aligned is not None:
            cv2.imwrite(
                "output/face_{}".format(
                    key.replace('/', '_').replace('models', '')),
                aligned)
        # Shard 0 periodically ships finished crops to S3.
        if count % 20 == 0 and code == 0:
            local(
                'aws s3 mv output/ s3://aub3data/output/ --recursive --storage-class "REDUCED_REDUNDANCY" --region "us-east-1"'
            )
def _init(self):
    """Set up device, augmentations, dataset/loader, SelfGNN model and optimizer."""
    args = self._args
    self._device = torch.device(utils.get_device_id(torch.cuda.is_available()))
    self._aug = utils.Augmentations(method=args.aug)
    self._dataset = data.Dataset(root=args.root, name=args.name,
                                 num_parts=args.init_parts,
                                 final_parts=args.final_parts,
                                 augumentation=self._aug)
    # Single-item loader: the dataset yields [self._dataset.data].
    self._loader = DataLoader(dataset=self._dataset)
    print(f"Data Augmentation method {args.aug}")
    print(f"Data: {self._dataset.data}")
    hidden_layers = [int(width) for width in args.layers]
    layer_config = [self._dataset.data.x.shape[1]] + hidden_layers
    self._norm_config = utils.get_norm_configs(args.norms)
    self._model = models.SelfGNN(layer_config=layer_config,
                                 dropout=args.dropout,
                                 gnn_type=args.model,
                                 heads=args.heads,
                                 **self._norm_config).to(self._device)
    print(self._model)
    self._optimizer = torch.optim.Adam(params=self._model.parameters(),
                                       lr=args.lr)
def main(opt):
    """Train the discriminator/generator pair on the requested dataset."""
    dataset = data.Dataset(dataset=opt.dataset, pool_size=opt.pool_size,
                           sample_size=opt.sample_size)
    dataset.show_inf()
    feature_size, att_size = dataset.feature_size, dataset.att_size
    discriminator = model.Discriminator(feature_size, att_size, opt.t1).cuda()
    generator = model.Generator(feature_size, att_size, opt.t2).cuda()
    train2.train(discriminator, generator, dataset,
                 d_lr=opt.d_lr, g_lr=opt.g_lr, batch_size=opt.batch_size,
                 alpha=opt.alpha, epochs=opt.epochs)
def build_translator():
    """Construct a Translator plus the Feeder it translates against."""
    opt = make_options()
    feeder = data.Feeder(data.Dataset())
    model, _ = models.load_or_create_models(opt, False)
    translator = Translator(model, opt.beam_size, opt.min_length,
                            opt.max_length)
    return translator, feeder
def calculateDataLoaderTrain(args_dict):
    """Build the two training DataLoaders (non-COVID and COVID) with augmentation.

    The per-batch COVID share is driven by args_dict.covid_percent; the two
    loaders' batch sizes sum to args_dict.batch.
    """
    # Augmentation: rescale, center-crop, then random crop plus
    # photometric/geometric jitter.
    train_transformation = transforms.Compose([
        transforms.Resize(256),      # rescale, keeping the original aspect ratio
        transforms.CenterCrop(256),  # keep only the center of the rescaled image
        transforms.RandomCrop(224),  # random crop within the center crop
        transforms.ColorJitter(brightness=(0.9, 1.1)),
        transforms.RandomRotation((-10, 10)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(0, translate=(0.1, 0.1), shear=10,
                                scale=(0.85, 1.15), fillcolor=0),
        transforms.ToTensor(),
    ])
    # Split the training txt file into non-COVID / COVID subsets.
    datasets_train, _, labels, labels_non, labels_cov = data.preprocessSplit(
        args_dict.train_txt)
    train_non_covid = data.Dataset(datasets_train[0], labels_non,
                                   args_dict.train_folder,
                                   transform=train_transformation)
    train_covid = data.Dataset(datasets_train[1], labels_cov,
                               args_dict.train_folder,
                               transform=train_transformation)
    # Guarantee at least one COVID sample per batch.
    covid_size = max(int(args_dict.batch * args_dict.covid_percent), 1)
    dl_non_covid = DataLoader(train_non_covid,
                              batch_size=(args_dict.batch - covid_size),
                              shuffle=True)
    dl_covid = DataLoader(train_covid, batch_size=covid_size, shuffle=True)
    return dl_non_covid, dl_covid
def main():
    """Train a random-forest rating interpreter and plot its feature importances."""
    parser = argparse.ArgumentParser(description='Yelp Rating Interpretation')
    parser.add_argument('--n-estimators', type=int, default=100)
    parser.add_argument('--criterion', type=str, default='gini',
                        choices=['gini', 'entropy'])
    parser.add_argument('--max-depth', type=int, default=20)
    parser.add_argument('--seed', type=int, default=23)
    parser.add_argument('--top-n-features', type=int)
    parser.add_argument('--train-datafile', type=str, default='data/train.csv')
    parser.add_argument('--test-datafile', type=str, default='data/test.csv')
    parser.add_argument('--model-path', type=str, default='models/model.pkl')
    parser.add_argument('--fig-path', type=str, default='figure/importance.png')
    args = parser.parse_args()
    model = models.RatingInterpreter(n_estimators=args.n_estimators,
                                     criterion=args.criterion,
                                     max_depth=args.max_depth,
                                     seed=args.seed,
                                     top_n_features=args.top_n_features)
    train_dataset = data.Dataset(args.train_datafile)
    test_dataset = data.Dataset(args.test_datafile)
    acc = model.train(train_dataset, test_dataset)
    model.save(args.model_path)
    importances, std = model.get_importance()
    visualize.display(importances, std, acc, args.fig_path,
                      top_n_features=args.top_n_features)
def build_train_model(opt, dataset=None):
    """Create model, an Adam optimizer over trainable params, and a prepared feeder."""
    dataset = dataset or data.Dataset(opt)
    model = build_model(opt, dataset)
    feeder = data.TrainFeeder(dataset, opt.batch_size, opt.char_limit)
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=opt.learning_rate)
    feeder.prepare('train')
    return model, optimizer, feeder
def build_train_models(opt):
    """Create generator/discriminator, their Adam optimizers, and a prepared feeder."""
    dataset = data.Dataset()
    feeder = data.TrainFeeder(dataset)
    generator = build_model(opt, dataset.vocab_size)
    discriminator = build_discriminator(opt)
    gen_opt = torch.optim.Adam(generator.parameters(), lr=opt.learning_rate)
    dis_opt = torch.optim.Adam(discriminator.parameters(), lr=opt.learning_rate)
    feeder.prepare('train', opt.batch_size)
    return generator, discriminator, gen_opt, dis_opt, feeder
def main():
    """ Run training and export summaries to data_dir/logs for a single test setup
    and a single set of parameters.

    Summaries include a) TensorBoard summaries, b) the latest train/test
    accuracies and raw edit distances (status.txt), c) the latest test
    predictions along with test ground-truth labels (test_label_seqs.pkl,
    test_prediction_seqs.pkl), d) visualizations as training progresses
    (test_visualizations_######.png)."""
    args = define_and_process_args()
    print('\n', 'ARGUMENTS', '\n\n', args, '\n')
    log_dir = get_log_dir(args)
    print('\n', 'LOG DIRECTORY', '\n\n', log_dir, '\n')
    # Fail fast if the standardized data file is missing.
    standardized_data_path = os.path.join(args.data_dir, args.data_filename)
    if not os.path.exists(standardized_data_path):
        message = '%s does not exist.' % standardized_data_path
        raise ValueError(message)
    dataset = data.Dataset(standardized_data_path)
    # Hold out the sequences belonging to args.test_users.
    train_raw_seqs, test_raw_seqs = dataset.get_splits(args.test_users)
    train_triplets = [data.prepare_raw_seq(seq) for seq in train_raw_seqs]
    test_triplets = [data.prepare_raw_seq(seq) for seq in test_raw_seqs]
    # Each triplet is (input_seq, reset_seq, label_seq); transpose to columns.
    train_input_seqs, train_reset_seqs, train_label_seqs = zip(*train_triplets)
    test_input_seqs, test_reset_seqs, test_label_seqs = zip(*test_triplets)
    # Resolve the model class by name, e.g. model_type 'LSTM' -> models.LSTMModel.
    # NOTE(review): eval on a CLI-provided string — safe only for trusted input.
    Model = eval('models.' + args.model_type + 'Model')
    input_size = dataset.input_size
    target_size = dataset.num_classes
    # This is just to satisfy a low-CPU requirement on our cluster
    # when using GPUs.
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        config = tf.ConfigProto(intra_op_parallelism_threads=2,
                                inter_op_parallelism_threads=2)
    else:
        config = None
    with tf.Session(config=config) as sess:
        model = Model(input_size, target_size, args.num_layers,
                      args.hidden_layer_size, args.init_scale,
                      args.dropout_keep_prob)
        optimizer = optimizers.Optimizer(model.loss, args.num_train_sweeps,
                                         args.initial_learning_rate,
                                         args.num_initial_sweeps,
                                         args.num_sweeps_per_decay,
                                         args.decay_factor,
                                         args.max_global_grad_norm)
        train(sess, model, optimizer, log_dir, args.batch_size,
              args.num_sweeps_per_summary, args.num_sweeps_per_save,
              train_input_seqs, train_reset_seqs, train_label_seqs,
              test_input_seqs, test_reset_seqs, test_label_seqs)
def get_detector_datasets(self):
    """Build the detector dataset from hot-flip token inputs and detector labels."""
    (train_tokens, train_detector_labels, _,
     val_tokens, val_detector_labels, _) = self.get_hot_flip_data()
    return data.Dataset(train_seq=train_tokens,
                        train_lbl=train_detector_labels,
                        val_seq=val_tokens,
                        val_lbl=val_detector_labels,
                        test_seq=None,
                        test_lbl=None)
def parseSyncFile(fileName, populations, popNames=None):
    """Parse a sync allele-count file into a data.Dataset plus map and frequencies.

    For every SNP record, the two most frequent alleles (counts summed over the
    selected populations) are kept, and the per-population frequency of the
    first of them is recorded. Each population is stored as one "individual"
    (pool) in the dataset.

    Note: Python 2 code (print statements, xrange).
    """
    print "Reading", fileName
    ## open reader
    reader = sync.SyncReader(fileName)
    nsnp = reader.countSnps()
    print "Found", nsnp, "in file"
    ## get population names if not provided
    if popNames is None:
        popNames = ["pop" + str(i) for i in xrange(1, len(populations) + 1)]
    elif len(populations) != len(popNames):
        # Keep only the names matching the requested (1-based) population indices.
        popNames = [popNames[x - 1] for x in populations]
    ## map object
    myMap = data.Map()
    ## create empty dataset: one "individual" per population pool
    dataset = data.Dataset(fileName, nsnp=nsnp, nindiv=len(popNames))
    for pop in popNames:
        dataset.addIndividual(pop, pop=pop)
    sync_bases = ['A', 'T', 'C', 'G']
    frqs = []
    # for each record
    for record in reader:
        # Per-population counts of A/T/C/G at this site.
        totalCounts = np.zeros(shape=(len(populations), len(sync_bases)),
                               dtype='int16')
        i = 0
        for pop in record.subpopulations(populations):
            popCounts = []
            for base in sync_bases:
                popCounts.append(pop.countForAllele(base))
            totalCounts[i, ] = popCounts
            i += 1
        # two major alleles indexes: after partitioning the 4 summed counts
        # around position 1, the last two positions hold the two largest.
        indexes = np.argpartition(sum(totalCounts), 1)[len(sync_bases) - 2:]
        ## get biallelic counts (populations x 2)
        biallelic = totalCounts[:, indexes]
        ## get the alleles
        alleles = [sync_bases[x] for x in indexes]
        # Per-population frequency of each kept allele (rows sum to 1).
        alleleFreqs = biallelic.astype(float) / np.sum(biallelic, 1)[:, None]
        frqs.append(alleleFreqs[:, 0])
        # get the name for the SNP
        name = record.chr + "_" + str(record.pos)
        ## add the marker to the map and to the dataset
        myMap.addMarker(M=name, C=record.chr, posG=record.pos, posP=record.pos)
        snp = dataset.addSnp(name)
        snp.initAlleles(alleles[0], alleles[1])
    ## return all including freqs (snps x populations matrix)
    return {
        'dataset': dataset,
        'map': myMap,
        'freqs': np.transpose(np.vstack(frqs)),
        'pops': popNames
    }
def get_char_selector_datasets(self):
    """Build the char-selector dataset from hot-flip token inputs and selector labels."""
    (train_tokens, _, train_selector_labels,
     val_tokens, _, val_selector_labels) = self.get_hot_flip_data()
    return data.Dataset(train_seq=train_tokens,
                        train_lbl=train_selector_labels,
                        val_seq=val_tokens,
                        val_lbl=val_selector_labels,
                        test_seq=None,
                        test_lbl=None)
def prepare_dataloaders(word2idx, ints, en1_pos, en2_pos, predicates):
    """Wrap the instances in train/valid DataLoaders (batch size 128).

    NOTE(review): both loaders are built from the same instances; only the
    training loader shuffles — confirm that sharing the data is intended.
    """
    def make_dataset():
        # Each loader gets its own Dataset instance, as in the original.
        return data.Dataset(word2idx=word2idx, insts=ints, en1_pos=en1_pos,
                            en2_pos=en2_pos, predicates=predicates)

    train_loader = torch.utils.data.DataLoader(make_dataset(),
                                               batch_size=128,
                                               collate_fn=collate_fn,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(make_dataset(),
                                               batch_size=128,
                                               collate_fn=collate_fn)
    return train_loader, valid_loader
def create_training():
    """Create the CIFAR-10 training and validation datasets.

    Loads the five pickled training batches, wraps each image/label pair in
    data.ImageClassData (printing a running count), shuffles, and holds out
    one fifth of the examples for validation.

    Returns:
        (train, val): pair of data.Dataset, an 80/20 split.
    """
    print('Loading cifar 10 training data...')
    dataset = []
    for batch_index in range(1, 6):
        batch_dic = unpickle(
            'cifar-10-batches-py/data_batch_{}'.format(batch_index))
        # zip keeps the loop index-free and works on Python 2 and 3 alike
        # (the original indexed with xrange, which Python 3 lacks).
        for image, label in zip(batch_dic['data'], batch_dic['labels']):
            dataset.append(data.ImageClassData(image, label))
            sys.stdout.write("\r{:7d}".format(len(dataset)))
            sys.stdout.flush()
    # Hold out 1/5 of the shuffled data for validation.
    val_ratio = 1. / 5
    split_index = int(len(dataset) * val_ratio)
    random.shuffle(dataset)
    return data.Dataset(dataset[split_index:]), data.Dataset(dataset[:split_index])
def initialize_system_and_dataset():
    """Create a one-molecule system with a MEGAMAN test dataset and return state 0."""
    sequence = "MEGAMAN"
    sys = system.System()
    molecule = sys.add_macromolecule(sequence, "test_molecule")
    dataset = data.Dataset("Test", data.Conditions(), sequence=sequence)
    for fragment, start in (("MEGA", 1), ("MEGAMA", 1), ("GAMAN", 3), ("AMAN", 4)):
        dataset.create_peptide(fragment, start_residue=start)
    # Every peptide observes the same three timepoints.
    for peptide in dataset.get_peptides():
        peptide.add_timepoints([10, 100, 1000])
    molecule.get_state(0).add_dataset(dataset)
    return molecule.get_state(0)
def main(args):
    """Evaluate the saved model on the test split, printing loss and accuracy.

    Args:
        args: namespace with `data_dir` (dataset root) and `model_filename`
            (checkpoint path).
    """
    dataset = data.Dataset(args.data_dir)
    criterion = nn.CrossEntropyLoss()
    net = model.Net(data.VOCAB_SIZE)
    if os.path.isfile(args.model_filename):
        if torch.cuda.is_available():
            checkpoint = torch.load(args.model_filename)
        else:
            # BUG FIX: the CPU branch referenced an undefined name `filename`
            # (NameError); it must load args.model_filename onto the CPU.
            checkpoint = torch.load(args.model_filename,
                                    map_location=lambda storage, loc: storage)
        net.load_state_dict(checkpoint['state_dict'])
    if torch.cuda.is_available():
        net.cuda()
    print('\nRunning test')
    epoch_end = False
    total_loss = []
    test_step = 0
    num_correct = 0
    test_size = len(dataset.dataset['test'])
    # Batch size 1: one example per step until the dataset signals epoch end.
    while not epoch_end:
        test_step += 1
        minibatch, epoch_end = dataset.get_next_minibatch('test', 1)
        batch_tensor = Variable(minibatch[0])
        labels_tensor = Variable(minibatch[1])
        if torch.cuda.is_available():
            batch_tensor = batch_tensor.cuda()
            labels_tensor = labels_tensor.cuda()
        output = net.forward(batch_tensor)
        # Labels are one-hot; argmax both sides before comparing.
        label_index = torch.max(labels_tensor, 1)[1]
        output_index = torch.max(output, 1)[1]
        num_correct += (label_index == output_index).sum().type(
            torch.LongTensor)
        loss = criterion(output, label_index)
        total_loss.append(loss.data)
        sys.stdout.write('Test step %i/%i\r' % (test_step, test_size))
        sys.stdout.flush()
    total_loss = float(sum(total_loss)[0]) / float(len(total_loss))
    print('\nTest loss: %f' % total_loss)
    print('\nAccuracy: %f%%' % ((float(num_correct) / float(test_size)) * 100))
def load_dir(basedir, num_target, window_size):
    """Load a dataset from every csv file found under `basedir`.

    Args:
        basedir: path to the dataset directory.
        num_target: number of classes in the dataset.
        window_size: sliding-window size used during feature extraction.

    Returns:
        A data.Dataset built from the extracted features and targets.
    """
    csv_files = get_filenames(basedir)
    features, target = data.get(csv_files, num_target, window_size)
    return data.Dataset(features, target)