def main(_):
    assert FLAGS.train_dir, '`train_dir` is missing.'
    if FLAGS.pipeline_config_path:
        model_config, train_config, input_config = get_configs_from_pipeline_file()
    else:
        model_config, train_config, input_config = get_configs_from_multiple_files()

    model_fn = functools.partial(model_builder.build,
                                 model_config=model_config,
                                 is_training=True)

    create_input_dict_fn = functools.partial(input_reader_builder.build, input_config)

    env = json.loads(os.environ.get('TF_CONFIG', '{}'))
    cluster_data = env.get('cluster', None)
    cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
    task_data = env.get('task', None) or {'type': 'master', 'index': 0}
    task_info = type('TaskSpec', (object,), task_data)

    # Parameters for a single worker.
    ps_tasks = 0
    worker_replicas = 1
    worker_job_name = 'lonely_worker'
    task = 0
    is_chief = True
    master = ''

    if cluster_data and 'worker' in cluster_data:
        # Number of total worker replicas includes "worker"s and the "master".
        worker_replicas = len(cluster_data['worker']) + 1
    if cluster_data and 'ps' in cluster_data:
        ps_tasks = len(cluster_data['ps'])

    if worker_replicas > 1 and ps_tasks < 1:
        raise ValueError('At least 1 ps task is needed for distributed training.')

    if worker_replicas >= 1 and ps_tasks > 0:
        # Set up distributed training.
        server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
                                 job_name=task_info.type,
                                 task_index=task_info.index)
        if task_info.type == 'ps':
            server.join()
            return

        worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
        task = task_info.index
        is_chief = (task_info.type == 'master')
        master = server.target

    trainer.train(create_input_dict_fn, model_fn, train_config, input_config,
                  master, task, 1, worker_replicas, False, ps_tasks,
                  worker_job_name, is_chief, FLAGS.train_dir,
                  FLAGS.save_interval_secs, FLAGS.log_every_n_steps)
def main():
    gc.collect()  # Garbage collect

    # Check arguments
    if len(sys.argv) < 2:
        return showUsage()
    ui.clear()

    # Pre-process PDFs
    dirname = sys.argv[1]
    images, targets, plotType = pproc.processPDFs(dirname)

    # Extract feature vectors (until user quits)
    doneExtracting = False
    while not doneExtracting:
        data, ftType = ft.extract_features(images)

        # Create, train, and evaluate model (until user quits)
        doneTraining = False
        while not doneTraining:
            tr.train(data, targets, plotType, ftType)

            options = ["Try another model", "Extract new features", "Quit"]
            res = ui.prompt(options=options)
            if options[int(res)] == "Quit":
                doneTraining = True
                doneExtracting = True
            elif options[int(res)] == "Extract new features":
                doneTraining = True

        gc.collect()  # Garbage collect
def policyIteration(self, start_round, rounds, episodes, iterations, dup):
    for i in range(start_round, start_round + rounds + 1):
        net = self.nnet
        self.mcts = MCTS(net, iterations)
        mcts = self.mcts
        print("ROUND")
        print(i)
        path = ("Models/checkpoint" + "_" + str(i) + "_" + str(episodes) + "_"
                + str(mcts.iterations) + "_" + str(dup) + ".pth")
        print("model " + path + " saved")
        torch.save(net.state_dict(), path)
        state_dict = torch.load(path)
        net.load_state_dict(state_dict)
        if i >= rounds:
            return self.nnet
        for e in range(episodes):
            print(e)
            self.data += self.executeEpisode()  # collect examples from this game
            print(len(self.data))
        if dup:
            duplicate = [(encode_reverse(x[0]), x[1], x[2]) for x in self.data]
            self.data += duplicate
        datasets = np.array(self.data)
        optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.8, 0.999))
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=[50, 100, 150, 200, 250, 300, 400],
                                                   gamma=0.77)
        train(net, datasets, optimizer, scheduler, 0, 0, 0)
        self.nnet = net
        self.data = []
    return self.nnet
def main():
    args = setup_parser().parse_args()
    param = load_json(args.config)
    args = vars(args)  # Converting argparse Namespace to a dict.
    args.update(param)  # Add parameters from json
    train(args)
def main():
    process_URL_list('URL.txt', 'url_features.csv')
    # process_test_list("query.txt", 'query_features.csv')

    # arguments: (input training features, test/query training features)
    tr.train('url_features.csv', 'url_features.csv')
    tr.train('url_features.csv', 'query_features.csv')
def main():
    args = arguments()

    num_templates = 25  # 29 # 25 # aka the number of clusters

    train_loader, weights_dir = get_dataloader(args.traindata, args, num_templates)

    model = DetectionModel(num_objects=1, num_templates=num_templates)
    loss_fn = DetectionCriterion(num_templates)

    optimizer = optim.SGD(model.learnable_parameters(args.lr), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)
    # optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # Set the start epoch if it has not been set
        if not args.start_epoch:
            args.start_epoch = checkpoint['epoch']

    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20,
                                          last_epoch=args.start_epoch - 1)

    # train and evaluate for `epochs`
    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()
        trainer.train(model, loss_fn, optimizer, train_loader, epoch, save_path=weights_dir)
def main(argv=None):
    writer = Writer(RESULTS_DIR)
    trainer = Trainer(RESULTS_DIR, 'train', writer)
    trainer.train(LEARNING_RATE, EVAL_FREQUENCY, init_step=None, restoring_file=RESTORING_FILE)
def train_skip_wandb():
    # env = SimpleRandomGeometricNonEpisodicShapeEnv(side_length, max_steps, random_starting_pos=False)
    env = SimpleSequentialGeometricNonEpisodicShapeEnv(side_length, max_steps,
                                                       random_starting_pos=False,
                                                       random_missing_pixel=False,
                                                       subtract_canvas=True)
    replace = 1000
    lr = 0.001
    gamma = 0.6
    epsilon = 1
    epsilon_min = 0
    epsilon_dec = 1e-5
    # epsilon_dec = 2.5e-6  # from 1 to 0 in 400000 steps
    mem_size = 1000000
    batch_size = 32
    # checkpoint_dir = config.checkpoint_dir
    n_states = env.num_states
    n_actions = env.num_actions - 2
    n_hidden = 128

    name = (test_name + '/lr' + str(lr) + '_gamma' + str(gamma) + '_epsilon' + str(epsilon)
            + '_batch_size' + str(batch_size) + '_fc_size' + str(n_hidden))

    # agent = Agent(n_states, n_actions, n_hidden, lr, gamma, epsilon, epsilon_min, epsilon_dec,
    #               replace, mem_size, batch_size, name, 'models/')
    # agent = DuelingDDQNAgent(n_states, n_actions, n_hidden, lr, gamma, epsilon, epsilon_min,
    #                          epsilon_dec, replace, mem_size, batch_size, name, 'models/')
    agent = AgentDoubleOut(n_states, n_actions, n_hidden, lr, gamma, epsilon, epsilon_min,
                           epsilon_dec, replace, mem_size, batch_size, name, 'models/')

    train(name, env, agent, n_train_games_to_avg=50, eval_games_freq=1000, n_eval_games=50,
          plots_path='plots/', max_steps=50)
def main(args):
    # Create directories
    if not os.path.exists("./logs"):
        os.makedirs("./logs")
    if not os.path.exists("./pytorch_models"):
        os.makedirs("./pytorch_models")

    # Set logs
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))
    log = set_log(args)

    # Create env
    env = make_env(log, args)

    # Set seeds
    env.seed(args.seed)
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    # Initialize policy
    agent = set_policy(env, tb_writer, log, args, name=args.algorithm)

    if args.test:
        from tester import test
        test(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
    else:
        from trainer import train
        train(agent=agent, env=env, log=log, tb_writer=tb_writer, args=args)
def main():
    # Setup logging
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)

    # Parse args
    args = parse_args()
    logger.info(vars(args))

    # Setup CUDA, GPU training
    os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_id
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.device = device
    logger.info('Device is %s', args.device)

    # Set seed
    set_seed(args)

    tokenizer = BertTokenizer.from_pretrained(args.bert_model_dir)
    args.tokenizer = tokenizer

    # Load datasets and vocabs
    train_dataset, test_dataset, word_vocab, dep_tag_vocab, pos_tag_vocab = load_datasets_and_vocabs(args)

    model = Aspect_CS_GAT_BERT(args)
    model.to(args.device)

    # Train
    train(args, train_dataset, model, test_dataset, word_vocab)
def main(): """Main function to run model.""" config = get_config(os.environ) sys.path.append(os.path.join('tasks', config.task_folder)) # pylint: disable=import-error from trainer import train from tester import test # pylint: enable=import-error if config.is_distributed: torch.cuda.set_device(config.local_rank) torch.distributed.init_process_group(backend='nccl', init_method='env://') logger = setup_logger(config.work_dir, distributed_rank=get_rank()) logger.info(f'Using {config.num_gpus} GPUs.') logger.info(f'Collecting environment info:{get_env_info()}') logger.info(f'------------------------------') logger.info(f'Running configurations:') for key, val in config.__dict__.items(): logger.info(f' {key}: {val}') logger.info(f'------------------------------') if config.run_mode == 'train': train(config, logger) elif config.run_mode == 'test': test(config, logger)
def main(args):
    # Set logging
    if not os.path.exists("./log"):
        os.makedirs("./log")
    log = set_log(args)
    tb_writer = SummaryWriter('./log/tb_{0}'.format(args.log_name))

    # Set seed
    set_seed(args.seed, cudnn=args.make_deterministic)

    # Set sampler
    sampler = BatchSampler(args, log)

    # Set policy
    policy = CaviaMLPPolicy(
        input_size=int(np.prod(sampler.observation_space.shape)),
        output_size=int(np.prod(sampler.action_space.shape)),
        hidden_sizes=(args.hidden_size,) * args.num_layers,
        num_context_params=args.num_context_params,
        device=args.device)

    # Initialise baseline
    baseline = LinearFeatureBaseline(int(np.prod(sampler.observation_space.shape)))

    # Initialise meta-learner
    metalearner = MetaLearner(sampler, policy, baseline, args, tb_writer)

    # Begin train
    train(sampler, metalearner, args, log, tb_writer)
def main(): """ Main Function. """ # dataloader parameters data_path = 'data/kitti2012/training' receptive_size = 9 max_disp = 128 batch_size = 5 num_workers = 0 # training parameters learning_rate = 1e-2 max_epochs = 2 criterion = nn.CrossEntropyLoss() # create network net = SiameseNetwork() print(net) # create dataloader dataloaders, dataset_sizes = get_loaders(data_path, receptive_size, max_disp, batch_size, num_workers) # create optimizer p = net.parameters() optimizer = torch.optim.Adagrad(p, learning_rate) # train the network train(net, dataloaders, dataset_sizes, criterion, optimizer, max_epochs)
def train(self, train_x, train_y):
    input_dim = train_x.shape[1]
    self.model = MetricLearningModel(input_dim=input_dim, hidden_dim=self.hidden_dim,
                                     aplha=self.alpha, lambda1=self.lambda1,
                                     lambda2=self.lambda2, dropout_p=self.dropout,
                                     num_layers=self.num_layers)
    self.optimizer = Adam(self.model.parameters())
    if self.cuda:
        self.model.cuda(device=0)
    self.dataset = DataSet(self.batch_size, train_x.shape[1])
    for _x, _y in zip(train_x, train_y):
        if numpy.random.uniform() <= 0.1:
            self.dataset.add_data_entry(_x.tolist(), _y.item(), 'valid')
        else:
            self.dataset.add_data_entry(_x.tolist(), _y.item(), 'train')
    self.dataset.initialize_dataset(balance=self.balance, output_buffer=self.output_buffer)
    train(model=self.model, dataset=self.dataset, optimizer=self.optimizer,
          num_epochs=self.num_epoch, max_patience=self.max_patience,
          cuda_device=0 if self.cuda else -1, output_buffer=self.output_buffer)
    if self.output_buffer is not None:
        print('Training Complete', file=self.output_buffer)
def run(args, device):
    train_set, dev_set, test_set, train_labels, train_label_freq, input_indexer = prepare_datasets(
        args.data_setting, args.batch_size, args.max_len)
    logging.info(f'Training labels are: {train_labels}\n')
    embed_weights = load_embedding_weights()
    label_desc = None  # load_label_embedding(train_labels, input_indexer.index_of(constants.PAD_SYMBOL))

    model = None
    for hyper_params in get_hyper_params_combinations(args):
        if args.model == 'Transformer':
            model = Transformer(embed_weights, args.embed_size, args.freeze_embed,
                                args.max_len, args.num_trans_layers, args.num_attn_heads,
                                args.trans_forward_expansion, train_set.get_code_count(),
                                args.dropout_rate, device)
        elif args.model == 'TransICD':
            model = TransICD(embed_weights, args.embed_size, args.freeze_embed,
                             args.max_len, args.num_trans_layers, args.num_attn_heads,
                             args.trans_forward_expansion, train_set.get_code_count(),
                             args.label_attn_expansion, args.dropout_rate, label_desc,
                             device, train_label_freq)
        else:
            raise ValueError("Unknown value for args.model. Pick Transformer or TransICD")

        if model:
            model.to(device)
            logging.info(f"Training with: {hyper_params}")
            train(model, train_set, dev_set, test_set, hyper_params, args.batch_size, device)
def cap_(self):
    global path3
    f = open('write_data.txt', 'r')
    lines = f.readlines()
    id = len(lines)
    f.close()
    faceDetect = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
    sampleNum = 0
    while True:
        ret, img = cap.read()
        if len(img.shape) == 3 or len(img.shape) == 4:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        else:
            gray = img
        faces = faceDetect.detectMultiScale(gray, 2.5, 5)
        x = 0
        y = 0
        w = 0
        h = 0
        for (x, y, w, h) in faces:
            sampleNum = sampleNum + 1
            cv2.imwrite(path3 + "/User." + str(id) + "." + str(sampleNum) + ".jpg",
                        gray[y:y + h, x:x + w])
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        height, width, bytesPerComponent = img.shape
        bytesPerLine = bytesPerComponent * width
        cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
        self.image = QImage(img.data, width, height, bytesPerLine, QImage.Format_RGB888)
        self.label_camera.setPixmap(QPixmap.fromImage(self.image).scaled(
            self.label_camera.width(), self.label_camera.height()))
        cv2.waitKey(200)
        if sampleNum > 100:
            QMessageBox.information(self, "成功", "录入完毕")  # "Success", "Enrollment complete"
            break
    cv2.destroyAllWindows()
    trainer.train(id)
def main():
    args = parse_args()

    print('\nCalled with args:')
    for key in args:
        print(f"{key:<10}: {args[key]}")
    print("=" * 78)

    # get the command line args
    max_lr = args["max_lr"]
    min_lr = args["min_lr"]
    batch_size = args["batch_size"]
    num_epochs = args["epochs"]
    save_dir_path = args["save_dir_path"]
    if save_dir_path == "":
        save_dir_path = './model_checkpoints'

    # get the data
    print("\nLoading data now.", end=" ")
    x_train, y_train, x_test, y_test, y_train_cat, y_test_cat = get_cifar_data()
    training_data = [x_train, y_train, y_train_cat]
    validation_data = [x_test, y_test, y_test_cat]
    print("Data loading complete. \n")

    # pass the arguments to the trainer
    train(training_data=training_data,
          validation_data=validation_data,
          batch_size=batch_size,
          nb_epochs=num_epochs,
          min_lr=min_lr,
          max_lr=max_lr,
          save_dir_path=save_dir_path)
def main(args):
    # Create directories
    if not os.path.exists("./logs"):
        os.makedirs("./logs")
    if not os.path.exists("./pytorch_models"):
        os.makedirs("./pytorch_models")

    # Set logs
    log = set_log(args)
    tb_writer = SummaryWriter('./logs/tb_{0}'.format(args.log_name))

    # Create env
    env = make_env(args)

    # Set seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    env.seed(args.seed)

    # Initialize agents
    agent1 = Agent(env, log, tb_writer, args, name="agent1", i_agent=1)
    agent2 = Agent(env, log, tb_writer, args, name="agent2", i_agent=2)

    # Start train
    train(agent1, agent2, env, log, tb_writer, args)
def main(config_file, mode, distributed):
    config = check_params(config_file)
    if mode in ["Train", "train"]:
        train_dataset = Dataset(config["train_params"]["input_path"],
                                config["train_params"]["imsize"])
        if distributed:
            import horovod.torch as hvd  # PyTorch bindings are needed for DistributedOptimizer
            hvd.init()
            writer = None  # only rank 0 writes TensorBoard summaries
            if hvd.rank() == 0:
                writer = setup_tensorboard(get_params(config["train_params"],
                                                      "tensorboard_location", "./summary/"))
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset, num_replicas=hvd.size(), rank=hvd.rank())
            # shuffle must not be set when a sampler is supplied
            train_loader = torch.utils.data.DataLoader(train_dataset,
                                                       batch_size=config["train_params"]["batch_size"],
                                                       sampler=train_sampler)
            model = ConvLSTM(**config["model_params"])
            optimizer = hvd.DistributedOptimizer(model.optimizer,
                                                 named_parameters=model.named_parameters())
            hvd.broadcast_parameters(model.state_dict(), root_rank=0)
            train_distributed(model, train_loader, optimizer, config, writer)
        else:
            writer = setup_tensorboard(get_params(config["train_params"],
                                                  "tensorboard_location", "./summary/"))
            train_loader = torch.utils.data.DataLoader(train_dataset,
                                                       batch_size=config["train_params"]["batch_size"],
                                                       shuffle=True)
            model = ConvLSTM(**config["model_params"])
            train(model, train_loader, model.optimizer, config, writer)
    elif mode in ["infer", "Infer"]:
        model = ConvLSTM(**config["model_params"])
        # load_state_dict expects a state dict, not a path
        model.load_state_dict(torch.load(config["infer_params"]["model_save_path"]))
        output_file = open(config["infer_params"]["output_path"])
def rnn_main(dataset):
    model = LanguageModel(dataset.vocab).to(_flags.device())

    def sample():
        return dataset.sample_train(aug_ratio=FLAGS.aug_ratio)

    def score_utts(utts):
        fake = [((), utt) for utt in utts]
        batch = make_batch(fake, model.vocab, staged=False)
        mean = model(None, batch.out_data, None, None).item()
        tot = mean * sum(len(utt) - 1 for utt in utts)
        return tot

    def callback(i_epoch):
        model.eval()
        final = i_epoch == FLAGS.n_epochs - 1
        with hlog.task("eval_val", timer=False):
            val_acc = evaluate(score_utts, dataset.get_val(), dataset)
        if FLAGS.TEST and (final or FLAGS.test_curve):
            with hlog.task("eval_test", timer=False):
                evaluate(score_utts, dataset.get_test(), dataset)
        if (i_epoch + 1) % FLAGS.n_checkpoint == 0:
            torch.save(model.state_dict(),
                       os.path.join(FLAGS.model_dir, "model.%05d.chk" % i_epoch))
        return val_acc

    train(dataset, model, sample, callback, staged=False)
def main():
    env_name = 'BreakoutNoFrameskip-v4'
    env = atari_wrappers.wrap_deepmind(atari_wrappers.make_atari(env_name),
                                       episode_life=True, clip_rewards=True,
                                       frame_stack=True, scale=True)
    output_size = env.action_space.n
    input_shape = env.observation_space.shape
    with tf.Session() as sess:
        with tf.variable_scope('Breakout_lr'):
            input = tf.placeholder(tf.float32, [None, *input_shape])
            model = PPO(sess, input, models.nature_cnn(input), actiontype.Discrete,
                        output_size, learning_rate=lambda f: 2.5e-4 * (1 - f),
                        epochs=4, minibatch_size=4, gamma=0.99, beta2=0.01,
                        name='Breakout_lr')
        train(sess, model, env_name, 1e7, 256, log_interval=5, num_envs=16, atari=True)
        # run_only(sess, model, env, render=True)
    env.close()
def fit(args, model, device, optimizer, loss_fn, dataset, labels_list, task_id):
    # Dataloaders
    train_loader = trainer.get_loader(mnist.getTrain(dataset), args, device, 'train')
    val_loader = trainer.get_loader(mnist.getVal(dataset), args, device, 'val')

    # Log best accuracy
    best_val_loss = 0

    # Early stopping
    early_stop = 0

    # Training loop
    for epoch in range(1, args.epochs + 1):
        # Prepare model for current task
        model.set_task_id(labels_list[task_id])

        trainer.train(args, model, device, train_loader, optimizer, epoch, loss_fn)
        val_loss, _ = trainer.test(args, model, device, val_loader, loss_fn, val=True)

        if val_loss > best_val_loss:
            best_val_loss = val_loss
            best_state = model.state_dict()
            early_stop = 0
        else:
            early_stop += 1

        if early_stop >= args.early_stop_after:
            break

    return best_state
def main(argv):
    torch.manual_seed(FLAGS.seed)
    np.random.seed(FLAGS.seed)
    hlog.flags()

    if not os.path.exists(FLAGS.model_dir):
        os.mkdir(FLAGS.model_dir)

    dataset = get_dataset()
    model = StagedModel(dataset.vocab, copy=True, self_attention=False).to(_flags.device())
    # model = RetrievalModel(
    #     dataset.vocab
    # )
    model.prepare(dataset)

    def callback(i_epoch):
        model.eval()
        evaluate(dataset, model)
        if (i_epoch + 1) % FLAGS.n_checkpoint == 0:
            torch.save(model.state_dict(),
                       os.path.join(FLAGS.model_dir, "model.%05d.chk" % i_epoch))

    train(dataset, model, dataset.sample_comp_train, callback, staged=True)
def main(_): if "absl.logging" in sys.modules: import absl.logging absl.logging.set_verbosity("info") absl.logging.set_stderrthreshold("info") config = FLAGS.config print(config) # Set the seed torch.manual_seed(config.seed) np.random.seed(config.seed) # Check if in the correct branch # group_name = config["model"][: config["model"].find("sa")] # if group_name not in ["z2", "mz2", "p4", "p4m"]: # raise ValueError( # "Mlp_encoding is required for rotations finer than 90 degrees. Please change to the mlp_encoding branch." # ) # initialize weight and bias os.environ["WANDB_API_KEY"] = "691777d26bb25439a75be52632da71d865d3a671" if not config.train: os.environ["WANDB_MODE"] = "dryrun" wandb.init( project="equivariant-attention", config=config, group=config["dataset"], entity="equivatt_team", ) # Define the device to be used and move model to that device config["device"] = ( "cuda:0" if (config.device == "cuda" and torch.cuda.is_available()) else "cpu" ) model = get_model(config) # Define transforms and create dataloaders dataloaders = dataset.get_dataset(config, num_workers=4) # Create model directory and instantiate config.path model_path(config) if config.pretrained: # Load model state dict model.module.load_state_dict(torch.load(config.path), strict=False) # Train the model if config.train: # Print arguments (Sanity check) print(config) print(datetime.datetime.now()) # Train the model trainer.train(model, dataloaders, config) # Test model tester.test(model, dataloaders["test"], config)
def process(conf):
    """
    :param conf: Configurator
    :return:
    """
    conf = dataset_creation(conf)
    train(conf)
def run(args):
    ms.context.set_context(
        mode=ms.context.GRAPH_MODE,
        device_target=args.device,
        save_graphs=False,
    )

    net = LeNet5(
        num_class=10,
        num_channel=3,
        use_bn=args.use_bn,
        dbg_log_tensor=args.log_tensor,
    )

    loss = ms.nn.loss.SoftmaxCrossEntropyWithLogits(
        sparse=True,
        reduction='mean',
    )
    opt = build_optimizer(args, net)

    if args.mode == 'init':
        save_checkpoint(
            net,
            ckpt_file_name=os.path.join('seeds', '%d.ckpt' % (time.time())),
        )

    if args.mode == 'train':
        ds_train = create_dataset(
            args=args,
            data_path=os.path.join(args.data_path, 'train'),
            batch_size=args.device_batch_size,
        )
        if args.init_ckpt:
            print('using init checkpoint %s' % (args.init_ckpt))
            load_ckpt(net, args.init_ckpt)
        train(args, net, loss, opt, ds_train)

    if args.mode == 'test':
        if args.use_kungfu:
            rank = kfops.kungfu_current_rank()
            if rank > 0:
                return
        ds_test = create_dataset(
            args=args,
            data_path=os.path.join(args.data_path, 'test'),
            batch_size=args.device_batch_size,
        )
        if args.ckpt_files:
            checkpoints = args.ckpt_files.split(',')
        else:
            checkpoint_dir = get_ckpt_dir(args)
            print('checkpoint_dir: %s' % (checkpoint_dir))
            checkpoints = list(sorted(glob.glob(checkpoint_dir + '/*.ckpt')))
        print('will test %d checkpoints' % (len(checkpoints)))
        # for i, n in enumerate(checkpoints):
        #     print('[%d]=%s' % (i, n))
        test(args, net, loss, opt, ds_test, checkpoints)
def main(config):
    if config.task == 'train':
        config.train = 1
    else:
        config.train = 0

    if config.dataset == 'life':
        config.task = 'regression'
        config.experiment = 'train-test'
    else:
        config.task = 'classification'
        config.experiment = 'doublecv'

    config.expt_name = ("Exp" + str(config.experiment) + "_" + config.mod_split + "_"
                        + config.build_model + "_" + config.last_layer)

    # Create save directories
    utils.create_directories(config)

    data = load_dataset(config)

    if config.experiment == 'mar_doublecv' or config.experiment == 'doublecv':
        n_feature_sets = len(data.keys()) - 1
    elif config.dataset == 'life':
        n_feature_sets = int(len(data.keys()) / 2) - 1

    X = [np.array(data['{}'.format(i)]) for i in range(n_feature_sets)]
    y = np.array(data['y'])

    X_test = None
    y_test = None

    if config.task == 'classification':
        config.n_classes = len(set(y))

    if config.dataset == 'life':
        X_test = [np.array(data['{}_test'.format(i)]) for i in range(n_feature_sets)]
        y_test = np.array(data['y_test'])

    config.n_feature_sets = n_feature_sets
    config.feature_split_lengths = [i.shape[1] for i in X]

    if config.verbose > 0:
        print('Dataset used ', config.dataset)
        print('Number of feature sets ', n_feature_sets)
        for e, i in enumerate(X):
            print('Shape of feature set {} {}'.format(e, np.array(i).shape))

    trainer.train(X, y, config, X_test, y_test)

    print(config.expt_name)
    print(config.dataset)
def runner(args):
    config, name, port = args
    save_path = os.path.join(base_dir, name)
    os.makedirs(save_path, exist_ok=True)
    train(config, n_episodes=1000, save_path=save_path, base_port=port, name=name)
def clicke_(self):
    name = self.text_name.text()
    f = open('write_data.txt', 'r')
    lines = f.readlines()
    id = len(lines)
    f.close()
    id += 1
    create.create(id, name)
    trainer.train()
def main():
    args = setup_parser().parse_args()
    param = load_json(args.config)
    args = vars(args)  # Converting argparse Namespace to a dict.
    args.update(param)  # Add parameters from json
    '''@Author: defeng
    First use args to get the parameters from the command line,
    then add the params in config.json to args.
    '''
    train(args)
def _train(command_list):
    # validate command list
    if _validate_train_commands(command_list):
        algorithm = command_list[1]
        filename = "input_data/" + command_list[2]

        # read training data from csv
        training_data = _read_csv(filename)

        # parse training data to build features and build classifier,
        # catching errors returned by parser
        parsed_training_data = parser.prepare_training_data(training_data)
        if type(parsed_training_data) is str:
            print(parsed_training_data)
            return None
        else:
            classifier = trainer.train(parsed_training_data, algorithm)
            print("...Training complete.")
            # debugging output of classifier details
            # currently only works for naive Bayes... need tree-to-string to print random forest
            # _write_csv("output_data/classifier_details.csv", classifier.classifier_details)
            return classifier
    else:
        return None
def main(argv=None):
    writer = Writer(RESULTS_DIR)
    trainer = Trainer(RESULTS_DIR, 'train', writer)
    tester = Tester(RESULTS_DIR, 'valid', writer)
    step, _ = tester.test(EVAL_STEP_NUM)
    while step < LAST_STEP:
        lr = learning_rate(step)
        step, _ = trainer.train(lr, EVAL_FREQUENCY, step, RESTORING_FILE)
        tester.test(EVAL_STEP_NUM, step)
def main(training_dir, test_file, output_file):
    print('Training...')
    training_data = train(training_dir)
    print('Processing...')
    final_data = process(training_data, test_file)
    print('Writing results...')
    output_result(final_data, output_file)
    print('Done.')
def train(batch, remote, debug, dependency=[]):
    params = cache.get("batch/%s/params" % batch, remote)
    numEpisodes = params['episodes']['num']
    trainParams = params['train']
    numIters = trainParams['iters']
    ij_ = [(i, j) for i, j in it.product(range(numEpisodes), range(numIters))]
    f = lambda (i, j): trainer.train(batch, params, i, j, remote, debug)
    logging.info("running %s train instances" % len(ij_))
    if remote:
        k_ = cloud.map(f, ij_, _label="%s/train" % batch, _depends_on=dependency,
                       _type='c1', _max_runtime=30)
        logging.info("k_ %s" % k_)
        return k_
    else:
        results = map(f, ij_)
        return results
def main(argv=None):
    writer = Writer(RESULTS_DIR)
    trainer = Trainer(RESULTS_DIR, 'train', writer)
    tester = Tester(RESULTS_DIR, 'valid', writer)

    params_file = os.path.join(RESULTS_DIR, PARAMS_FILE)
    if os.path.isfile(params_file):
        with open(params_file, 'r') as handle:
            params = json.load(handle)
    else:
        params = {}
        params['min_test_step'], params['min_test_loss'] = tester.test(EVAL_STEP_NUM)
        params['step'] = params['min_test_step']
        params['unchanged'] = 0
        params['num_decays'] = 0
        params['learning_rate'] = LEARNING_RATE

    while params['num_decays'] <= MAX_DECAYS:
        params['step'], _ = trainer.train(params['learning_rate'], EVAL_FREQUENCY,
                                          params['step'], RESTORING_FILE)
        _, test_loss = tester.test(EVAL_STEP_NUM, params['step'])
        if test_loss < params['min_test_loss']:
            params['min_test_loss'] = test_loss
            params['min_test_step'] = params['step']
            params['unchanged'] = 0
        else:
            params['unchanged'] += EVAL_FREQUENCY
            if params['unchanged'] >= PATIENCE:
                params['learning_rate'] *= DECAY_FACTOR
                params['num_decays'] += 1
                params['step'] = params['min_test_step']
                params['unchanged'] = 0
        with open(params_file, 'w') as handle:
            json.dump(params, handle, indent=2)
        print(params)
import glob

import numpy as np
import theanets

import trainer
# local helper modules referenced below
import config
import util_image
import util_guru

np.set_printoptions(precision=3)

mode = raw_input("1: Train\n2: Load\n3: Live\n")

exp = theanets.Experiment(
    theanets.feedforward.Regressor,
    layers=(config.IMG_W * config.IMG_H, 500, 1)
)

if mode == "1":
    trainer.train(exp)

if mode == "2":
    exp = exp.load(path="net.data")
    print "Manual validation:"
    for file in glob.glob("data/manual/*.jpg"):
        image = util_image.load(file)
        input = util_image.data(image)
        input = input.reshape(1, len(input))
        output = exp.predict(input)
        print "Prediction for ", file, " = ", output

if mode == "3":
    exp = exp.load(path="net.data")
    session = util_guru.start()
    while True:
        # (fragment: body of the trainer's per-epoch loop)
        np.random.shuffle(train_epoch_data)
        # np.random.shuffle(val_epoch_data)

        # Full pass over the training data
        train_gen = ParallelBatchIterator(generator_train, train_epoch_data, ordered=False,
                                          batch_size=P.BATCH_SIZE_TRAIN // 3,
                                          multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                          n_producers=P.N_WORKERS_LOAD_AUGMENTATION)
        self.do_batches(self.train_fn, train_gen, self.train_metrics)

        # And a full pass over the validation data:
        val_gen = ParallelBatchIterator(generator_val, val_epoch_data, ordered=False,
                                        batch_size=P.BATCH_SIZE_VALIDATION // 3,
                                        multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                        n_producers=P.N_WORKERS_LOAD_AUGMENTATION)
        self.do_batches(self.val_fn, val_gen, self.val_metrics)

        self.post_epoch()


if __name__ == "__main__":
    X_train = glob.glob(P.FILENAMES_TRAIN)
    X_val = glob.glob(P.FILENAMES_VALIDATION)

    train_generator = dataset_2D.load_images
    validation_generator = dataset_2D.load_images

    trainer = ResNetTrainer()
    trainer.train(train_generator, X_train, validation_generator, X_val)
def main(argv=None):
    hyper_file = os.path.join(RESULTS_DIR, HYPER_FILE)
    if os.path.isfile(hyper_file):
        with open(hyper_file, 'r') as handle:
            hyper = json.load(handle)
    else:
        hyper = {}
        hyper['min_test_step'] = LAYERS_NUM
        hyper['step'] = hyper['min_test_step']
        hyper['unchanged'] = 0
        hyper['restfile'] = RESTORING_FILE

    while hyper['unchanged'] < HYPER_PATIENCE:
        results_dir = os.path.join(RESULTS_DIR, str(hyper['step']))
        writer = Writer(results_dir)
        trainer = Trainer(results_dir, 'train', writer, hyper['step'])
        tester = Tester(results_dir, 'valid', writer, hyper['step'])

        params_file = os.path.join(results_dir, PARAMS_FILE)
        if os.path.isfile(params_file):
            with open(params_file, 'r') as handle:
                params = json.load(handle)
        else:
            params = {}
            params['min_test_step'], params['min_test_loss'] = tester.test(EVAL_STEP_NUM)
            params['step'] = params['min_test_step']
            params['unchanged'] = 0
            params['num_decays'] = 0
            params['learning_rate'] = LEARNING_RATE

        if 'min_test_loss' not in hyper:
            hyper['min_test_loss'] = params['min_test_loss']

        while params['num_decays'] <= MAX_DECAYS:
            params['step'], _ = trainer.train(params['learning_rate'], EVAL_FREQUENCY,
                                              params['step'], hyper['restfile'])
            _, test_loss = tester.test(EVAL_STEP_NUM, params['step'])
            if test_loss < params['min_test_loss']:
                params['min_test_loss'] = test_loss
                params['min_test_step'] = params['step']
                params['unchanged'] = 0
            else:
                params['unchanged'] += EVAL_FREQUENCY
                if params['unchanged'] >= PATIENCE:
                    params['learning_rate'] *= DECAY_FACTOR
                    params['num_decays'] += 1
                    params['step'] = params['min_test_step']
                    params['unchanged'] = 0
            with open(params_file, 'w') as handle:
                json.dump(params, handle, indent=2)
            print(params)

        # tester.test(step_num=None, init_step=params['min_test_step'])
        if params['min_test_loss'] < hyper['min_test_loss']:
            hyper['min_test_loss'] = params['min_test_loss']
            hyper['min_test_step'] = hyper['step']
            hyper['unchanged'] = 0
        else:
            hyper['unchanged'] += 1

        hyper['restfile'] = os.path.join(results_dir, model_file(params['min_test_step']))
        hyper['step'] += 2
        with open(hyper_file, 'w') as handle:
            json.dump(hyper, handle, indent=2)
        print(hyper)
        print('\nNEW HYPER PARAMETER: %d' % hyper['step'])
else: model_name = "model" np.random.seed(conf.RANDOM_SEED) t0 = time() ### convert data ### if not os.path.exists("../data"): print "Converting data...\n" os.makedirs("../data") vocabulary = utils.load_vocabulary(conf.VOCABULARY_FILE) converter.convert_files(conf.PHASE1["TRAIN_DATA"], vocabulary, conf.PUNCTUATIONS, conf.BATCH_SIZE, False, PHASE1_TRAIN_PATH) converter.convert_files(conf.PHASE1["DEV_DATA"], vocabulary, conf.PUNCTUATIONS, conf.BATCH_SIZE, False, PHASE1_DEV_PATH) if conf.PHASE2["TRAIN_DATA"] and conf.PHASE2["DEV_DATA"]: converter.convert_files(conf.PHASE2["TRAIN_DATA"], vocabulary, conf.PUNCTUATIONS, conf.BATCH_SIZE, conf.PHASE2["USE_PAUSES"], PHASE2_TRAIN_PATH) converter.convert_files(conf.PHASE2["DEV_DATA"], vocabulary, conf.PUNCTUATIONS, conf.BATCH_SIZE, conf.PHASE2["USE_PAUSES"], PHASE2_DEV_PATH) ### train model ### print "Training model...\n" if not os.path.exists("../out"): os.makedirs("../out") trainer.train(model_name, PHASE1_TRAIN_PATH, PHASE1_DEV_PATH, PHASE2_TRAIN_PATH, PHASE2_DEV_PATH) print "Done in %.2f minutes" % ((time() - t0) / 60.)
input_path = "/home/jake/Dropbox/Projects/Sentiment Analysis/yelp_dataset/yelp_academic_dataset" output_path = "/home/jake/Dropbox/Projects/Sentiment Analysis/my_data/training_test" sc = SparkContext("local", "NBTrainer") all_reviews = sc.textFile(input_path).map(simplejson.loads).filter(lambda datum: datum['type']=='review') total = 330071 sample_size = 100 training_data = all_reviews.sample(False, float(sample_size)/float(total)) def categoryof(datum): if datum['stars'] > 3: return 'pos' else: return 'neg' def textof(datum): return datum['text'] output = train(training_data, categoryof, textof) output.map(simplejson.dumps).saveAsTextFile(output_path)
from trainer import train
import submission as subm
import numpy as np
import files_load as fl
import sys
from random import shuffle

if len(sys.argv) != 3:
    exit(1)

trainD = fl.loadmatrixs(sys.argv[1])
validD = fl.loadmatrixs(sys.argv[2])

ftrainD, mtrainD = zip(*trainD)
mean, eigenvec, thetas = train(mtrainD)
print "Mean: ", mean
print "eigenvec: \n", eigenvec

count = 0
for idx, data in enumerate(validD):
    success = False
    filename, mtx = data
    thetaSubmit = subm.submit(mtx, mean, eigenvec)
    matchIdx = subm.compareAvgGap(thetaSubmit, thetas)
    if filename[:filename.rfind("_")] == ftrainD[matchIdx][:ftrainD[matchIdx].rfind("_")]:
        success = True
        count = count + 1
    if success is True:
        print filename, " --> ", ftrainD[matchIdx], "[X]"
    else:
        print filename, " --> ", ftrainD[matchIdx], "[ ]"

print count, " / ", idx + 1, "===>", count / (idx + 1.) * 100, "%"
def do_experiment(experiment_dir, beats=0, bars=0, nCodes=0, nIter=1e7,
                  partialbar=0, keyInv=False, songKeyInv=True, lrate=1e-3,
                  mat_dir='', useModel='VQ', autobar=False, randoffset=False):
    """
    Main function to run an experiment, train a model and save to dir.
    """
    # check for 'done' file
    if os.path.exists(os.path.join(experiment_dir, 'DONE.txt')):
        return
    # if experiment folder does not exist, create it
    if not os.path.exists(experiment_dir):
        print 'creating experiment dir:', experiment_dir
        os.mkdir(experiment_dir)
    # ec = exit code for training, 0 = ok, >0 = bad
    ec = 1
    # check if a saved model exists
    alldirs = glob.glob(os.path.join(experiment_dir, '*'))
    if len(alldirs) > 0:
        alldirs = filter(lambda x: os.path.isdir(x), alldirs)
        alldirs = filter(lambda x: os.path.split(x)[-1][:4] == 'exp_', alldirs)
        # trim badly saved models
        alldirs = filter(lambda x: check_saved_model_full(x), alldirs)
    continue_training = len(alldirs) > 0
    # continue from saved model
    if continue_training:
        # find most recent saved model, and continue!
        # ec = exit_code, 0 if due to StopIteration, >0 otherwise
        savedmodel = np.sort(alldirs)[-1]
        ec = trainer.train(savedmodel)
    # no prior saved model
    if not continue_training:
        # initialize and save codebook
        codebook = initializer.initialize(nCodes, pSize=beats, usebars=bars,
                                          keyInv=keyInv, songKeyInv=songKeyInv,
                                          positive=True, do_resample=True,
                                          partialbar=partialbar, nThreads=4,
                                          oracle='MAT', artistsdb='',
                                          matdir=mat_dir, randoffset=randoffset)
        codebook_fname = os.path.join(experiment_dir, 'codebook.mat')
        scipy.io.savemat(codebook_fname, {'codebook': codebook})
        print 'after initialization, codebook saved to:', codebook_fname
        # train (from scratch)
        # ec = exit_code, 0 if due to StopIteration, >0 otherwise
        ec = trainer.train(codebook_fname, expdir=experiment_dir, pSize=beats,
                           usebars=bars, keyInv=keyInv, songKeyInv=songKeyInv,
                           positive=True, do_resample=True, partialbar=partialbar,
                           lrate=lrate, nThreads=4, oracle='MAT', artistsdb='',
                           matdir=mat_dir, nIterations=nIter, useModel=useModel,
                           autobar=autobar, randoffset=randoffset)
    # write done file
    if ec == 0:
        f = open(os.path.join(experiment_dir, 'DONE.txt'), 'w')
        f.write('experiment appears to be done\n')
        f.close()
    # otherwise assume it was a keyboard interrupt, for multiprocessing purposes
    else:
        raise KeyboardInterruptError()
else:
    filename = argv[1]

data = get_data_sets(filename)

hmms = {}
for label in data:
    hmms[label] = make_model(14, 8)

print('training')
for label in hmms:
    model = hmms[label]
    data_set = data[label]
    epochs = 3
    hmms[label] = train(model, data_set, epochs, lambda x: None)
print('training complete\n')

count = 0
correct = 0
for label, data_set in data.items():
    for observed in data_set:
        best_label = None
        max_prob = 0
        for name, model in hmms.items():
            prob = model.probability_of_observed(observed)
            if prob > max_prob: