def main():
    """Interactively configure and run a spam/ham classifier training session.

    Prompts on stdin (Python 2 ``raw_input``) for the corpus directory, the
    spam and ham sub-directories, and a per-class file limit; answers left
    blank fall back to the Trainer defaults shown in brackets.
    """
    print colored("Defaults are shown in []", 'blue')
    args = {}  # kwargs forwarded to Trainer(**args); omitted keys use Trainer defaults
    directory = raw_input(colored("Enter Corpus Directory['data/corpus3']\n$ ", 'blue'))
    if directory:
        args['directory'] = directory
    spam = raw_input(colored("Enter Spam Sub Directory['spam']\n$ ", 'blue'))
    if spam:
        args['spam'] = spam
    ham = raw_input(colored("Enter Clean Emails Sub Directory['ham']\n$ ", 'blue'))
    if ham:
        args['ham'] = ham
    limit = (raw_input(colored("Enter Limit of files per class(spam/ham)[1000]\n$ ",'blue')))
    if limit:
        try:
            args['limit'] = int(limit)
        except:  # NOTE(review): bare except; non-numeric input silently keeps the default
            print colored("Number had to be inserted using default",'red')
    trainer = Trainer(**args)
    started_at = datetime.now()
    classy = trainer.train(verbose=1)
    ended_at = datetime.now()
    # Timedelta str() is "H:MM:SS.ffffff"; strip the fractional seconds field.
    diff = str(ended_at - started_at).split(":")
    diff[2] = diff[2][:diff[2].find(".")]
    print colored("Training took %s hours %s minutes and %s seconds"%(diff[0],diff[1],diff[2]),'green')
    print colored("Trained, Testing Time",'green')
    # Evaluate the trained classifier on both classes.
    test(trainer, classy,'spam')
    test(trainer, classy,'ham')
def _benchmark(benchmarker, test_slice): """Benchmarks the classifier based on a test set slice""" try: (start, end) = test_slice X_test, y_test = benchmarker.ds.testing_slice(start, end) clf = copy.deepcopy(benchmarker.clf) trainer = Trainer(benchmarker.config, benchmarker.ds, clf) return trainer.benchmark(X_test, y_test) except Exception as e: print(e) traceback.print_exc()
def save_pickle():
    """Train the spam classifier on the full corpus and persist both the
    Trainer instance and the trained classifier to disk with ``dill``.
    """
    directory = CORPUS_DIR
    spam = os.path.join(directory, 'spam')
    ham = os.path.join(directory, 'ham')
    kwargs = {
        "directory": directory,
        "spam": spam,
        "ham": ham,
        "limit": 16545
    }
    logging.info("Testing the classifier on dataset : {0}".format(directory))
    # limit is given 16545 as
    """
    $ ls data/full_corpus/ham | wc -l
    16545
    tasdik at Acer in ~/Dropbox/projects/spamfilter on pickling [!?]
    $ ls data/full_corpus/spam | wc -l
    17157
    (spamfilter)
    """
    trainer = Trainer(**kwargs)
    ## saving the trainer object
    trainer_file = open(TRAINER_FILE, "wb")
    dill.dump(trainer, trainer_file)
    trainer_file.close()
    ## training it
    starting_time = datetime.now()
    classifier_object = trainer.train(verbose=1)
    end_time = datetime.now()
    ## saving the classifier
    save_classifier = open(CLASSIFIER_FILE, "wb")
    dill.dump(classifier_object, save_classifier)
    save_classifier.close()
    elapsed = end_time - starting_time
    # refer: [3] -- divmod(total_seconds, 60) yields (minutes, seconds)
    minutes_elapsed, seconds_elapsed = divmod(
        elapsed.total_seconds(), 60)[0], divmod(elapsed.total_seconds(), 60)[1]
    print colored("Training took {min} minutes : {sec} seconds".format(
        min=minutes_elapsed, sec=seconds_elapsed
    ),'green')
    logging.info("Training took {min} minutes : {sec} seconds".format(
        min=minutes_elapsed, sec=seconds_elapsed
    ))
def test(image, char=None):
    """Score the largest connected component of ``image`` against every
    trained character model.

    :param image: input image array to classify
    :param char: unused in this body; presumably the expected label -- TODO confirm
    :return: MLClassifications accumulating one score per model key
    """
    classifications = MLClassifications()
    coms = connected_components(image)
    com = largest_component(coms)
    # Feature vector for the component (with_one=True presumably prepends a bias term -- TODO confirm).
    xs = params_from_component(com, with_one=True)
    for c, ws in models.items():
        # score = weights . polynomial-transformed features
        s = dot(ws, Trainer.get_transformed_data(xs, polynomial_transform_order))
        print c, s
        classifications.add(c, s)
    return classifications
def run(args):
    """Dispatch the task named by ``args.task`` using the setup module named
    by ``args.setup`` (which must expose DataSource and LearningSpec)."""
    # TODO update args according to config.json here
    setup_module = import_module(args.setup)
    task = args.task
    source = setup_module.DataSource(args)
    spec = setup_module.LearningSpec()
    if task == 'train':
        Trainer(args, source, spec.training_classifier()).run()
    elif task == 'search':
        Searcher(args, source).fit(spec.gridsearch_pipelines())
    elif task == 'benchmark':
        run_benchmarks(args, source)
    elif task == 'learning_curves':
        run_learning_curves(args, spec, source)
    elif task == 'plot_pca':
        features, labels = source.train_data()
        plot_pca(features, labels)
    elif task == 'misclassified':
        print_misclassified(args, spec.training_classifier(), source)
def job_main():
    """Beanstalk worker loop: reserve jobs forever, execute TRAIN/PREDICT
    requests, and hand each result back to the IO loop.

    Exits (closing the queue connection) on KeyboardInterrupt/SystemExit.
    """
    beanstalk = create_beanstalk()
    print "Job queue starts..."
    try:
        while True:
            try:
                job = beanstalk.reserve()
            except beanstalkc.DeadlineSoon:
                # A reserved job is near its TTR deadline; just retry the reserve.
                continue
            request = json.loads(job.body)
            jobId = job.jid
            print 'Working on job %s...' % jobId
            try:
                jobType = request["jobType"]
                if jobType == TRAIN:
                    category = request["category"]
                    model = request["model"]
                    trainer = Trainer.create(category, model)
                    if trainer:
                        data = {}
                        data["table_name"] = request["inputTableName"]
                        data["feature_names"] = request.get("features", None)
                        data["target_name"] = request.get("target", None)
                        ret = trainer.run(**data)
                        print 'Job %s finished.' % jobId
                    else:
                        ret = []
                        print 'No trainer for job %s.' % jobId
                elif jobType == PREDICT:
                    modelId = request["modelId"]
                    predictor = Predictor(modelId)
                    data = {}
                    data["table_name"] = request["inputTableName"]
                    ret = predictor.run(**data)
                    print 'Job %s finished.' % jobId
            except:  # NOTE(review): bare except hides the real error; result degrades to []
                ret = []
                print 'Error on job %s.' % jobId
            job.delete()
            #time.sleep(30)
            # Hand the result to the Tornado IO loop (thread-safe callback).
            io_loop.add_callback(job_finished, jobId, ret)
    except (KeyboardInterrupt, SystemExit):
        beanstalk.close()
        sys.exit()
"No such file exists in the specified path...Please see the 'dir' option under savingAndLoading in the config and ensure that your file is present there" ) lastEpoch = getLastEpochFromFileName(fileName) startEpoch = lastEpoch + 1 stage = getStageFromFileName(fileName) if stageChangeRequired(stage, lastEpoch): changeStage = True startEpoch = 1 stage += 1 lipreader = Lipreader(stage) lipreader = loadModel(lipreader, fileName, changeStage) else: lipreader = Lipreader() if config.gpuAvailable: lipreader = lipreader.cuda() if config.deviceCount > 1: lipreader = nn.DataParallel(lipreader) trainer = Trainer(lipreader) validator = Validation(lipreader) totalEpochs = config.training["Stage " + str(stage)]["epochs"] print("Started training at", datetime.now()) with tqdm(total=totalEpochs - startEpoch + 1, desc="Epochs", position=0) as t: for epoch in range(startEpoch - 1, totalEpochs): trainer.train(epoch) validator.validate(epoch) t.update() saveModel(lipreader, epoch + 1) print(f"Successfully completed training of Stage {stage}")
class A2C:
    """Advantage Actor-Critic (A2C) driver.

    Wires together the policy/value Model and its Trainer, creates the
    vectorized environments, and exposes train/test/infer entry points.
    """

    def __init__(self, sess, args):
        self.args = args
        # Optimizer hyperparameters: only the learning rate comes from args.
        self.model = Model(sess,
                           optimizer_params={
                               'learning_rate': args.learning_rate,
                               'alpha': 0.99,
                               'epsilon': 1e-5
                           }, args=self.args)
        self.trainer = Trainer(sess, self.model, args=self.args)
        # Resolve the environment class name (string) to an actual class.
        self.env_class = A2C.env_name_parser(self.args.env_class)

    def train(self):
        """Build the model against fresh envs and run the training loop."""
        env = A2C.make_all_environments(self.args.num_envs, self.env_class,
                                        self.args.env_name, self.args.env_seed)
        print("\n\nBuilding the model...")
        self.model.build(env.observation_space.shape, env.action_space.n)
        print("Model is built successfully\n\n")
        # Save the env spaces so test() can rebuild the model without envs.
        with open(self.args.experiment_dir + self.args.env_name + '.pkl',
                  'wb') as f:
            pickle.dump((env.observation_space.shape, env.action_space.n), f,
                        pickle.HIGHEST_PROTOCOL)
        print('Training...')
        try:
            # Produce video only if monitor method is implemented.
            try:
                if self.args.record_video_every != -1:
                    env.monitor(
                        is_monitor=True,
                        is_train=True,
                        experiment_dir=self.args.experiment_dir,
                        record_video_every=self.args.record_video_every)
            except:
                pass
            self.trainer.train(env)
        except KeyboardInterrupt:
            # On Ctrl-C: checkpoint and shut the worker envs down.
            print('Error occured..\n')
            self.trainer.save()
            env.close()

    def test(self, total_timesteps):
        """Rebuild the model from the pickled env spaces and run evaluation."""
        observation_space_shape, action_space_n = None, None
        try:
            with open(self.args.experiment_dir + self.args.env_name + '.pkl',
                      'rb') as f:
                observation_space_shape, action_space_n = pickle.load(f)
        except:
            print(
                "Environment or checkpoint data not found. Make sure that env_data.pkl is present in the experiment by running training first.\n"
            )
            exit(1)
        env = self.make_all_environments(num_envs=1,
                                         env_class=self.env_class,
                                         env_name=self.args.env_name,
                                         seed=self.args.env_seed)
        self.model.build(observation_space_shape, action_space_n)
        print('Testing...')
        try:
            # Produce video only if monitor method is implemented.
            try:
                if self.args.record_video_every != -1:
                    env.monitor(
                        is_monitor=True,
                        is_train=False,
                        experiment_dir=self.args.experiment_dir,
                        record_video_every=self.args.record_video_every)
                else:
                    env.monitor(is_monitor=True,
                                is_train=False,
                                experiment_dir=self.args.experiment_dir,
                                record_video_every=20)
            except:
                pass
            self.trainer.test(total_timesteps=total_timesteps, env=env)
        except KeyboardInterrupt:
            print('Error occured..\n')
            env.close()

    def infer(self, observation):
        """Used for inference.

        :param observation: (tf.tensor) having the shape (None,img_height,img_width,num_classes*num_stack)
        :return action after noise and argmax
        :return value function of the state
        """
        states = self.model.step_policy.initial_state
        dones = []  # states and dones are for LSTM, leave them for now!
        action, value, states = self.model.step_policy.step(
            observation, states, dones)
        return action, value

    # The reason behind this design pattern is to pass the function handler when required after serialization.
    @staticmethod
    def __env_maker(env_class, env_name, i, seed):
        def __make_env():
            return env_class(env_name, i, seed)

        return __make_env

    @staticmethod
    def make_all_environments(num_envs=4,
                              env_class=None,
                              env_name="SpaceInvaders",
                              seed=42):
        """Seed all RNGs, then spawn num_envs subprocess environments."""
        set_all_global_seeds(seed)
        return SubprocVecEnv([
            A2C.__env_maker(env_class, env_name, i, seed)
            for i in range(num_envs)
        ])

    @staticmethod
    def env_name_parser(env_name):
        """Map an environment class name string to the class itself."""
        from envs.gym_env import GymEnv
        envs_to_class = {'GymEnv': GymEnv}
        if env_name in envs_to_class:
            return envs_to_class[env_name]
        raise ValueError(
            "There is no environment with this name. Make sure that the environment exists."
        )
env ) # attention: I changed render parameters in wrappers file to make this work # env=env.make_train_0() env = processObservation(env, (86, 86)) env = gym_minigrid.wrappers.StateBonus(env) if flag.TRAIN: new_trainer = Trainer(num_training_steps=20000, num_env=16, num_game_steps=128, num_epoch=3, learning_rate=0.001, discount_factor=0.99, env=env, num_action=7, clip_range=0.1, value_coef=0.5, save_interval=50, log_interval=10, entropy_coef=0.0, lam=0.99, mini_batch_size=64, num_action_repeat=1) new_trainer.collect_experiance_and_train() elif flag.PLAY: new_player = Player(env=env, load_path='./trains/3/step1100-20191107-120331/train') new_player.play() # else: # new_player=Player(env=env)
def train_main(config):
    """Training entry point.

    :param config: ConfigParser-like object holding the experiment settings
    :return: None
    """
    logger = config.get_logger('train')  # logger dedicated to training runs
    data_manager = CSVDataManager(
        config['data_loader'])  # builds train/val splits per the JSON config
    classes = data_manager.classes  # all class labels
    num_classes = len(classes)
    trans_type = config['transforms']['type']  # transform name
    trans_args = config['transforms']['args']  # transform kwargs
    transformation = getattr(data_module, trans_type)(trans_args)
    train_data = data_manager.get_loader('train', transformation)  # training loader
    val_data = data_manager.get_loader('val', transforms=None)  # validation loader (no transforms)
    model_name = config['model']
    model = ModelCalled(model_name, num_classes=num_classes)
    logger.info(model)  # log the model architecture
    # Prepare for (multi-)GPU training.
    device, device_ids = prepare_device(config['n_gpu'])
    model = model.to(device)
    if len(device_ids) > 1:
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    # if torch.cuda.is_available():
    #     model = model.cuda()
    loss = getattr(net_utils, config['loss'])
    metrics = [getattr(net_utils, met) for met in config['metrics']]
    # Keep only parameters that require gradients.
    trainable_params = filter(
        lambda p: p.requires_grad, model.parameters())
    optim_name = config['optimizer']['type']
    optim_args = config['optimizer']['args']
    optimizer = getattr(torch.optim, optim_name)(trainable_params, **optim_args)
    lr_name = config['lr_scheduler']['type']
    lr_args = config['lr_scheduler']['args']
    if lr_name == 'None':
        lr_scheduler = None
    else:
        lr_scheduler = getattr(torch.optim.lr_scheduler, lr_name)(optimizer, **lr_args)
    trainer = Trainer(model=model,
                      loss=loss,
                      metrics=metrics,
                      optimizer=optimizer,
                      config=config,
                      data_loader=train_data,
                      valid_data_loader=val_data,
                      lr_scheduler=lr_scheduler,
                      device=device)
    trainer.train()
os.makedirs(model_dir) if not os.path.exists(results_dir): os.makedirs(results_dir) file_ptr = open(mapping_file, 'r') actions = file_ptr.read().split('\n')[:-1] # list of classes file_ptr.close() actions_dict = dict() for a in actions: actions_dict[a.split()[1]] = int(a.split()[0]) num_classes = len(actions_dict) # initialize model & trainer model = MultiStageModel(args, num_classes) trainer = Trainer(num_classes) # ====== Main Program ====== # start_time = time.time() if args.action == "train": batch_gen_source = BatchGenerator(num_classes, actions_dict, gt_path, features_path, sample_rate) batch_gen_target = BatchGenerator(num_classes, actions_dict, gt_path, features_path, sample_rate) batch_gen_source.read_data( vid_list_file) # read & shuffle the source training list batch_gen_target.read_data( vid_list_file_target) # read & shuffle the target training list trainer.train(model, model_dir, results_dir, batch_gen_source, batch_gen_target, device, args)
from train import Trainer
from predict import Predictor
from loader import MyDataLoader

if __name__ == '__main__':
    # Pin the job to the first GPU.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    test_dir = './val'
    train_dir = './'
    models_dir = './models'
    if not os.path.exists(models_dir):
        os.mkdir(models_dir)
    # Train and keep the path of the resulting model checkpoint.
    trainset = MyDataLoader('./', batch_size=1)
    model_path = Trainer().run(trainset, models_dir)
    # Every file in the validation folder becomes one test sample.
    testset = [os.path.join(test_dir, name) for name in os.listdir(test_dir)]
    output_dir = './valid'
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    Predictor(model_path=model_path).run(testset, output_dir)
def main(argv=None):  # IGNORE:C0111
    '''Command line options.'''
    from . import device
    if argv is None:
        argv = sys.argv
    else:
        sys.argv.extend(argv)
    program_name = os.path.basename(sys.argv[0])
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
    program_shortdesc = __shortdesc__
    # NOTE(review): the exact line layout of this banner was lost in the
    # collapsed source -- confirm against the original file.
    program_license = '''%s

  Created by dbsystem group on %s.
  Copyright 2016 NUS School of Computing. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
''' % (program_shortdesc, str(__date__))
    global debug
    try:
        # Setup argument parser
        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
        parser.add_argument("-param", "--parameter", dest="parameter", help="the parameter file path to be loaded")
        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
        parser.add_argument('-V', '--version', action='version', version=program_version_message)
        # Process arguments
        args = parser.parse_args()
        port = args.port
        parameter_file = args.parameter
        mode = args.mode
        need_reload = args.reload_data
        use_cpu = args.use_cpu
        debug = args.debug
        #prepare data files
        config.read('file.cfg')
        file_prepare(need_reload)
        import network as net
        model = net.create()
        #load parameter
        parameter_file = get_parameter(parameter_file)
        if parameter_file:
            print "load parameter file: %s" % parameter_file
            model.load(parameter_file)
        if use_cpu:
            raise CLIError("Currently cpu is not support!")
        else:
            print "runing with gpu"
            d = device.create_cuda_gpu()
        model.to_device(d)
        if mode == "serve":
            print "runing singa in serve mode, listen to port: %s " % port
            global service
            from serve import Service
            service = Service(model,d)
            app.debug = debug
            app.run(host='0.0.0.0', port= port)
        elif mode == "train":
            print "runing singa in train mode"
            global trainer
            from train import Trainer
            trainer = Trainer(model,d)
            if not parameter_file:
                trainer.initialize()
            trainer.train()
        else:
            raise CLIError("Currently only serve mode is surpported!")
        return 0
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 0
    except Exception, e:
        if debug:
            traceback.print_exc()
            raise(e)
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + str(e) + "\n")
        sys.stderr.write(indent + " for help use --help \n\n")
        return 2
#!/usr/bin/env python3
"""Train the cityscapes model with a fixed set of hyper-parameters."""
from train import Trainer

# Command Line Argument Method
HEIGHT = 256
WIDTH = 256
CHANNELS = 3
EPOCHS = 100
BATCH = 1
CHECKPOINT = 50
TRAIN_PATH = "/data/cityscapes/cityscapes/train/"
TEST_PATH = "/data/cityscapes/cityscapes/val/"

# Gather all settings in one place before constructing the Trainer.
settings = {
    "height": HEIGHT,
    "width": WIDTH,
    "channels": CHANNELS,
    "epochs": EPOCHS,
    "batch": BATCH,
    "checkpoint": CHECKPOINT,
    "train_data_path": TRAIN_PATH,
    "test_data_path": TEST_PATH,
}
trainer = Trainer(**settings)
trainer.train()
def RunPyfit(config):
    """Top-level driver: validate the configuration, then generate a training
    set, train a neural-network potential, or both.

    :param config: parsed run configuration object
    :return: int status code on early failure; falls through after training
    """
    # Try to ensure that all of the configuration settings make sense. If the
    # run is doomed to fail we want to catch is now and not when the job has
    # been running on a cluster for ten minutes already.
    # This function will do a decent job of pre-validating everything.
    # It tries to print helpful error information and will print the help
    # documentation when appropriate. If execution continues after this, it
    # is safe to say that the configuration is at least somewhat sane. It is
    # still possible that a file has invalid contents though.
    status = ValidateArgs(config)
    if not isinstance(status, tuple):
        return status
    log = status[1]
    # Now that basic configuration stuff is out of the way, we need to
    # generate a training set, train a neural network or both.
    potential = None
    training_set = None
    if config.generate_training_set:
        # Pick a compute device; GPU unless unavailable or explicitly disabled.
        if torch.cuda.is_available() and not config.force_cpu:
            device = torch.device("cuda:%i" % config.gpu_affinity)
        else:
            device = torch.device('cpu')
        poscar_data = PoscarLoader(config.e_shift, log=log)
        poscar_data = poscar_data.loadFromFile(config.dft_input_file)
        potential = NetworkPotential(log=log)
        potential = potential.loadFromFile(config.neural_network_in)
        neighborList = NeighborList(potential, log=log)
        neighborList.GenerateNeighborList(poscar_data.structures)
        # This will be useful later on.
        structure_strides = neighborList.getStructureStrides()
        lspCalculator = TorchLSPCalculator(torch.float64, potential.config,
                                           log=log).to(device)
        lsp = lspCalculator.generateLSP(neighborList.atom_neighbors)
        training_set = TrainingSet(log=log)
        training_set = training_set.loadFromMemory(poscar_data, lsp,
                                                   structure_strides, potential)
        training_set.writeToFile(config.training_set_output_file)
    if config.run_training:
        # If we generated the training set in this run, then there was
        # already a validation check run to make sure that the generated
        # training output file matches the training input file being used.
        # Instead of loading from disk, just use the existing instance.
        if not config.generate_training_set:
            training_set = TrainingSet(log=log)
            training_set = training_set.loadFromFile(config.training_set_in)
            potential = NetworkPotential(log=log)
            potential = potential.loadFromFile(config.neural_network_in)
        if potential.config != training_set.config:
            msg = "The training set file and network potential file have "
            msg += "different configurations. Please check them."
            print(msg)
            log.log("potential.config != training_set.config")
            return 1
        if config.randomize:
            log.log("Randomizing network potential parameters.")
            potential.randomizeNetwork()
            potential.loadNetwork(None, values_loaded=True)
        if not config.no_warn:
            if not training_set.generateWarnings(config.validation_ratio):
                return 1
        # By this point, 'training_set' holds a training set instance, one way
        # or another. Now we actually run the training.
        trainer = Trainer(potential, training_set, config, log=log)
        trainer.train()
typechecker = TypeCheckAdaptor(os.path.join(mydir, 'data', 'raw', 'typecheck.csv'), featurizer.vocab) model = get_model(config, featurizer.vocab, typechecker) model.load_weights(os.path.join(root, 'best_weights')) dev_generator = { 'kbp_eval': KBPEvaluationDataAdaptor().to_examples(os.path.join(mydir, 'data', 'raw', 'evaluation.tsv')), 'supervised': SupervisedDataAdaptor().to_examples(os.path.join(mydir, 'data', 'raw', 'supervision.csv')), 'kbp_sample': KBPDataAdaptor().to_examples(os.path.join(mydir, 'data', 'raw', 'test.sample.tsv')), }[args['--evaluation']] from train import Trainer dev_split = Split(dev_generator, featurizer, add=False) scoring_labels = [i for i in xrange(len(featurizer.vocab['rel'])) if i != featurizer.vocab['rel']['no_relation']] trainer = Trainer('.', model, typechecker, scoring_labels) best_scores = trainer.run_epoch(dev_split, train=False, return_pred=True) todir = os.path.join(root, 'preds') if not os.path.isdir(todir): os.makedirs(todir) print 'predictions output at', todir from plot_utils import plot_confusion_matrix, plot_histogram, get_sorted_labels, parse_gabor_report, parse_sklearn_report, combine_report, retrieve_wrong_examples import json from sklearn.metrics import classification_report wrongs = retrieve_wrong_examples(dev_split.examples, best_scores['ids'], best_scores['preds'], best_scores['targs'],
def run_train(rt):
    """Build a Trainer from the runtime context *rt* and execute it."""
    classifier = rt.spec.training_classifier()
    Trainer(rt, rt.config, rt.data_source, classifier).run()
os.makedirs(args.outf) except OSError: pass if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) print("Random Seed: ", args.manualSeed) random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) #### # Fetch dataset #### args.device = torch.device("cuda:0" if args.cuda else "cpu") # custom weights initialization called on netG and netD dataset = Sound2ImageDataset(args.dataroot) print("Number of training instances: {} {} {}".format( len(dataset.dataset['img']), len(dataset.dataset['class']), len(dataset.dataset['sound_embeddings']))) trainer = Trainer(dataset, args) trainer.train(args)
image_glob.sort() label_glob = glob.glob('./data/rgbd-dataset/*/*/*[0-9]_depth.png') filtered_image_glob = [] filtered_label_glob = [] for filename in image_glob: labelname = re.sub('\.png$', '_depth.png', filename) if os.path.exists(labelname): filtered_image_glob.append(filename) filtered_label_glob.append(labelname) f = FileReader(filtered_image_glob, filtered_label_glob, crop_shape, batch_size=batch_size) t = Trainer(sess, m, f) sess.run(tf.global_variables_initializer()) saver = tf.train.Saver(tf.trainable_variables()) try: saver.restore(sess, args.model) except: print('No save file found. Creating new file at {}'.format( args.model)) f.start_queue_runners() t.train(saver=saver, path=args.model) f.stop_queue_runners() elif args.mode == 'generate': if args.input is None: print("must provide an input file in generate mode") sys.exit(1)
from Models import *
from loss import *
from train import Trainer
import os

if __name__ == '__main__':
    network = UNet_2Plus()
    # Make sure the checkpoint and log folders exist before training.
    for folder in ("./weight", "./log"):
        if not os.path.exists(folder):
            os.makedirs(folder)
    save_path = r'.\weight\5k_unet2p.pt'
    log = r'.\log\5k_unet2p_log.txt'
    criterion = dice_bce_loss()
    trainer = Trainer(network,
                      loss_func=criterion,
                      save_path=save_path,
                      traindataset_path=r'C:\Users\Desktop\5k\train_set',
                      valdataset_path=r'C:\Users\Desktop\5k\validate_set',
                      batchsize=2,
                      log=log)
    trainer.trainer(10)
def get_trainer(self):
    """Resolve the Trainer named in ``self.data['trainer']``; fall back to
    the default 'Trainer' when no name is configured."""
    from train import Trainer
    name = self.data['trainer'] if 'trainer' in self.data else 'Trainer'
    return Trainer.get_trainer(name)
# Build CIFAR-10 train/test pipelines; `mean`, `std` and `path` are assumed
# to be defined earlier in this file -- TODO confirm.
train_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(mean=mean, std=std)])
test_transform = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])
train_dataset = torchvision.datasets.CIFAR10(root=path,
                                             train=True,
                                             transform=train_transform,
                                             download=True)
train_loader = DataLoader(train_dataset, batch_size=10000, shuffle=True)
test_dataset = torchvision.datasets.CIFAR10(root=path,
                                            train=False,
                                            transform=test_transform,
                                            download=True)
test_loader = DataLoader(test_dataset,
                         batch_size=256,
                         shuffle=False,
                         pin_memory=True)
# Peek at one (very large) training batch.
pics = enumerate(train_loader)
batch_idx, (data, labels) = next(pics)
data = np.squeeze(data)
print(data)  # NOTE(review): looks like leftover debug output -- confirm
# Evaluate a previously saved model on the test split.
model = torch.load('model.pth')
model.eval()
acc = Trainer.validate(model, test_loader, 'test')
from dataset import RecSysDataset
from train import Trainer
from models import VBPR
import torch

if __name__ == '__main__':
    # Latent-factor sizes and training hyper-parameters.
    k, k2 = 10, 20
    batch_size, n_epochs = 128, 20
    data = RecSysDataset()
    model = VBPR(data.n_users,
                 data.n_items,
                 data.corpus.image_features,
                 k,
                 k2)
    Trainer(model, data).train(n_epochs, batch_size)
    # Persist the trained model.
    torch.save(model, 'vbpr_resnet50_v1.pth')
if args[k] is not None: opt[k] = args[k] if "TV" in opt['mode']: dataset = LocalTemporalDataset(opt) elif "implicit" in opt['mode']: dataset = LocalImplicitDataset(opt) else: dataset = LocalDataset(opt) model = GenericModel(opt) else: opt = load_options(os.path.join(save_folder, args["load_from"])) opt["device"] = args["device"] opt["save_name"] = args["load_from"] for k in args.keys(): if args[k] is not None: opt[k] = args[k] model = load_model(opt, args["device"]) if "TV" in opt['mode']: dataset = LocalTemporalDataset(opt) elif "implicit" in opt['mode']: dataset = LocalImplicitDataset(opt) else: dataset = LocalDataset(opt) if "TV" in opt['mode']: trainer = TemporalTrainer(opt) elif "implicit" in opt['mode']: trainer = ImplicitNetworkTrainer(opt) else: trainer = Trainer(opt) trainer.train(model, dataset)
f2i, args.clean, args.chars, args.mxlen, mxfiltsz, vec_alloc=long_0_tensor_alloc2, ExType=TorchExamples) print('Loaded test data') nc = len(f2i) mdsave(f2i, embeddings.vocab, args.outdir, args.save) model = ConvModel(embeddings, nc, args.filtsz, args.cmotsz, args.hsz, args.dropout, not args.static) trainer = Trainer(gpu, model, args.optim, args.eta, args.mom) max_acc = 0 last_improved = 0 for i in range(args.epochs): print('Training epoch %d' % (i + 1)) trainer.train(ts, args.batchsz) this_acc = trainer.test(vs, args.batchsz, 'Validation') if this_acc > max_acc: max_acc = this_acc last_improved = i model.save(args.outdir, args.save) print('Highest dev acc achieved yet -- writing model') if (i - last_improved) > args.patience:
yolo = YoloV3(in_chs, cls_num, small_anchors=[[10, 13], [16, 30], [33, 23]], medium_anchors=[[30, 61], [62, 45], [59, 119]], large_anchors=[[116, 90], [156, 198], [373, 326]]) # (3) build a trainer cuda = torch.cuda.is_available() if cuda: device = torch.device("cuda") else: device = torch.device("cpu") model_path = "./checkpoint" trainer = Trainer(model=yolo, cuda=cuda, device=device, model_path=model_path) if True: trainer.restore_model(model_name="yolo.pkl") # (4) train the model # train hyparameters batch_size = 6 grad_accumulation = 1 eval_interval = 4 epoch_num = int(1e3) if False: trainer.train_model(train_data=train_data, val_data=val_data, batch_size=batch_size, grad_accumulation=grad_accumulation,
help="Number of episodes.") testarg.add_argument("--seed", type=int, help="Random seed.") args = parser.parse_args() if args.seed: rand.seed(args.seed) if not os.path.exists(args.ckpt_dir): os.makedirs(args.ckpt_dir) #Checking for/Creating gym output directory if args.out: if not os.path.exists(args.out): os.makedirs(args.out) else: if not os.path.exists('gym-out/' + args.game): os.makedirs('gym-out/' + args.game) args.out = 'gym-out/' + args.game ##here we go... # initialize gym environment and dqn env = Environment(args) agent = DQN(env, args) # train agent Trainer(agent).run() # play the game #env = gym.wrappers.Monitor(env, args.out, force=True) env.gym.monitor.start(args.out, force=True) agent.play() env.gym.monitor.close()
import sys
from game_utils import *
import logging
from train import Trainer
from torch import multiprocessing
import torch
from examplegenerator import ExampleGenerator
import copy
from network import Net

if __name__ == '__main__':
    print(sys.path)
    logger = logging.getLogger('alphazero')
    # CUDA with multiprocessing requires the 'spawn' start method.
    multiprocessing.set_start_method('spawn')
    az_trainer = Trainer()
    # Load pretrained network weights onto the trainer's device.
    checkpoint = torch.load(
        '../../meteor01/alphazero-connect4/models/soft-Z5.pth',
        map_location=az_trainer.device)
    az_trainer.current_net.load_state_dict(checkpoint)
    # Play the loaded network against a human player.
    test_zero_vs_human(az_trainer.current_net.predict)
# CLI flags override the defaults taken from `config.training`; `parser`,
# `config`, `prepro` and `Trainer` are assumed to be defined or imported
# earlier in this file -- TODO confirm.
parser.add_argument('-lr',
                    type=float,
                    help="learning rate",
                    default=config.training.lr)
parser.add_argument('-batch_size',
                    type=int,
                    help="batch size",
                    default=config.training.batch_size)
parser.add_argument('-l2',
                    type=float,
                    help="l2_regulation",
                    default=config.training.l2)
args = parser.parse_args()
# Write the parsed values back into the shared config object.
config.training.epoch = args.epoch
config.training.lr = args.lr
config.training.batch_size = args.batch_size
config.training.l2 = args.l2
if args.mode == 'prepro':
    prepro(config)
elif args.mode == 'train':
    trainer = Trainer(config)
    trainer.start()
elif args.mode == 'evaluate':
    pass  # not implemented yet
elif args.mode == 'interactive':
    pass  # not implemented yet
else:
    raise RuntimeError("Mode %s is undefined." % args.mode)
from train import Trainer
from test import Tester
from model import LeNet
from dataset import DataSet, transform, train_set, train_loader, test_set, test_loader
import torch as t
import torch.nn as nn
import torchvision as tv
from torch import optim
from torch.autograd import Variable

# All 10 CIFAR-10 class names
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# Dataset wrapper bundling the transform, loaders and class names
dataSet = DataSet(transform, train_set, train_loader, test_set, test_loader, classes)
# Network architecture
net = LeNet()
# Cross-entropy loss
criterion = nn.CrossEntropyLoss()
# SGD optimizer
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

trainer = Trainer(net, criterion, optimizer, dataSet.train_loader)
tester = Tester(dataSet.test_loader, net)

trainer.train(epochs=10)
tester.test()
"threads": 0, "epochs": 1, "epoch_lapse": 1, "epoch_save": 50, "input_size": (628, 628), } cell_dir = "D:/Machine_Learning/Codes/CellSegment/supplementary/dataset1/train/" mask_dir = "D:/Machine_Learning/Codes/CellSegment/supplementary/dataset1/train_GT/SEG/" module_save_dir = "D:/Machine_Learning/Codes/CellSegment/save/" tmp_dir = "D:/Machine_Learning/Codes/CellSegment/supplementary/dataset1/_tmp/" valid_rate = 0.1 use_cuda = True trainer = Trainer() trainer.setup(cell_dir=cell_dir, mask_dir=mask_dir, module_save_dir=module_save_dir, tmp_dir=tmp_dir, valid_rate=valid_rate, hyper_params=hyper_parameters, use_cuda=use_cuda, PRETRAINED=False) trainer.train() trainer.save_module() elif mode == "Test": hyper_parameters = {
return image.reshape(shape) * 255 def write_image_to_file(image, filename): image = reshape_for_output(image) images_out = tf.image.encode_jpeg(image) fh = open(filename, "wb+") fh.write(images_out.eval()) fh.close() if args.mode == 'train': from train import Trainer from files import FileReader m = Model(batch_size=args.batch_size) m.build_model() with tf.Session() as sess: t = Trainer(sess, m) f = FileReader('./images/sets/train/*.JPEG', (args.subimage_size, args.subimage_size), batch_size=args.batch_size) v = FileReader('./images/sets/validation/*.JPEG', (args.subimage_size, args.subimage_size), batch_size=args.batch_size) sess.run(tf.global_variables_initializer()) saver = tf.train.Saver(tf.trainable_variables()) try: saver.restore(sess, args.model) except: print('No save file found. Creating new file at {}'.format( args.model)) f.start_queue_runners()
flag.LOAD = True else: flag.LOAD = False if args.env_type == "MR": num_action = 18 # number of actions for montezuma revenge flag.ENV = "MR" if flag.TRAIN: new_trainer = Trainer(num_training_steps=args.train_steps, num_env=args.num_env, num_game_steps=args.game_steps, num_epoch=args.num_epoch, learning_rate=args.lr, discount_factor=args.gamma, int_discount_factor=args.int_gamma, num_action=num_action, clip_range=args.clip_range, value_coef=args.value_coef, save_interval=args.save_int, entropy_coef=args.ent_coef, lam=args.lambda_gae, mini_batch_num=args.mini_batch, num_action_repeat=args.action_re, load_path=args.path, ext_adv_coef=args.ext_adv_coef, int_adv_coef=args.int_adv_coef, num_pre_norm_steps=args.num_pre_norm_steps, predictor_update_proportion=args.predictor_update_p) new_trainer.collect_experiance_and_train() elif flag.PLAY: new_player = Player(load_path=args.path) new_player.play()
def main():
    """Entry point: parse args, build/resume the model, then train or evaluate.

    Behaviour depends on the parsed arguments:
      * ``--resume``: reload model/optimizer state from a checkpoint.
      * ``--evaluate val|test``: run a single evaluation pass and return.
      * otherwise: run the full train/validate loop, logging scores to a
        TSV file and saving checkpoints (best model tracked by err@1).
    """
    # parse arg and start experiment
    global args
    best_err1 = 100.
    best_epoch = 0

    args = parser.parse_args()
    args.config_of_data = config.datasets[args.data]
    args.num_classes = config.datasets[args.data]['num_classes']
    if configure is None:
        # tensorboard_logger is optional; disable logging if missing
        args.tensorboard = False
        print(Fore.RED +
              'WARNING: you don\'t have tensorboard_logger installed' +
              Fore.RESET)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            old_args = checkpoint['args']
            print('Old args:')
            print(old_args)
            # set args based on checkpoint
            if args.start_epoch <= 0:
                args.start_epoch = checkpoint['epoch'] + 1
            best_epoch = args.start_epoch - 1
            best_err1 = checkpoint['best_err1']
            # architecture-defining options must match the checkpoint
            for name in arch_resume_names:
                if name in vars(args) and name in vars(old_args):
                    setattr(args, name, getattr(old_args, name))
            model = getModel(**vars(args))
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print(
                "=> no checkpoint found at '{}'".format(
                    Fore.RED + args.resume + Fore.RESET),
                file=sys.stderr)
            return
    else:
        # create model
        print("=> creating model '{}'".format(args.arch))
        model = getModel(**vars(args))
    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # define optimizer
    optimizer = get_optimizer(model, args)

    trainer = Trainer(model, criterion, optimizer, args)

    # create dataloader
    # NOTE: splits must be tuples; ('val') is just the string 'val'
    if args.evaluate == 'val':
        _, val_loader, _ = getDataloaders(
            splits=('val',), **vars(args))
        trainer.test(val_loader, best_epoch)
        return
    elif args.evaluate == 'test':
        _, _, test_loader = getDataloaders(
            splits=('test',), **vars(args))
        trainer.test(test_loader, best_epoch)
        return
    else:
        train_loader, val_loader, _ = getDataloaders(
            splits=('train', 'val'), **vars(args))

    # check if the folder exists
    if os.path.exists(args.save):
        print(Fore.RED + args.save
              + Fore.RESET + ' already exists!', file=sys.stderr)
        if not args.force:
            ans = input('Do you want to overwrite it? [y/N]:')
            if ans not in ('y', 'Y', 'yes', 'Yes'):
                # os.exit does not exist; sys.exit raises SystemExit cleanly
                sys.exit(1)
        # move the old run out of the way before recreating the folder
        tmp_path = '/tmp/{}_{}'.format(os.path.basename(args.save),
                                       time.time())
        print('move existing {} to {}'.format(args.save, Fore.RED +
                                              tmp_path + Fore.RESET))
        shutil.copytree(args.save, tmp_path)
        shutil.rmtree(args.save)
    os.makedirs(args.save)
    print('create folder: ' + Fore.GREEN + args.save + Fore.RESET)

    # copy code to save folder (skipped for debug runs)
    if args.save.find('debug') < 0:
        shutil.copytree(
            '.',
            os.path.join(
                args.save,
                'src'),
            symlinks=True,
            ignore=shutil.ignore_patterns(
                '*.pyc',
                '__pycache__',
                '*.path.tar',
                '*.pth',
                '*.ipynb',
                '.*',
                'data',
                'save',
                'save_backup'))

    # set up logging
    global log_print, f_log
    f_log = open(os.path.join(args.save, 'log.txt'), 'w')

    def log_print(*msgs):
        # print to stdout AND the run's log file
        # (renamed from *args to avoid shadowing the global `args`)
        print(*msgs)
        print(*msgs, file=f_log)
    log_print('args:')
    log_print(args)
    print('model:', file=f_log)
    print(model, file=f_log)
    log_print('# of params:',
              str(sum([p.numel() for p in model.parameters()])))
    f_log.flush()
    torch.save(args, os.path.join(args.save, 'args.pth'))
    # header fixed: last column is val_err5 (was truncated to 'val_err')
    scores = ['epoch\tlr\ttrain_loss\tval_loss\ttrain_err1'
              '\tval_err1\ttrain_err5\tval_err5']
    if args.tensorboard:
        configure(args.save, flush_secs=5)

    for epoch in range(args.start_epoch, args.epochs + 1):
        # train for one epoch
        train_loss, train_err1, train_err5, lr = trainer.train(
            train_loader, epoch)
        if args.tensorboard:
            log_value('lr', lr, epoch)
            log_value('train_loss', train_loss, epoch)
            log_value('train_err1', train_err1, epoch)
            log_value('train_err5', train_err5, epoch)

        # evaluate on validation set
        val_loss, val_err1, val_err5 = trainer.test(val_loader, epoch)
        if args.tensorboard:
            log_value('val_loss', val_loss, epoch)
            log_value('val_err1', val_err1, epoch)
            log_value('val_err5', val_err5, epoch)

        # save scores to a tsv file, rewrite the whole file to prevent
        # accidental deletion
        scores.append(('{}\t{}' + '\t{:.4f}' * 6)
                      .format(epoch, lr, train_loss, val_loss,
                              train_err1, val_err1, train_err5, val_err5))
        with open(os.path.join(args.save, 'scores.tsv'), 'w') as f:
            print('\n'.join(scores), file=f)

        # remember best err@1 and save checkpoint
        is_best = val_err1 < best_err1
        if is_best:
            best_err1 = val_err1
            best_epoch = epoch
            print(Fore.GREEN + 'Best val_err1 {}'.format(best_err1) +
                  Fore.RESET)
        save_checkpoint({
            'args': args,
            'epoch': epoch,
            'best_epoch': best_epoch,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_err1': best_err1,
        }, is_best, args.save)
        # early stopping: no improvement for `patience` epochs (if enabled)
        if not is_best and epoch - best_epoch >= args.patience > 0:
            break
    print('Best val_err1: {:.4f} at epoch {}'.format(best_err1, best_epoch))
embedding_size=const.EMBEDDING_SIZE, number_of_layers=const.NUMBER_OF_LAYERS, number_of_heads=const.NUMBER_OF_HEADS, forward_expansion=const.FORWARD_EXPANSION, device=device, ).to(device) model.apply(model_utils.initialize_weights) optimizer = torch.optim.Adam(model.parameters(), lr=const.LEARNING_RATE) cross_entropy = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX) print(f'The model has {model_utils.count_parameters(model):,} trainable parameters') trainer = Trainer( const=const, optimizer=optimizer, criterion=cross_entropy, device=device, ) trainer.train( model=model, train_iterator=train_iterator, valid_iterator=valid_iterator, ) model.load_state_dict(torch.load('./checkpoints/model.best.pt')) trainer.test(model=model, test_iterator=test_iterator) bleu_score = inference_utils.calculate_bleu( data=test_data,
import os
import zipfile

import cmd_args
from train import Trainer


def _maybe_unpack_files(dataset_dir):
    """Extract ``training.zip`` / ``validation.zip`` / ``test.zip`` in place.

    For each split, the archive ``<dataset_dir>/<mode>.zip`` is unpacked
    into ``<dataset_dir>/<mode>``. Extraction is skipped when the target
    directory already exists, so repeated runs are cheap.
    """
    for mode in ['training', 'validation', 'test']:
        # os.path.join instead of '/'-concatenation: portable and
        # tolerant of a trailing separator in dataset_dir
        zip_path = os.path.join(dataset_dir, '%s.zip' % mode)
        target_dir = os.path.join(dataset_dir, mode)
        if os.path.exists(target_dir):
            continue
        with zipfile.ZipFile(zip_path, 'r') as f:
            print('Extracting %s' % zip_path)
            f.extractall(target_dir)


if __name__ == '__main__':
    args = cmd_args.Args()
    _maybe_unpack_files(args.dataset_dir)
    trainer = Trainer(args)
    trainer.train()
def main():
    """Run k-fold training/evaluation of the (Char)LSTM tagger.

    Optionally re-splits the dataset into ``config.kfold`` folds, then for
    each fold builds a dataloader and model, trains (unless in eval-only
    mode), tests, and finally logs the metrics averaged over all folds.
    """
    # Parse argument
    config, logger = parse_args()
    if config.kfold > 0 and not config.eval:
        logger.info("Splitting dataset into {0}-fold".format(config.kfold))
        splitter.main(input_file=config.data_file,
                      output_dir=config.root_path,
                      verbose=config.verbose,
                      kfold=config.kfold,
                      pos=config.use_pos,
                      log_file=config.data_log)

    # Running totals over folds, averaged at the end
    tot_acc = 0
    tot_prec = 0
    tot_rec = 0
    tot_f1 = 0
    for i in range(0, config.kfold):
        # To match the output filenames
        k = str(i + 1)
        if not config.eval:
            logger.info("Starting training on {0}th-fold".format(k))
        # Load data iterator
        dataloader = Dataloader(config, k)
        # Debugging purpose. Don't delete
        # sample = next(iter(train_iter))
        # print(sample.TEXT)

        # Load model: character-level and grapheme-level are mutually
        # exclusive variants of the same tagger
        if config.use_char or config.use_graph:
            assert config.use_char ^ config.use_graph, \
                "Either use Character-Level or Grapheme-Level. Not both!!!"
            lstm = CharLSTMTagger(config, dataloader).to(config.device)
        else:
            lstm = LSTMTagger(config, dataloader).to(config.device)

        # Print network configuration
        logger.info(lstm)

        model = Trainer(config, logger, dataloader, lstm, k)
        if not config.eval:
            # Train
            logger.info("Training started !!!")
            model.fit()

        # Test (always evaluated from the saved checkpoint)
        model.load_checkpoint()
        logger.info("Testing Started !!!")
        acc, prec, rec, f1 = model.predict()
        logger.info(
            "Accuracy: %6.2f%%; Precision: %6.2f%%; Recall: %6.2f%%; FB1: %6.2f "
            % (acc, prec, rec, f1))
        tot_acc += acc
        tot_prec += prec
        tot_rec += rec
        tot_f1 += f1

    # Guard against ZeroDivisionError when config.kfold == 0: the loop
    # above never ran, so all totals are 0 and the averages report 0.
    folds = max(config.kfold, 1)
    logger.info(
        "Final Accuracy: %6.2f%%; Final Precision: %6.2f%%; Final Recall: %6.2f%%; Final FB1: %6.2f "
        % (tot_acc / folds, tot_prec / folds,
           tot_rec / folds, tot_f1 / folds))
agents_init = { "initial_position": agents[a].initial_position, "position": agents[a].position, "direction": agents[a].direction, "target": agents[a].target, "speed_data": agents[a].speed_data, "moving": agents[a].moving, "status": agents[a].status, "old_position": agents[a].old_position, "old_direction": agents[a].old_direction, "handle": agents[a].handle } agents_init_info.append(agents_init) last_position.update({a: agents[a].old_position}) trainer = Trainer(memory, agent, epochs, agents_init_info) if __name__ == '__main__': # TODO 八成是obs的问题 # TODO 或者reward的问题 # TODO 会不会不太适合这种从memory中取样训练的方式 print("agent_target:", agents_init_info[a]['target']) print("agent_init_position:", agents_init_info[a]['initial_position']) time.sleep(2) trainer.train(env, env_renderer) # agents for a in range(num_of_agents): agent[a].show(show_acc=False)
from train import Trainer

# Input image geometry: single-channel 28x28 images (MNIST-sized).
HEIGHT = 28
WIDTH = 28
CHANNEL = 1
# GAN hyper-parameters.
LATENT_SPACE_SIZE = 100
EPOCHS = 50001
BATCH = 32
CHECKPOINT = 500   # checkpoint/report interval, in epochs
MODEL_TYPE = -1    # model variant selector -- presumably -1 = default; see train.Trainer

# Implicit line joining inside the parentheses makes the trailing
# backslash continuations unnecessary.
trainer = Trainer(
    height=HEIGHT,
    width=WIDTH,
    channels=CHANNEL,
    latent_size=LATENT_SPACE_SIZE,
    epochs=EPOCHS,
    batch=BATCH,
    checkpoint=CHECKPOINT,
    model_type=MODEL_TYPE,
)
trainer.train()
# epoch, state dict first_epoch = args.epoch + 1 if first_epoch > 0: path_state_dict = logdir_train / f'{hp.model_name}_{args.epoch}.pt' if not path_state_dict.exists(): raise FileNotFoundError(path_state_dict) else: path_state_dict = None # Training + Validation Set dataset_temp = MulchWavDataset('train', n_file=hp.n_file) dataset_train, dataset_valid = MulchWavDataset.split(dataset_temp, (hp.train_ratio, -1)) # run trainer = Trainer(path_state_dict) if args.train: loader_train = DataLoader( dataset_train, batch_size=hp.batch_size, num_workers=hp.num_disk_workers, collate_fn=dataset_train.pad_collate, pin_memory=True, shuffle=True, ) loader_valid = DataLoader( dataset_valid, batch_size=hp.batch_size, num_workers=hp.num_disk_workers, collate_fn=dataset_valid.pad_collate, pin_memory=True,
def main():
    """Evaluate every genome of a previously-evolved population.

    Loads the pickled population, and for each individual: builds the model
    from its genome, trains/tests it for ``args.nepochs`` epochs, records
    (best test accuracy, FLOPs, parameter count) on the individual, and
    finally pickles the evaluated population back to disk.
    """
    # load pop for extrapolation
    # `with` closes the file handle; the original bare open() leaked it
    with open("Results/CIFAR10_baseline/Run5/pop_extra.pkl", "rb") as f:
        pop = pickle.load(f)

    for i in range(len(pop)):
        genome = pop[i].genome
        # parse the arguments
        args = parser.parse_args()
        args.save = os.path.join("Extrapolation_results",
                                 "Model_ID_{}".format(pop[i].id))
        random.seed(args.manual_seed)
        torch.manual_seed(args.manual_seed)
        utils.saveargs(args)

        # initialize the checkpoint class
        checkpoints = Checkpoints(args)

        # Create Model from the individual's genome encoding
        models = Model(args, genome)
        model, criterion, num_params = models.setup()
        model = calculate_flops.add_flops_counting_methods(model)

        # Data Loading
        dataloader = Dataloader(args)
        loaders = dataloader.create()

        # The trainer handles the training loop
        trainer = Trainer(args, model, criterion)
        # The tester handles the evaluation on validation set
        tester = Tester(args, model, criterion)

        # start training !!!
        loss_best = 1e10
        acc_test_list = []
        acc_best = 0
        # default so the fitness assignment below cannot hit a NameError
        # when args.nepochs == 0 (n_flops is only measured at epoch 0)
        n_flops = 0.0
        for epoch in range(args.nepochs):
            # train for a single epoch
            start_time_epoch = time.time()
            if epoch == 0:
                model.start_flops_count()
            loss_train, acc_train = trainer.train(epoch, loaders)
            loss_test, acc_test = tester.test(epoch, loaders)
            acc_test_list.append(acc_test)
            if acc_test > acc_best:
                # update the best test accu found so far and checkpoint it
                model_best = True
                acc_best = acc_test
                loss_best = loss_test
                checkpoints.save(epoch, model, model_best)
            time_elapsed = np.round((time.time() - start_time_epoch), 2)
            if epoch == 0:
                # FLOPs counted over the first epoch only
                n_flops = (model.compute_average_flops_cost() / 1e6 / 2)
            if np.isnan(np.average(loss_train)):
                # training diverged -- stop evaluating this individual
                break
            print(
                "Epoch {:d}:, test error={:0.2f}, FLOPs={:0.2f}M, n_params={:0.2f}M, {:0.2f} sec"
                .format(epoch, 100.0 - acc_test, n_flops, num_params / 1e6,
                        time_elapsed))

        # record this individual's multi-objective fitness
        pop[i].fitness[0] = acc_best
        pop[i].fitness[1] = n_flops
        pop[i].n_params = num_params

    with open("Results/CIFAR10_baseline/Run5/pop_extra_evaluated.pkl",
              "wb") as f:
        pickle.dump(pop, f)
def main(): kwargs = {} # building up the arguments for the test() function directory = raw_input(colored("Enter Corpus Directory['eg: corpus2'] : ", 'blue')) logging.info("Training against the directory {0} ".format(directory)) spam = raw_input(colored("Enter Spam Sub Directory[eg : 'spam']: ", 'blue')) ham = raw_input(colored("Enter Clean Emails Sub Directory[eg :'ham']: ", 'blue')) limit = raw_input(colored("Enter Limit of files per class(spam/ham)[eg: 1000]: ", 'blue')) directory = os.path.join(CORPUS_DIR, directory) spam = os.path.join(directory, spam) ham = os.path.join(directory, ham) # TO-DO: Raise custom exceptions in test.py:main() # ================================================ # raising custom exceptions: http://stackoverflow.com/a/9157277/3834059 # try: # try: # directory_exists = map( # lambda x: os.path.exists, [directory, spam, ham]) # # one of the entered directory or subdirectory doesn't exists # if False in directory_exists: # raise OSError('Directory does not exist!') # kwargs['directory'] = directory # kwargs['spam'] = spam # kwargs['ham'] = ham # except OSError as e: # print e # try: # kwargs['limit'] = int(limit) # except TypeError as err: # raise TypeError('Enter valid input for "limit": {0}'.format(err)) # except TypeError as e: # print colored(e, 'red') # # exiting # raise SystemExit # refer http://stackoverflow.com/a/19747562/3834059 # except OSError as e: # print colored(e, 'red') # raise SystemExit try: kwargs['directory'] = directory kwargs['spam'] = spam kwargs['ham'] = ham except ValueError: print colored("Switching back to the default test values:") logging.error("Switching back to the default test values:") kwargs['directory'] = 'corpus3' kwargs['spam'] = 'spam' kwargs['ham'] = 'ham' try: kwargs['limit'] = int(limit) except TypeError as e: print "Taking default value of limit (1000)" logging.error("Taking default value of limit (1000)") kwargs['limit'] = 500 trainer = Trainer(**kwargs) starting_time = datetime.now() 
classifier_object = trainer.train(verbose=1) end_time = datetime.now() elapsed = end_time - starting_time # ref: http://stackoverflow.com/a/1345852/3834059 minutes_elapsed, seconds_elapsed = divmod( elapsed.total_seconds(), 60)[0], divmod(elapsed.total_seconds(), 60)[1] print colored("Training took {min} minutes : {sec} seconds".format( min=minutes_elapsed, sec=seconds_elapsed ),'green') logging.info("Training took {min} minutes : {sec} seconds".format( min=minutes_elapsed, sec=seconds_elapsed )) # Testing the accuracy test(trainer, classifier_object, 'spam') test(trainer, classifier_object, 'ham')