def main():
    args = get_argparser()
    # classes come from the config file; args.num_classes adds one for background
    classes = utils.parse_config(args.config_path)
    print(len(classes), args.num_classes, classes)
    assert len(classes) + 1 == args.num_classes, \
        "Number of classes in config and argument do not match"
    # use our dataset with the transforms defined for this model
    dataset = loader.CellDataset(args.root_dir,
                                 utils.get_transform(args.model, train=True),
                                 args.labels_type, args.model, classes)
    dataset_test = loader.CellDataset(args.root_dir,
                                      utils.get_transform(args.model, train=False),
                                      args.labels_type, args.model, classes,
                                      mode="Test")
    # 90/10 train/test split over the same index range
    indices = torch.arange(len(dataset)).tolist()
    dataset = torch.utils.data.Subset(dataset, indices[:int(len(indices) * 0.9)])
    dataset_test = torch.utils.data.Subset(dataset_test,
                                           indices[int(len(indices) * 0.9):])
    print("Images in Test set", len(dataset_test),
          "Images in Train set", len(dataset))
    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                              shuffle=True, num_workers=4,
                                              collate_fn=utils.collate_fn)
    data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1,
                                                   shuffle=True, num_workers=4,
                                                   collate_fn=utils.collate_fn)
    model = models.get_model(args.model, args.weight_path, args.num_classes,
                             args.max_instances, args.maskrcnn_backbone)
    if args.cuda:
        device = "cuda:0"
        model.to(device)
    else:
        device = "cpu"
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
    #                             momentum=0.9, weight_decay=0.0005)
    print("\n\nStarting training of", args.model, "\n\n")
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    model_trainer = trainer.TrainModel(model, optimizer, args.model, device)
    for epoch in range(args.epochs):
        model_trainer.train(epoch, data_loader, data_loader_test)
    print("That's it!")
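# A minimal sketch of the CLI that main() above consumes. Every flag name and
# default here is inferred from the attribute accesses in main() (args.config_path,
# args.root_dir, ...) and is an assumption, not the project's actual parser.
def get_argparser():
    import argparse
    parser = argparse.ArgumentParser(prog='Train cell segmentation models')
    parser.add_argument('--config-path', required=True, type=str)
    parser.add_argument('--root-dir', required=True, type=str)
    parser.add_argument('--labels-type', type=str)
    parser.add_argument('--model', required=True, type=str)
    parser.add_argument('--num-classes', type=int)
    parser.add_argument('--weight-path', type=str, default=None)
    parser.add_argument('--max-instances', type=int, default=100)  # assumed default
    parser.add_argument('--maskrcnn-backbone', type=str, default=None)
    parser.add_argument('--batch-size', type=int, default=2)  # assumed default
    parser.add_argument('--lr', type=float, default=1e-4)  # assumed default
    parser.add_argument('--epochs', type=int, default=10)  # assumed default
    parser.add_argument('--cuda', action='store_true')
    return parser.parse_args()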
def argument_parser(sys_argv):
    # ARGUMENT HANDLING
    parser = argparse.ArgumentParser(prog='Test models')
    parser.add_argument('--data-config', help="configuration file path",
                        required=True, type=str)
    parser.add_argument('--model-folder', help="Folder where the model is available",
                        required=True, type=str)
    parser.add_argument('--results-folder',
                        help="where to store probabilities of each class", type=str)
    parser.add_argument('--metrics', nargs='+', help="Metrics solicited",
                        default=None, type=str)
    parser.add_argument('--datasets', nargs='+', help="Datasets to be evaluated",
                        default=None, type=str)
    parser.add_argument('--overload', help="Pairs of parameters to overload",
                        nargs='+', type=str)
    parser.add_argument('--cnn-out', action='store_true')
    args = parser.parse_args(sys_argv)
    qrel_file_flag = False
    if args.metrics:
        assert all(metric in AVAILABLE_METRICS for metric in args.metrics), \
            "Supported metrics %s" % (" ".join(AVAILABLE_METRICS))
        if 'NDCG20' in args.metrics or 'ERR20' in args.metrics:
            qrel_file_flag = True
    config = parse_config(args.data_config)
    if args.overload:
        config = edit_config(config, args.overload)
    # Remove train and dev from the data config
    config['datasets'] = {
        dset: config['datasets'][dset]
        for dset in config['datasets'] if dset not in ['train', 'dev']
    }
    # Force test to run on CPU
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    return config, args, qrel_file_flag
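# Example invocation of the test-time parser above; the paths are placeholders,
# not files from the original project.
config, args, needs_qrel = argument_parser(
    ['--data-config', 'data.yaml', '--model-folder', 'runs/model0',
     '--metrics', 'NDCG20', 'ERR20'])
print("Evaluating datasets:", list(config['datasets'].keys()))
print("TREC qrel file required:", needs_qrel)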
def initialize_pairs(root, binary):
    dataset = list()
    sets = os.listdir(root)
    sets = [i for i in sets if "set" in i]
    if binary == 2:
        classes = {"cell": 1}
    else:
        classes = utils.parse_config("config.txt")
        assert len(classes) + 1 == binary
    assert len(sets) > 1, "No sets found inside the data directory"
    if args.convert_to_coco:
        os.makedirs("./temp", exist_ok=True)
    for a_set in sets:
        target_json = {"shapes": []}
        set_path = os.path.join(root, a_set)
        tifs = glob.glob(os.path.join(set_path, "raw.tif"))
        pngs = glob.glob(os.path.join(set_path, "*png"))
        assert len(tifs) == 1, "Raw tif not found"
        masks = list()
        for key in classes.keys():
            class_id = classes[key]
            class_name = key + ".png" if binary != 2 else "labeled.png"
            class_label_path = os.path.join(set_path, class_name)
            assert class_label_path in pngs, class_name + " not found"
            mask = cv2.imread(class_label_path, -1)
            polygons = mask_to_poly(mask, binary)
            for poly in polygons:
                target_json["shapes"].append({"label": key, "points": poly})
        print("Converting", tifs[0])
        with open(tifs[0].replace("tif", "json"), 'w') as f:
            json.dump(target_json, f)
        if args.convert_to_coco:
            im_path = "./temp/" + "_".join(tifs[0].split("/")[-2:])
            target_json["imagePath"] = im_path.split("/")[-1]
            with open(im_path.replace("tif", "json"), "w") as f:
                json.dump(target_json, f)
            im = cv2.imread(tifs[0])
            cv2.imwrite(im_path.replace("json", "tif"), im)
    if args.convert_to_coco:
        print("\nConverting to COCO...")
        json2coco.process(**{"labels": "config.txt",
                             "input_dir": "./temp",
                             "output_dir": args.out_dir})
        if os.path.exists("./temp"):
            shutil.rmtree("./temp")
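# mask_to_poly is referenced above but not shown. A minimal sketch, assuming it
# traces every foreground region in the mask into a list of (x, y) point lists
# via OpenCV contours; the real helper may also use the binary flag to split
# per-class pixel values, which this sketch ignores.
def mask_to_poly(mask, binary):
    import numpy as np
    fg = (mask > 0).astype(np.uint8)  # treat any non-zero pixel as foreground
    # OpenCV 4 return signature: (contours, hierarchy)
    contours, _ = cv2.findContours(fg, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    polygons = []
    for contour in contours:
        contour = contour.squeeze(1)  # (N, 1, 2) -> (N, 2)
        if len(contour) >= 3:  # keep only real polygons
            polygons.append(contour.tolist())
    return polygons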
def __init__(self):
    super(GPT2Encoder, self).__init__(embed_size=768)
    self.codec = get_codec()
    self.gpt2_config = parse_config()
    self.gpt2_model = GPT2(self.gpt2_config)
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    if not os.path.exists('gpt2-pytorch_model.bin'):
        print("Downloading GPT-2 checkpoint...")
        url = 'https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin'
        r = requests.get(url, allow_redirects=True)
        open('gpt2-pytorch_model.bin', 'wb').write(r.content)
    self.gpt2_model = load_weight(
        self.gpt2_model,
        torch.load('gpt2-pytorch_model.bin', map_location=device))
    self.gpt2_model = self.gpt2_model.to(device)
    self.gpt2_model.eval()
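# The download above buffers the entire checkpoint (roughly 500 MB) in memory
# via r.content. A streamed variant (a sketch, reusing the same URL and file
# name) keeps the memory footprint small:
def download_checkpoint(
        url='https://s3.amazonaws.com/models.huggingface.co/bert/gpt2-pytorch_model.bin',
        dest='gpt2-pytorch_model.bin'):
    import requests
    with requests.get(url, stream=True, allow_redirects=True) as r:
        r.raise_for_status()
        with open(dest, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
                f.write(chunk)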
def argument_parser(sys_argv):
    # ARGUMENT HANDLING
    parser = argparse.ArgumentParser(prog='Train models')
    parser.add_argument('--data-config', help="Data configuration file path",
                        required=True, type=str)
    parser.add_argument('--model-config', help="Model configuration file path",
                        required=True, type=str)
    parser.add_argument('--model-folder', help="Path to save model's outputs",
                        required=True, type=str)
    parser.add_argument('--metrics', help="Metrics to calculate while training model",
                        default=['ERR20', 'NDCG20'], nargs='+', type=str)
    parser.add_argument('--overload', help="Pairs of parameters to overload",
                        nargs='+', type=str)
    parser.add_argument('--round-robin',
                        help="If true, runs every train combination "
                             "(every train folder gets to be validation once)",
                        default=False, action='store_true')
    args = parser.parse_args(sys_argv)
    data_config = parse_config(args.data_config)
    model_config = parse_config(args.model_config)
    if args.round_robin:
        assert 'dev' not in data_config['datasets'], \
            "When using --round-robin, dev can't be specified; put all files under 'train'"
        assert len(data_config['datasets']['train']) >= 2, \
            "Please provide more than one file for train when using --round-robin"
        # Get train combinations (leave one out for dev)
        train_combinations = []
        aux_dict = {}
        for i, dev_file in enumerate(data_config['datasets']['train']):
            train_combinations.append((
                ['train_%d' % (x + 1)
                 for x in range(len(data_config['datasets']['train'])) if x != i],
                ['dev_%d' % (i + 1)]
            ))
            aux_dict['train_%d' % (i + 1)] = [dev_file]
            aux_dict['dev_%d' % (i + 1)] = [dev_file]
            # For TREC qrel file
            model_config['val_qrel_file_%d' % i] = dev_file
            # For retraining
            if model_config['retrain']:
                model_config['train_qrel_files_%d' % i] = \
                    [d for x, d in enumerate(data_config['datasets']['train']) if x != i]
        # Replace with aux_dict
        data_config['datasets'] = aux_dict
    else:
        # Retain only train and dev
        data_config['datasets'] = {
            'train': data_config['datasets']['train'],
            'dev': data_config['datasets']['dev']
        }
        train_combinations = [(['train'], ['dev'])]
        # For TREC qrel file
        assert len(data_config['datasets']['dev']) == 1, \
            "Only provide one QREL file for dev"
        model_config['val_qrel_file_0'] = data_config['datasets']['dev'][0]
        if model_config['retrain']:
            model_config['train_qrel_files_0'] = data_config['datasets']['train']
    # Pass some keys of model_config to data_config
    data_config['sim_matrix_config'] = model_config['sim_matrix_config']
    data_config['query_idf_config'] = model_config['query_idf_config']
    data_config['num_negative'] = model_config['num_negative']
    data_config['use_description'] = model_config['use_description']
    data_config['use_topic'] = model_config['use_topic']
    data_config['custom_loss'] = model_config['custom_loss']
    if model_config['retrain']:
        data_config['retrain_mode'] = model_config['retrain_mode']
    # if model_config['sim_matrix_config']['use_static_matrices'] and model_config['top_k'] != 0:
    #     raise Exception("'use_embedding_layer' is set to True but 'top_k' != 0 and "
    #                     "'use_static_matrices' is set to True, which makes embeddings useless")
    if 'embeddings_path' in data_config:
        model_config['embeddings_path'] = data_config['embeddings_path']
    model_config['model_folder'] = args.model_folder
    for metric in args.metrics + [model_config['metric']]:
        assert metric in AVAILABLE_METRICS, "Unavailable metric %s" % metric
    config = {
        'data': data_config,
        'model': model_config,
        'monitoring_metric': model_config['metric'],
        'metrics': args.metrics,
        'num_gpus': 1
    }
    if args.overload:
        config = edit_config(config, args.overload)
    if 'gpu_device' in config['model']:
        # Brute-forced for Keras/TF
        if not isinstance(config['model']['gpu_device'], tuple):
            config['model']['gpu_device'] = [config['model']['gpu_device']]
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            str(x) for x in config['model']['gpu_device'])
        config['num_gpus'] = len(config['model']['gpu_device'])
    return config, train_combinations
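# Example call into the training parser above; the file names are placeholders.
config, train_combinations = argument_parser(
    ['--data-config', 'data.yaml', '--model-config', 'model.yaml',
     '--model-folder', 'runs/model0', '--round-robin'])
for train_sets, dev_sets in train_combinations:
    print("train on", train_sets, "validate on", dev_sets)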
model = get_model(config['arch'])
# the trainer class is resolved by name from the architecture config
trainer = eval(config['arch']['algorithm'])(config=config,
                                            model=model,
                                            criterion=criterion,
                                            train_loader=train_loader,
                                            post_process=post_p,
                                            metric=metric,
                                            validate_loader=validate_loader,
                                            converter=converter)
trainer.train()


if __name__ == '__main__':
    import os
    import sys
    project = 'tianrang-ocr'  # project root directory
    sys.path.append(os.getcwd().split(project)[0] + project)
    from utils.utils import parse_config
    args = init_args()
    assert os.path.exists(args.config_file)
    config = anyconfig.load(open(args.config_file, 'rb'))
    if 'base' in config:
        config = parse_config(config)
    mapping = Dict(config)
    main(mapping)
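# init_args is referenced above but not shown; a minimal sketch, assuming it
# only exposes the --config_file flag that the entry point reads. The default
# path is a placeholder, not the project's actual config.
def init_args():
    import argparse
    parser = argparse.ArgumentParser(description='tianrang-ocr entry point')
    parser.add_argument('--config_file', default='config/default.yaml', type=str,
                        help='path to the training configuration file')
    return parser.parse_args()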
print "\nsnom multicast PnP Provisioning Server (mcserv)\n" print "(c) 2008-2009 snom technology AG\n" print "=" * 80 config['prov_uri'] = options.prov_uri # Configuration file has been provided # # NOTE: Local (command-line) options overwrite config file # configuration = ConfigParser.ConfigParser() if options.config: print "Reading configuration from %s" % options.config configuration.read(options.config) # Fixme: make sure the file exists (config, msconfig) = utils.parse_config(configuration, options) if not config['multistage']: print "Provisioning URI is %s\n" % config['prov_uri'] sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP) sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) sock.bind(('224.0.1.75', options.local_port)) mreq = struct.pack('4sl', socket.inet_aton('224.0.1.75'), socket.INADDR_ANY) sock.setsockopt(socket.IPPROTO_IP, socket.IP_ADD_MEMBERSHIP, mreq) if not options.local_ip and not config['local_ip']: config['local_ip'] = utils.get_ip_address() else: config['local_ip'] = options.local_ip
                dropout=args.dropout).to(device)
logging.info('\nNetwork architecture:\n{}'.format(str(model)))

# map CLI choices to optimizer constructors and loss functions
optim_handle = {'adam': optim.Adam,
                'sgd': optim.SGD,
                'adagrad': optim.Adagrad,
                'rmsprop': optim.RMSprop}
optimizer = optim_handle[args.optim](model.parameters(), lr=args.lr,
                                     weight_decay=args.weight_decay)

loss_fun_handle = {'f1': f1_loss,
                   'bce': nn.BCELoss(),
                   'hamming': hamming_loss}
criterion = loss_fun_handle[args.loss_fun]

logging.info(parse_config(args.__dict__) + '\n')


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    output = model(features, adj)
    loss_train = criterion(output[idx_train], labels[idx_train])
    loss_train.backward()
    optimizer.step()
    acc_train = accuracy(output[idx_train], labels[idx_train])
    f1_train = f1_score(output[idx_train], labels[idx_train])
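# A minimal sketch of driving the train() step above; args.epochs is an
# assumption about the surrounding script's CLI.
for epoch in range(args.epochs):
    train(epoch)
    logging.info('Finished epoch {:d}'.format(epoch + 1))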