示例#1
0
def load_models(opt):
    """Build one model and one CNN per entry of ``opt.iterations``.

    For every iteration the saved infos are loaded and each option stored in
    ``infos['opt']`` is copied onto ``opt``, except the names in the skip
    list below, which keep whatever value ``opt`` already has. Both networks
    are then set up from the updated ``opt``.

    Returns:
        A ``(models, model_cnns)`` tuple of lists, in iteration order.
    """
    # Options that are never overwritten by the saved values.
    skipped = [
        "id", "batch_size", "beam_size", "start_from_best", "input_json",
        "input_h5", "input_anno", "images_root", "aic_caption_path",
        "input_bu"
    ]

    models, model_cnns = [], []

    for iteration in opt.iterations:
        print(iteration)

        saved_options = vars(load_infos(opt, iteration)['opt'])
        live_options = vars(opt)
        for key in saved_options.keys():
            if key in skipped:
                continue
            live_options[key] = saved_options[key]

        # The CNN is set up before the main model, as in the saved run.
        cnn = setup_cnn(opt, iteration)
        net = setup(opt, iteration)

        models.append(net)
        model_cnns.append(cnn)

    return models, model_cnns
示例#2
0
 def parse_texts(self):
     """Download the ``/text/`` page and wrap every matched entry.

     Returns a list of TextModel objects (empty when nothing matches), or
     None, after printing an error, when ``request_content`` gives back a
     falsy value.
     """
     content = self.request_content("%s/text/" % self.base_url)
     if not content:
         print 'request content error'
         return None

     # Capture groups, in order: user icon url, user name, release time,
     # text.
     pattern = re.compile(
         '<img.*?u-logo lazy".*?data-original="(.*?)".*?></a>.*?'
         '<a.*?u-user-name.*?target="_blank">(.*?)</a>.*?'
         '<span.*?f-ib f-fr">(.*?)</span>.*?'
         '<div.*?j-r-list-c-desc">(.*?)</div>',
         re.S)

     # NOTE(review): User receives (icon url, name) in that order -- confirm
     # this matches User's parameter order.
     return [
         TextModel(User(icon, name), released, text, self.name)
         for icon, name, released, text in re.findall(pattern, content)
     ]
示例#3
0
 def parse_texts(self):
     """Download the site's front page and wrap every matched entry.

     Returns a list of TextModel objects (empty when nothing matches), or
     None, after printing an error, when ``request_content`` gives back a
     falsy value.
     """
     content = self.request_content("%s/" % self.base_url)
     if not content:
         print 'request content error'
         return None

     # Capture groups, in order: user icon url, user name, release time,
     # text.
     pattern = re.compile(
         '<img.*?"user-img lazy-load left".*?data-src="(.*?)".*?onerror.*?>.*?'
         '<div.*?class="name-time-wrapper left">.*?<span.*?class="name">(.*?)</span>.*?'
         '<span.*?class="time timeago".*?>(.*?)</span>.*?'
         '<div.*?class="upload-txt  no-mb">.*?<p>(.*?)</p>',
         re.S)

     # NOTE(review): User receives (icon url, name) in that order -- confirm
     # this matches User's parameter order.
     return [
         TextModel(User(icon, name), released, text, self.name)
         for icon, name, released, text in re.findall(pattern, content)
     ]
示例#4
0
    def __init__(self, posts):
        """Try to assemble one five-line poem from ``posts.familyhash``.

        Up to three texts whose syllable count lies strictly between 7 and
        11 are taken from families holding at least three entries, then up
        to two texts with a count strictly between 4 and 8 from families
        holding at least two entries. When fewer than five texts were found
        nothing is stored; otherwise one poem is added to ``self.poems``.
        """
        # NOTE(review): .copy() is shallow, so the pops below presumably
        # consume the very lists held by posts.familyhash -- confirm that
        # this side effect is intended.
        fmap = posts.familyhash.copy()

        def harvest(min_family_size, wanted, low, high):
            """Pop entries until ``wanted`` texts with low < syllables < high."""
            picked = []
            for family in fmap.keys():
                flist = fmap[family]
                if len(flist) < min_family_size:
                    continue
                while len(picked) < wanted:
                    try:
                        ele = flist.pop()
                    except Exception:
                        # Family exhausted: move on to the next one.
                        break
                    if low < ele.syllables < high:
                        picked.append(ele.text)
            return picked

        m = harvest(3, 3, 7, 11)
        n = harvest(2, 2, 4, 8)

        if len(m) + len(n) < 5:
            return  # Not a poem!

        # NOTE(review): the last line repeats m[1] while m[2] stays unused --
        # confirm whether m[2] was meant here.
        self.poems += [[m[0], m[1], n[0], n[1], m[1]]]
示例#5
0
def get_model_lists():
    """Describe the admin-registered ``rutfet`` models for rendering.

    Walks every model registered with the Django admin site and, for each
    model whose app label is ``"rutfet"`` and which is not in the exclusion
    list, collects its nested fields (auto fields left out).

    Returns:
        A list of dicts with the keys ``caption`` (capitalised plural verbose
        name), ``name`` (lower-cased class name), ``app_label`` and
        ``fields``. Each field is a dict with ``caption`` (HTML-escaped),
        ``name`` and ``help_text``.
    """
    from django.utils.text import capfirst
    from django.utils.html import escape
    from django.contrib import admin
    from django.db import models as db_models

    excluded_models = [Role, SupplyPlace, LateHealthPost]
    model_lists = []
    for model in admin.site._registry:

        # select models only related to rutfet
        if model._meta.app_label != "rutfet" or model in excluded_models:
            continue

        # fetch ALL fields (including those nested via foreign keys) for
        # this model; only done for models that are actually reported
        fields = [
            {"caption": escape(capt), "name": name, "help_text": field.help_text}
            for name, capt, field in nested_fields(model)
            if not isinstance(field, db_models.AutoField)
        ]

        # pass model metadata and fields array to the template to be rendered
        model_lists.append({
            "caption": capfirst(model._meta.verbose_name_plural),
            "name": model.__name__.lower(),
            "app_label": model._meta.app_label,
            "fields": fields,
        })

    return model_lists
示例#6
0
def get_defined_models():
    """Return the declarative model classes found in the ``models`` module.

    Every member of the module except ``Base`` is inspected; those whose type
    is SQLAlchemy's ``DeclarativeMeta`` are returned as a list. A KeyError is
    raised when the module has no ``Base`` member.
    """
    import models as models_module
    import sqlalchemy

    candidates = dict(inspect.getmembers(models_module))
    del candidates['Base']
    return [
        member
        for member in candidates.values()
        if isinstance(member, sqlalchemy.ext.declarative.api.DeclarativeMeta)
    ]
def main(args):
    """Load the configured models and compare them on the validation split.

    Reads the config file named by ``args.config``, builds the validation
    dataset (NYU when ``params["nyu"]`` is truthy, FastDepth otherwise) and
    its loader, loads every model listed under ``params["models"]`` onto the
    selected device, creates the output directory and hands everything to
    ``compare_models``.
    """
    print("Loading config file: ", args.config)
    params = utils.load_config_file(args.config)
    params["dataset_paths"] = utils.format_dataset_path(
        params["dataset_paths"])
    if "nyu" not in params:
        params["nyu"] = False

    # Data loading code
    print("Creating data loaders...")
    if params["nyu"]:
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(params["dataset_paths"], split='val')
    else:
        val_dataset = Datasets.FastDepthDataset(params["dataset_paths"],
                                                split='val',
                                                depth_min=params["depth_min"],
                                                depth_max=params["depth_max"],
                                                input_shape_model=(224, 224),
                                                random_crop=False)

    # set batch size to be 1 for validation
    data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=params["num_workers"],
        pin_memory=True)

    # Set GPU: a negative device index, or no CUDA at all, selects the CPU.
    params["device"] = torch.device(
        "cuda:{}".format(params["device"])
        if params["device"] >= 0 and torch.cuda.is_available() else "cpu")
    print("Using device", params["device"])

    print("Loading models...")
    models = []
    model_names = []
    for model_dict in params["models"]:
        model_names.append(Path(model_dict["model_path"]).stem)
        model, _ = utils.load_model(model_dict, model_dict["model_path"],
                                    params["device"])
        model.to(params["device"])
        models.append(model)

    # Create output directory, named after the compared models. exist_ok
    # avoids the race between an existence check and the creation.
    output_directory = os.path.join(params["save_folder"],
                                    ".".join(model_names))
    os.makedirs(output_directory, exist_ok=True)
    params["output_directory"] = output_directory
    print("Saving results to " + output_directory)

    compare_models(params, data_loader, models)
def compile_all_results (scenarios, dir='../data'):
    """
    Compiles the results across multiple scenarios produced by running run_on_cluster on each
    one into a single csv file. The specified directory must be where the results of
    running run_on_cluster for each scenario are stored (each is a sub-directory named v0, v1, etc.)
    and is also where the output from this function will be saved.

    scenarios is the number of scenario sub-directories to read (v0 .. v<scenarios-1>).
    For each one the 'bad_model' and 'latent_simplex' summary files are read, their
    header line is skipped, and every row is tagged with its scenario index. The
    result is written to '<dir>/all_summary_metrics.csv'.
    """

    # (output column name, index of the value in a summary row), in output order.
    # Row index 5 (std_bias) is not part of the output. The 'mean_covearge_bycause'
    # spelling is kept because it is the column header existing consumers see.
    layout = [
        ('model', 0), ('time', 2), ('true_cf', 3), ('true_std', 4),
        ('cause', 1), ('mean_abs_err', 6), ('median_abs_err', 7),
        ('mean_rel_err', 8), ('median_rel_err', 9),
        ('mean_csmf_accuracy', 10), ('median_csmf_accuracy', 11),
        ('mean_covearge_bycause', 12), ('mean_coverage', 13),
        ('percent_total_coverage', 14),
    ]

    scenario = []
    rows = []
    for i in range(scenarios):
        for j in ['bad_model', 'latent_simplex']:
            # The with-block closes each summary file as soon as it is read.
            with open('%s/v%s/%s_summary.csv' % (dir, i, j)) as summary_file:
                read = csv.reader(summary_file)
                next(read, None)  # skip the header line
                for row in read:
                    rows.append(row)
                    scenario.append(i)

    arrays = [scenario] + [[row[index] for row in rows] for _, index in layout]
    names = ['scenario'] + [name for name, _ in layout]

    summary = pl.np.core.records.fromarrays(arrays, names=names)
    pl.rec2csv(summary, fname='%s/all_summary_metrics.csv' % (dir))
示例#9
0
def fit(csv_file,
        classifiers=None,
        features=None,
        svm_settings=None,
        knn_settings=None,
        lda_settings=None):
    """
    Train the selected classifiers on the selected features of a csv file.

    When ``classifiers`` or ``features`` is not given, the corresponding
    ``select_*`` helper is called to obtain it.

    Arguments:
        csv_file:       the file path to csv file containing data. This is formatted and split into
                        training and control data
        classifiers:    the array of classifier names
        features:       the features to fit on
        svm_settings:   settings passed to get_model for the 'svm' classifier
        knn_settings:   settings passed to get_model for the 'knn' classifier
        lda_settings:   settings passed to get_model for the 'lda' classifier

    Returns:
        models:         list of (classifier name, trained model) tuples; classifiers
                        for which get_model returned None are left out
        training_data:  the training data without the 'diabetes' column
        target_data:    the 'diabetes' column of the training data
        control_data:   data you can use to control the fit of the data per classifier
    """

    # Identity checks: "== None" is evaluated element-wise by array-like
    # arguments and then cannot be used as a condition.
    if classifiers is None:
        classifiers = select_classifiers()

    if features is None:
        features = select_features(csv_file)

    training_data, control_data = data.get_dataframes(csv_file, features)
    target_data = training_data['diabetes']
    training_data = training_data.drop(columns=['diabetes'])

    settings_by_classifier = {
        'svm': svm_settings,
        'knn': knn_settings,
        'lda': lda_settings,
    }

    models = []  # The array containing trained models
    for c in classifiers:
        if c in settings_by_classifier:
            model = get_model(c, training_data, target_data,
                              settings_by_classifier[c])
        else:
            model = get_model(c, training_data, target_data)

        if model is not None:
            models.append((c, model))

    return models, training_data, target_data, control_data
示例#10
0
 def parse_images(self):
     """Download the ``/pic/`` page and wrap every matched entry.

     Returns a list of ImageModel objects (empty when nothing matches), or
     None when ``request_content`` gives back a falsy value.
     """
     content = self.request_content('%s/pic/' % self.base_url)
     if not content:
         return None

     # Five capture groups; the first two feed User, the remaining three
     # feed ImageModel.
     pattern = re.compile(
         '<img.*?"user-img lazy-load left".*?data-src="(.*?)".*?onerror.*?>.*?'
         '<div.*?class="name-time-wrapper left">.*?<span.*?class="name">(.*?)</span>.*?'
         '<span.*?class="time timeago".*?>(.*?)</span>.*?'
         '<div.*?class="upload-txt.*?<p>(.*?)</p>.*?'
         '<img.*?class="upload-img lazy".*?data-src="(.*?)".*?>',
         re.S)

     # NOTE(review): the original comment gave ImageModel's signature as
     # (user, time, image_url, text, content_from), yet the <p> capture is
     # passed before the upload-img capture -- confirm the argument order.
     return [
         ImageModel(User(first, second), third, fourth, fifth, self.name)
         for first, second, third, fourth, fifth in re.findall(pattern, content)
     ]
示例#11
0
 def parse_images(self):
     """Download the ``/pic/`` page and wrap every matched entry.

     Returns a list of ImageModel objects (empty when nothing matches), or
     None when ``request_content`` gives back a falsy value.
     """
     content = self.request_content('%s/pic/' % self.base_url)
     if not content:
         return None

     # Capture groups, in order: user icon url, user name, release time,
     # text, image url.
     pattern = re.compile(
         '<img.*?u-logo lazy".*?data-original="(.*?)".*?></a>.*?'
         '<a.*?u-user-name.*?target="_blank">(.*?)</a>.*?'
         '<span.*?f-ib f-fr">(.*?)</span>.*?'
         '<div.*?j-r-list-c-desc">(.*?)</div>.*?'
         '<img.*?class="lazy".*?data-original="(.*?)".*?>',
         re.S)

     # NOTE(review): the original comment gave ImageModel's signature as
     # (user, time, image_url, text, content_from), yet text is passed
     # before the image url -- confirm the argument order.
     return [
         ImageModel(User(icon, name), released, text, image, self.name)
         for icon, name, released, text, image in re.findall(pattern, content)
     ]
示例#12
0
def getRelatedModels(entry):
    """Group the rows that depend on ``entry`` by the name of their model.

    ``entry.dependencies()`` is expected to yield ``(query, fk)`` pairs; they
    are walked in reverse order and, for each pair, the rows returned by
    ``fk.model_class.select().where(query)`` are collected.

    Returns:
        A ``(models, entries)`` tuple: ``models`` lists the model class names
        in order of first appearance and ``entries`` maps each name to the
        list of its rows. Collection is best-effort: when an error occurs,
        whatever was gathered up to that point is returned.
    """
    entries = {}
    models = []
    try:
        for query, fk in reversed(list(entry.dependencies())):
            modelname = fk.model_class.__name__
            for x in fk.model_class.select().where(query):
                # Explicit membership test instead of a bare except used as
                # a "key missing" signal.
                if modelname not in entries:
                    models.append(modelname)
                    entries[modelname] = []
                entries[modelname].append(x)
    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt and SystemExit are
        # no longer swallowed.
        pass
    return (models, entries)
示例#13
0
def init():
    """Load the cached models and folds for ``args.name``, or create them.

    When ``./saliency_data/<name>`` exists, one model and one
    ``(train, test, val)`` fold are loaded per entry of its ``models``
    directory. Otherwise the folds are generated (optionally narrowed to
    ``args.fold``), one model is trained per fold, and every model and fold
    is saved under that directory.

    Returns:
        A ``(models, folds)`` tuple.
    """
    root = './saliency_data/{}'.format(args.name)
    models_dir = root + '/models'
    data_dir = root + '/data'
    models = []

    if os.path.exists(root):
        folds = []
        cached = len(os.listdir(models_dir))
        for index in range(cached):
            models.append(
                keras.models.load_model('{}/model{}.hdf5'.format(models_dir, index)))
            train = pd.read_pickle('{}/train{}.p'.format(data_dir, index))
            test = pd.read_pickle('{}/test{}.p'.format(data_dir, index))
            val = pd.read_pickle('{}/val{}.p'.format(data_dir, index))
            folds.append((train, test, val))
        return models, folds

    folds = deepconv_kfold.get_data('min', 'max', cols, args.preprocessing,
                                    args.simple, args.test_split, args.runs)
    if args.fold is not None:
        folds = [folds[args.fold]]

    for index, (train, test, val) in enumerate(folds):
        model = deepconv_kfold.train_model_fold(train, val, args.model,
                                                args.epochs, index,
                                                use_keras=True)
        models.append(model)
        # Directories are only created once a model has been trained.
        os.makedirs(models_dir + '/', exist_ok=True)
        os.makedirs(data_dir + '/', exist_ok=True)
        model.save('{}/model{}.hdf5'.format(models_dir, index))
        test.to_pickle('{}/test{}.p'.format(data_dir, index))
        train.to_pickle('{}/train{}.p'.format(data_dir, index))
        val.to_pickle('{}/val{}.p'.format(data_dir, index))
    return models, folds
def input_layer(sh_list, sh_test, sh_c_list, shm_list, train_loader,
                test_loader, model, rank, split, batch_size, batch_num,
                test_batch_num, epoch_num, lamda, lr, cv):
    """Run the training/evaluation loop for the stage that reads the data.

    ``split`` deep copies of ``model`` are kept, each with its own SGD
    optimizer. For every training batch the target is sent through
    ``shm_list[0]``, the batch is pushed through one of the copies (picked
    round-robin), and the output is copied into ``sh_c_list[rank]`` and
    signalled through ``sh_list[rank]``. Gradients for earlier outputs are
    received from ``sh_list[rank + split - 1]`` and applied to the copy that
    produced the output. After every step of ``lamda`` batches the copies'
    parameters are averaged into ``model`` and copied back into every copy.
    After each epoch the outputs of ``model`` on ``test_loader`` are sent
    through ``sh_test[rank]``, and the learning rate is multiplied by 0.1
    when the epoch index is 150 or 225.

    ``batch_size``, ``test_batch_num`` and ``cv`` are only referenced by
    commented-out code. Nothing is returned.
    """

    update = 0
    # Channel handles used by this stage.
    feed_q1 = sh_list[rank]
    grad_q1 = sh_list[rank + split - 1]  #split = 3

    send_output = sh_c_list[rank]

    feed_test = sh_test[rank]

    send_target = shm_list[0]

    models = []
    outputs = []
    inputs = []
    optim = []

    # n == (split - 1) - rank; it is used as the number of iterations to wait
    # before the first gradient is consumed (see `delay`).
    n = -1 * (rank - (split - 1))
    #num_of_models = 2*split - 1
    #num_of_models = n + 1
    num_of_models = split
    #delay = n *(2)# + 1
    delay = n

    #model.reset_parameters()
    # One model copy, output slot, input slot and optimizer per index.
    for i in range(num_of_models):
        models.append(copy.deepcopy(model))
        outputs.append(0)
        inputs.append(0)
        optim.append(
            torch.optim.SGD(models[i].parameters(),
                            lr=lr,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True))
        #optim.append(torch.optim.Adam(models[i].parameters(),lr=1e-4))
        #optim.append(torch.optim.SGD(models[i].parameters(),lr=lr))
    for i in models:
        i.cuda(rank)
    model.cuda(rank)

    #data = data_set[:,:-mnist_data.NUM_LABELS]
    time_tot = 0

    # Number of averaging steps per epoch.
    # NOTE(review): this tests the remainder against 1 while hidden_layer in
    # this file tests against 0 -- confirm that the difference is intended.
    steps = int(batch_num / lamda)
    if batch_num % lamda != 1:
        steps += 1
    lamda_back = lamda

    for epoch in range(epoch_num):

        #with torch.autograd.profiler.profile() as prof:
        s_t_u = resource_usage(RUSAGE_SELF)
        s_t = timestamp()
        model.train()
        for i in models:
            i.train()
        train_data = train_loader.__iter__()
        t = 0

        # Timestamps (t*) and accumulated durations (td*) for this epoch.
        t1 = 0
        t2 = 0
        t3 = 0
        t4 = 0
        t5 = 0
        t6 = 0
        t7 = 0
        t8 = 0
        td1 = 0
        td2 = 0
        td3 = 0
        td4 = 0
        td5 = 0
        td6 = 0

        #for time in range(1,(batch_num + 2 * split - (rank + 1) - 1 + 1)):
        for step in range(1, steps + 1):
            #off = (step-1)*lamda

            #cv.acquire()
            #cv.wait()
            #cv.notify_all()
            #cv.release()
            #cv.sync(rank)

            # The last step only handles the batches that are left over.
            lamda = lamda_back
            if step == steps:
                lamda = batch_num - (step - 1) * lamda
            #print('step',step,'lamda',lamda)

            for time in range(1, lamda + delay + 1):

                #if time <= off + lamda :
                # Forward phase: the first `lamda` iterations consume a batch.
                if time <= lamda:

                    #offset = (time-1) * batch_size
                    t1 = timestamp()
                    #offset = t * batch_size
                    data, target = next(train_data)
                    send_target.send(target)
                    #print('rank',rank,time,target)

                    #x = x.view(-1,784)
                    #input_feat = Variable(data,requires_grad=True).to("cuda:0")
                    data = data.cuda(rank, non_blocking=True)
                    #input_feat = Variable(data[offset:offset+batch_size,:],requires_grad=True).cuda(rank)
                    t2 = timestamp()
                    #print(input_feat)
                    #print(input_feat.size())

                    # Round-robin choice of copy; the index is -1 (the last
                    # copy) when `time` is a multiple of num_of_models.
                    model_idx = (time % num_of_models) - 1
                    #output = models[model_idx].forward(input_feat)
                    output = models[model_idx].forward(data)
                    #inputs[model_idx] = input_feat
                    # Kept so the matching gradient can be back-propagated.
                    outputs[model_idx] = output
                    t3 = timestamp()
                    #print(output.size())
                    #output_send = output.to("cpu")
                    feed_q1.send_wait()
                    send_output.copy_(output.data)
                    #print('send',t,send_output)
                    feed_q1.async_send_signal()
                    #feed_q1.send(output.data.to("cpu"))
                    t += 1
                    t4 = timestamp()

                # Backward phase: starts once `delay` iterations have passed.
                if time > delay:  #  t-(2K-k-1)
                    #if time >= 1+ delay :   #  t-(2K-k-1)
                    t5 = timestamp()
                    pg = grad_q1.recv()
                    pg = pg.cuda(rank)
                    t6 = timestamp()
                    # Same round-robin mapping as the forward phase, shifted
                    # by `delay`.
                    output_idx = ((time - delay) % num_of_models) - 1
                    optimizer = optim[output_idx]
                    optimizer.zero_grad()
                    output = outputs[output_idx]
                    output.backward(pg)
                    #a = list(models[output_idx].parameters())[0].clone()
                    optimizer.step()
                    t7 = timestamp()
                    #b = list(models[output_idx].parameters())[0].clone()
                    #print(torch.equal(a.data,b.data))
                # NOTE(review): these sums run on every iteration, also when
                # the phase above was skipped, so differences of stale
                # timestamps are added again -- confirm whether intended.
                td1 += t2 - t1
                td2 += t3 - t2
                td3 += t4 - t3

                td4 += t6 - t5
                td5 += t7 - t6

            #print(time)
            #feed_q1.init()
            #grad_q1.init()
            # presumably zeroes the parameters of `model` before the averages
            # are accumulated below -- TODO confirm against init_zero
            model.init_zero()

            with torch.cuda.device(rank):

                # Accumulate each copy's parameters / num_of_models into
                # `model`.
                for i in range(num_of_models):
                    j = models[i].parameters()
                    for k in model.parameters():
                        #k = 0
                        l = j.__next__()
                        k.requires_grad_(False)
                        k.copy_(k.data + l.data / num_of_models)

                # Copy the parameters of `model` back into every copy.
                for i in range(num_of_models):
                    j = model.parameters()
                    for k in models[i].parameters():
                        l = j.__next__()
                        k.requires_grad_(False)
                        k.copy_(l.data)
                        k.requires_grad_(True)

        #print('average_done worker 1')

        e_t_u = resource_usage(RUSAGE_SELF)
        e_t = timestamp()
        u_t = e_t_u.ru_stime - s_t_u.ru_stime
        t = e_t - s_t
        time_tot = time_tot + t
        #print('node1 user time = %f time = %f time_tot = %f' % ( u_t , t, time_tot))
        #print(prof)
        # td1: fetching the batch, sending the target and moving the data;
        # td2: forward pass; td3: publishing the output; td4: receiving the
        # gradient; td5: backward pass and optimizer step.
        print('rank =', rank, 'recv output =', td1)
        print('rank =', rank, 'forward =', td2)
        print('rank =', rank, 'send output', td3)
        print('rank =', rank, 'recv grad =', td4)
        print('rank =', rank, 'backward =', td5)

        model.eval()
        for i in models:
            i.eval()

        # Evaluation: only the averaged `model` is run; every output is sent
        # on as a CPU tensor.
        for data, target in test_loader:
            #for i in range(test_batch_num):
            #print(data,target)
            #print('rank',rank,target)
            #offset = i * batch_size
            #x = Variable(test_set[offset:offset+batch_size,:])
            x = Variable(data).cuda(rank)

            #x = x.view(-1,784)
            #x = x.to("cuda:0")
            output = model.forward(x)
            #output = output.to("cpu")
            #print(output.size())
            feed_test.send(output.data.to('cpu'))
            #i += 1
        # Step decay: divide the learning rate by 10 after epoch indices 150
        # and 225, for every optimizer.
        if epoch == 150 or epoch == 225:
            lr = lr * 0.1
            for i in optim:
                for j in i.param_groups:
                    j['lr'] = lr
示例#15
0
def predict_fast(args):
    """Run the configured ensemble over the test set and save its predictions.

    One network is built per entry of the module-level ``configs``, wrapped
    in ``DataParallel``, loaded from its checkpoint and switched to eval
    mode. For every sample each network's output is averaged with the output
    for the horizontally flipped input (flipped back), weighted by the
    entry's ``weight`` and accumulated per image name; the accumulated
    predictions are then normalized and saved.
    """

    transforms = Compose([])
    dataset = OpenEDSDatasetTest(data_path=args.data_path,
                                 labels_file=args.label_file,
                                 save_path=args.save_dir,
                                 transforms=transforms,
                                 normalize={
                                     "mean": [0.485, 0.456, 0.406],
                                     "std": [0.229, 0.224, 0.225]
                                 },
                                 cumulative=True)
    data_loader = DataLoader(dataset,
                             batch_size=1,
                             num_workers=8,
                             shuffle=False,
                             pin_memory=False)

    # The list must not be called ``models``: that would shadow the
    # ``models`` module that is looked up below for non-qubvel networks.
    ensemble = []
    for model_config in configs:
        conf = load_config(model_config.config_path)
        models_zoo = conf.get('models_zoo', 'selim')
        if models_zoo == 'qubvel':
            import segmentation_models_pytorch as smp
            model = smp.Unet(encoder_name=conf['encoder'],
                             classes=conf['num_classes'])
        else:
            model = models.__dict__[conf['network']](
                seg_classes=4, backbone_arch=conf['encoder'])
        model = torch.nn.DataParallel(model).cuda()

        checkpoint_path = model_config.weights_path
        checkpoint = torch.load(checkpoint_path, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'])
        model.eval()
        ensemble.append([model, model_config])

    with torch.no_grad():
        for sample in tqdm(data_loader):
            imgs = sample["image"].cuda().float()

            preds_dict = {}
            for model, model_config in ensemble:

                # Average the plain prediction with the prediction for the
                # input flipped along dim 3, flipped back.
                output = model(imgs)
                output_flip = torch.flip(model(torch.flip(imgs, dims=(3, ))),
                                         dims=(3, ))

                output = (output + output_flip) / 2
                output = output.cpu()

                for i in range(output.shape[0]):
                    img_name = sample["img_name"][i]

                    if img_name not in preds_dict:
                        preds_dict[img_name] = {
                            'output': output[i] * model_config.weight,
                            'total_weight': model_config.weight
                        }
                    else:
                        preds_dict[img_name][
                            'output'] += output[i] * model_config.weight
                        preds_dict[img_name][
                            'total_weight'] += model_config.weight

            preds_total = normalize_preds(preds_dict)
            save_preds(args, preds_total)
示例#16
0
        for j in range(int((n_frames - n_trains[i])/10)):
            (x, y) = next(test_gen)
            test_preds[j*batch_size:(j+1)*batch_size, :] = net.forward(x).cpu().detach().numpy()
            test_targets[j*batch_size:(j+1)*batch_size, :] = y.cpu().detach().numpy()

        temp = pearsonr(train_preds.squeeze(), train_targets.squeeze())
        train_pcc[i] = temp[0]
        temp = pearsonr(test_preds.squeeze(), test_targets.squeeze())
        test_pcc[i] = temp[0]
        temp = spearmanr(train_preds.squeeze(), train_targets.squeeze())
        train_srocc[i] = temp[0]
        temp = spearmanr(test_preds.squeeze(), test_targets.squeeze())
        test_srocc[i] = temp[0]

        models.append(net)

    savemat('results/nn_' + str(args.n_feats) + '_train_size_analysis.mat', {'nn_' + str(args.n_feats) + '_train_pcc': train_pcc, 'nn_' + str(args.n_feats) + '_test_pcc': test_pcc,
            'nn_' + str(args.n_feats) + '_train_srocc': train_srocc, 'nn_' + str(args.n_feats) + '_test_srocc': test_srocc})

elif args.mode == 'analyze_scale_qp':

    n_train = args.train_size

    Net = models.model_class[args.model]
    data_gen_function = models.data_generator[args.model]

    net = Net(n_feats, h_size).cuda()
    train_gen = data_gen_function(n_train, n_feats, batch_size, args.data_path, 'train')
    net.train(train_gen, int(n_train/10), int(args.epochs))
    test_gen = data_gen_function(n_train, n_feats, batch_size, args.data_path, 'test')
示例#17
0

if __name__ == '__main__':
    args = argparser()
    # model_types = sorted(Prediction_Gammas.keys())
    # model_types = ['dphprg','dphprgln','dphpg','dppn']
    # model_types = ['dppprg','dppprgln','dpppg','dpppgln','dppn']
    model_types = [
        'sdppprg', 'sdpppg', 'sdpppgln', 'sdppprgln', 'mdppprg', 'mdpppg'
    ]

    models = []
    for model_type in model_types:
        mm = glob.glob(os.path.join(args.path, model_type, 'results*.db'))
        for m in mm:
            models.append((model_type, m))

    pplrs = []
    for model in models:
        print('Processing model {}   '.format(model[0]), end=' ')
        try:
            pplrs.append(ppl_generation(model))
            print('Passed')
        except pd.io.sql.DatabaseError:
            print('Failed')
            pass

    df = pd.DataFrame(
        pplrs,
        # columns = ('type','name','PPL_L1','PPL_L2','PPL_Linf','ES_Linf'),
        columns=('type', 'name', 'PPL_Linf', 'ES_Linf', 'PPL_Linf_F',
示例#18
0
    return kldr


if __name__ == '__main__':
    # Gather every model results database and every generator database found
    # under the sim_* directories, run kl_wrapper on each of them in a
    # process pool, and write the results to kl_divergence.csv.
    args = argparser()
    paths = glob.glob(os.path.join(args.path, 'sim_*'))
    # model_types = ['dphpg','dphprg','dphprgln','dppn','vhpg']
    model_types = ['dpppg', 'dppprg', 'dppprgln', 'dppn', 'vppg']
    models, gens = [], []

    for path in paths:
        for model_type in model_types:
            models.extend(
                glob.glob(os.path.join(path, model_type, 'results*.db')))
        gens.extend(glob.glob(os.path.join(path, 'data.db')))

    pool = Pool(processes=cpu_count(), initializer=limit_cpu)
    kldrs = list(pool.map(kl_wrapper, models + gens))
    pool.close()

    # Each result is (type, scenario, values); the values are spread over
    # the k_0 .. k_<n-1> columns.
    value_columns = ['k_{}'.format(i) for i in range(len(kldrs[0][2]))]
    records = [(x[0], x[1], *x[2]) for x in kldrs]
    df = pd.DataFrame(
        records,
        columns=['type', 'scenario'] + value_columns,
    )
    df.to_csv(os.path.join(args.path, 'kl_divergence.csv'), index=False)
示例#19
0
def make_predictions(file,modelPath):
    """Run the loaded model(s) over one data file and save the predictions.

    Loads ``file``, loads the model stored under ``ModelData/1``, rebuilds
    the de-normalized health values for players 0-9, predicts every row one
    at a time (averaging over the loaded models) and writes ``hero.npy``,
    ``label.npy``, ``xLims.npy`` and ``health.npy`` to the working
    directory.

    NOTE(review): ``modelPath`` is never read. Several names used below
    (``normalization_stats``, ``model``, ``true_pos``, ``false_pos``,
    ``heroStuff``, ``labelStuff``) are not assigned anywhere in this
    function; unless they exist at module level the function raises
    NameError -- confirm before relying on it.
    """
    trainingDataFiles = [file]#glob.glob("/scratch/staff/ak1774/shared_folder/data/train/*.h5")
    data = data_loader.load_data_from_file(trainingDataFiles[0])

    models = []
    # range(1,2) yields only i == 1, so exactly one model is loaded.
    for i in range(1,2):
        print(i)
        models.append( load_pytorch_model('ModelData/' +str(i) +'/' +'model.model',
                            get_config('/' +str(i) +'/config.json'), data) )



    


    #fullGameData,fullGameLabels = data_loader.getSequencialNaive(data,hero_feature_indicies,label_indicies)


    xLims = data['time'].values

    #health = data['player_4_m_iHealth'].values

    
    #######################
    # get original health
    ######################

    norm_stats = None
    # NOTE(review): pickle.load executes whatever the file encodes; only use
    # it on a norm_stats.pickle from a trusted source.
    with open("norm_stats.pickle", 'rb') as f:
        norm_stats = pickle.load(f)

    # NOTE(review): the stats loaded above are bound to `norm_stats`, but the
    # loop reads `normalization_stats` -- confirm which name is intended.
    for label,min_value,max_value in normalization_stats:
        if "_m_iHealth" in label:
            health_min = min_value
            health_max = max_value
        if "m_iMaxHealth" in label:
            maxhealth_min = min_value
            maxhealth_max = max_value

    healthes = []
    max_healthes = []
    relative_healthes = []
    for i in range(0,10):
        health_vals = data['player_' + str(i) + '_m_iHealth'].values
        maxhealth_vals = data['player_' + str(i) + '_m_iMaxHealth'].values

        # Map the stored values back with value * (max - min) + min.
        health_vals = health_vals * (health_max - health_min) + health_min
        maxhealth_vals = maxhealth_vals * (maxhealth_max - maxhealth_min) + maxhealth_min

        relative_health_vals = health_vals / maxhealth_vals # hopefully maxhealth is never 0

        healthes.append(health_vals)
        max_healthes.append(maxhealth_vals)
        relative_healthes.append(relative_health_vals)


    #######################
    # get death times
    ######################

    labels = [(i,label) for i,label in  enumerate(list(data))]
    death_time_indicies = preprocess.labels_to_indicies(preprocess.select_features_by_name("time_until_next_death",labels))
    death_times = data.values[:,death_time_indicies].astype(np.float32)



    # Whole-game prediction per loaded model; `pred` and `y` from this loop
    # are overwritten or unused further down.
    for m in models:

        X = [torch.from_numpy(hero_X) for hero_X in m.fullGameData]

        # NOTE(review): `model` is not bound in this function (the loop
        # variable is `m`) -- confirm which object should be called here.
        pred = model(X)
        pred = torch.sigmoid(pred)
        pred = pred.cpu().detach().numpy()

        y = m.fullGameLabels




    currentMeanTrueAccuracy = 0
    currentMeanFalseAccuracy=0

    numTruePos = 0
    numFalsePos = 0
    numTrueNeg = 0
    numFalseNeg = 0
    # Row-by-row prediction, averaged over the loaded models.
    for i in range(0,data.shape[0]):
        predX = 0
        for m in models:
            y = m.fullGameLabels[i]
            y = np.array(y)
            y = np.expand_dims(y,0)

            X = [torch.from_numpy(hero_X[i:(i+1),:]) for hero_X in m.fullGameData]
            print(i)
            #predX = averagePred(models,X)
            predX = modelPred(m.model,X) +predX


        predX = predX/len(models)

        # The block below is a string literal, not executed code: the
        # confusion-matrix values it would compute are never assigned.
        '''
        true_pos = ((predX > 0.5) == (y > 0.5)).reshape(-1).astype(np.float32)
        true_neg = ((predX < 0.5) == (y <0.5)).reshape(-1).astype(np.float32)
        false_pos = ((predX > 0.5) == (y < 0.5)).reshape(-1).astype(np.float32)
        false_neg = ((predX < 0.5) == (y > 0.5)).reshape(-1).astype(np.float32)

        for pos in true_neg:
            if pos ==1:
                numTrueNeg +=1
        for pos in false_neg:
            if pos ==1:
                numFalseNeg +=1

        for pos in true_pos:
            if pos ==1:
                numTruePos +=1
        for pos in false_pos:
            if pos ==1:
                numFalsePos +=1
        '''


        prediction = predX
        # NOTE(review): true_pos / false_pos are only assigned inside the
        # string literal above, so these two lines depend on names from
        # outside this function.
        currentMeanTrueAccuracy += np.mean(true_pos)
        currentMeanFalseAccuracy += np.mean(false_pos)

        prediction = np.squeeze(prediction,0)
        if i %3000 ==0:
            print('Current true pos ' +str(currentMeanTrueAccuracy/(i+1)))
            print('Current false pos ' +str(currentMeanFalseAccuracy/(i+1)))

        # NOTE(review): heroStuff / labelStuff are not created in this
        # function -- presumably module-level lists; verify.
        heroStuff.append(prediction)
        labelStuff.append(np.squeeze(y,0))

    print()
    print(numTruePos)
    print(numTrueNeg)
    print()
    print(numFalsePos)
    print(numFalseNeg)

    print()
    # NOTE(review): 19326 is a hard-coded divisor -- presumably the row count
    # of one particular file; confirm against data.shape[0].
    print('True Pos = ' + str(currentMeanTrueAccuracy/19326))
    print('False pos = ' + str(currentMeanFalseAccuracy/19326))

    heroStuff1 = np.swapaxes(heroStuff,0,1)
    labelStuff1= np.swapaxes(labelStuff,0,1)

    # Shift the time axis so it starts at -90.
    xLims = xLims - xLims[0] - 90

    np.save('hero.npy', np.array(heroStuff1))
    np.save('label.npy', np.array(labelStuff1))
    np.save('xLims.npy', np.array(xLims))
    np.save('health.npy',np.array(healthes))
def hidden_layer(sh_list, sh_test, sh_c_list, model, rank, split, batch_num,
                 test_batch_num, epoch_num, lamda, lr, cv):
    """Run the train/evaluate loop of one intermediate pipeline stage.

    For every epoch the stage processes ``batch_num`` batches in groups of
    at most ``lamda``.  Within a group each tick may do a forward pass on
    one of ``split`` replicas of ``model`` (input read from
    ``sh_list[rank - 1]``, output copied into ``sh_c_list[2 * rank]``) and,
    ``delay`` ticks later, the matching backward pass (gradient read from
    ``sh_list[rank + split - 1]``, input gradient copied into
    ``sh_c_list[2 * rank - 1]``).  After each group the replicas are
    averaged into ``model`` and every replica is reset to that average.
    At the end of the epoch ``model`` forwards ``test_batch_num`` test
    batches.

    Args:
        sh_list: indexable collection of channel objects; the ones used
            here must provide ``recv``, ``send_wait`` and
            ``async_send_signal``.
        sh_test: indexable collection of test channels providing ``recv``
            and ``send``.
        sh_c_list: indexable collection of buffers providing ``copy_``
            (presumably shared CUDA tensors -- confirm against the caller).
        model: module owned by this stage; must provide ``init_zero``.
        rank: index of this stage, also used as the CUDA device index.
        split: number of pipeline stages; also the number of replicas.
        batch_num: number of training batches per epoch.
        test_batch_num: number of test batches per epoch.
        epoch_num: number of epochs to run.
        lamda: number of batches between two replica averagings.
        lr: initial SGD learning rate (scaled by 0.1 after epochs 150
            and 225).
        cv: unused; the synchronisation calls that took it are disabled.
    """
    feed_q1 = sh_list[rank - 1]
    grad_q1 = sh_list[rank + split - 2]
    send_output = sh_c_list[2 * rank]
    send_grad = sh_c_list[2 * rank - 1]

    feed_test = sh_test[rank - 1]

    # NOTE(review): the downstream channels are only bound when split > 2;
    # running this stage with a smaller split fails on their first use.
    if split > 2:
        feed_q2 = sh_list[rank]
        grad_q2 = sh_list[rank + split - 1]
        feed_test2 = sh_test[rank]

    num_of_models = split
    # Number of ticks between a forward pass and its backward pass.
    delay = (split - 1) - rank

    # One replica (plus optimizer and in-flight input/output slot) per
    # model index.  The lists must hold exactly ``num_of_models`` entries:
    # slot selection below is modulo ``num_of_models`` and the averaging
    # step walks every replica, so a longer list would leave one trained
    # replica out of the average.
    models = []
    outputs = []
    inputs = []
    optim = []
    for _ in range(num_of_models):
        replica = copy.deepcopy(model)
        models.append(replica)
        outputs.append(0)
        inputs.append(0)
        optim.append(
            torch.optim.SGD(replica.parameters(),
                            lr=lr,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True))
    for replica in models:
        replica.cuda(rank)
    model.cuda(rank)
    time_tot = 0

    steps = int(batch_num / lamda)
    if batch_num % lamda != 0:
        steps += 1

    for epoch in range(epoch_num):
        s_t_u = resource_usage(RUSAGE_SELF)
        s_t = timestamp()

        model.train()
        for replica in models:
            replica.train()

        # Per-epoch accumulated durations of the six phases reported below.
        td1 = td2 = td3 = td4 = td5 = td6 = 0

        for step in range(1, steps + 1):
            # The last group only holds the batches that are left over.
            chunk = lamda
            if step == steps:
                chunk = batch_num - (step - 1) * lamda

            for tick in range(1, chunk + delay + 1):

                if tick <= chunk:
                    # Forward pass for the batch arriving at this tick.
                    t1 = timestamp()
                    x = feed_q1.recv()
                    x = x.cuda(rank, non_blocking=True)
                    t2 = timestamp()

                    input_feat = Variable(x, requires_grad=True)
                    model_idx = (tick - 1) % num_of_models
                    output = models[model_idx].forward(input_feat)
                    inputs[model_idx] = input_feat
                    outputs[model_idx] = output
                    t3 = timestamp()

                    feed_q2.send_wait()
                    send_output.copy_(output.data)
                    feed_q2.async_send_signal()
                    t4 = timestamp()

                    # Accumulate only when the phase actually ran, so
                    # stale timestamps are never counted twice.
                    td1 += t2 - t1
                    td2 += t3 - t2
                    td3 += t4 - t3

                if tick > delay:
                    # Backward pass for the batch forwarded ``delay``
                    # ticks ago.
                    t5 = timestamp()
                    pg = grad_q2.recv()
                    pg = pg.cuda(rank)
                    t6 = timestamp()

                    output_idx = (tick - delay - 1) % num_of_models
                    optimizer = optim[output_idx]
                    optimizer.zero_grad()
                    outputs[output_idx].backward(pg)
                    optimizer.step()
                    t7 = timestamp()

                    grad_q1.send_wait()
                    send_grad.copy_(inputs[output_idx].grad.data)
                    grad_q1.async_send_signal()
                    t8 = timestamp()

                    td4 += t6 - t5
                    td5 += t7 - t6
                    td6 += t8 - t7

            # Average the replicas into ``model`` ...
            model.init_zero()

            with torch.cuda.device(rank):

                for replica in models:
                    for avg_param, rep_param in zip(model.parameters(),
                                                    replica.parameters()):
                        avg_param.requires_grad_(False)
                        avg_param.copy_(avg_param.data +
                                        rep_param.data / num_of_models)

                # ... and restart every replica from the average.
                for replica in models:
                    for rep_param, avg_param in zip(replica.parameters(),
                                                    model.parameters()):
                        rep_param.requires_grad_(False)
                        rep_param.copy_(avg_param.data)
                        rep_param.requires_grad_(True)

        # Epoch totals; the line that reported them is currently disabled.
        e_t_u = resource_usage(RUSAGE_SELF)
        e_t = timestamp()
        u_t = e_t_u.ru_stime - s_t_u.ru_stime
        epoch_time = e_t - s_t
        time_tot = time_tot + epoch_time
        print('rank =', rank, 'recv output =', td1)
        print('rank =', rank, 'forward =', td2)
        print('rank =', rank, 'send output', td3)
        print('rank =', rank, 'recv grad =', td4)
        print('rank =', rank, 'backward =', td5)
        print('rank =', rank, 'send grad =', td6)

        model.eval()
        for replica in models:
            replica.eval()
        for _ in range(test_batch_num):
            x = feed_test.recv()
            x = x.cuda(rank)
            output = model.forward(x)
            feed_test2.send(output.data.to('cpu'))

        # Step-wise learning-rate decay.
        if epoch == 150 or epoch == 225:
            lr = lr * 0.1
            for optimizer in optim:
                for group in optimizer.param_groups:
                    group['lr'] = lr
示例#21
0
    def generate_models(self, queue=None, output_fits=True):
        """Generate all the high resolution models used in the fitting process.

        The high resolution catalog must be set before models can be
        generated.

        Args:
            queue: optional output queue used in parallel mode.  On success
                ``(self.number, models)`` is put on it, where ``models`` is
                the list of every object's ``model_img``; on failure
                ``(self.number, traceback_string)`` is put on it instead.
            output_fits: when true, a fits file holding the summed model
                (primary HDU) and each individual model (one image
                extension each) is written to
                ``<model_dir><number>_input.fits``, replacing any existing
                file.  With no model objects the primary HDU carries no
                data.

        Returns:
            ``True`` on success, or the formatted traceback string if
            generating a model raised an exception.

        Raises:
            ValueError: if the high resolution catalog has not been loaded.
        """

        if not self.hres_loaded:
            raise ValueError(
                'You must set the high resolution catalog with gblend.set_hres_catalog() before generating models!'
            )

        imgs = []
        # Running sum of all model images; stays None if there are none.
        full_model = None

        # loop through high resolution models
        for model_obj in self:

            # Report any ordinary error back to the caller as a traceback
            # string.  KeyboardInterrupt and SystemExit are not Exception
            # subclasses, so they still propagate.
            try:

                # generate models
                model_obj.generate_model(
                    self.pad, (self.ny, self.nx),
                    self.psf.copy(),
                    use_integration=self.config['use_integration'])

                # the rest is only needed for the fits output
                if not output_fits:
                    continue

                # generate extension for outputting to fits
                imgs.append(pyfits.ImageHDU(model_obj.model_img.copy()))

                # add to complete model
                if full_model is None:
                    full_model = model_obj.model_img.copy()
                else:
                    full_model += model_obj.model_img

            except Exception:
                trace = traceback.format_exc()
                if queue is not None:
                    queue.put((self.number, trace))
                return trace

        # output a fits file for the full model and individual models
        if output_fits:
            # prepare hdulist
            hdus = [pyfits.PrimaryHDU(full_model)]
            hdus.extend(imgs)
            hdulist = pyfits.HDUList(hdus)

            # write out file - delete first
            fits_path = '%s%d_input.fits' % (self.config['model_dir'],
                                             self.number)
            if os.path.isfile(fits_path):
                os.remove(fits_path)
            hdulist.writeto(fits_path)

        # In parallel mode the models have to be handed back through the
        # queue: collect the image of every model object and send them off.
        if queue is not None:
            models = [model_obj.model_img for model_obj in self]
            queue.put((self.number, models))

        # now we are ready for fitting!
        self.ready = True
        return True
示例#22
0
        if model_type == 'rf':
            m = RandomForestRegressor(n_estimators=100, random_state=1)
        elif model_type == 'dt':
            m = DecisionTreeRegressor()
        elif model_type == 'linear':
            m = LinearRegression()
        elif model_type == 'ridge':
            m = RidgeCV()
        elif model_type == 'svm':
            m = SVR(gamma='scale')
        elif model_type == 'gb':
            m = GradientBoostingRegressor(random_state=1)

        for feat_set in ['basic', 'dasc']:
            models.append(f'{model_type}_{feat_set}')
            if feat_set == 'basic':
                feat_set = feat_names[1:]
            elif feat_set == 'dasc':
                feat_set = ['X_d1', 'X_d2', 'X_d3']

            m.fit(df_full[feat_set], df_full['Y_sig_mean_normalized'].values)

            for i, (k, v) in enumerate(ds.keys()):
                if v == 'test':
                    df = ds[(k, v)]
                    #if k == 'clath_aux+gak_a7d2_new':
                    #    df = df.dropna()
                    X = df[feat_set]
                    X = X.fillna(X.mean())
                    #y = df['Y_sig_mean_normalized']
def output_layer(sh_list, sh_test, sh_c_list, shm_list, train_loader,
                 test_loader, model, loss_function, rank, split, batch_size,
                 batch_num, test_batch_num, test_num, epoch_num, lamda, lr,
                 cv):
    """Run the train/evaluate loop of the last pipeline stage.

    For every epoch the stage processes ``batch_num`` batches in groups of
    at most ``lamda``.  Each batch is read from ``sh_list[rank - 1]``, its
    target from ``shm_list[0]``; one of ``split`` replicas of ``model``
    computes the loss, takes an SGD step, and the gradient with respect to
    the received input is copied into ``sh_c_list[rank + split - 2]``.
    After each group the replicas are averaged into ``model`` and every
    replica is reset to that average.  At the end of the epoch ``model``
    is evaluated on inputs read from ``sh_test[rank - 1]`` against the
    targets of ``test_loader``, and timing, loss and accuracy are printed.

    Args:
        sh_list: indexable collection of channel objects; the ones used
            here must provide ``recv``, ``send_wait`` and
            ``async_send_signal``.
        sh_test: indexable collection of test channels providing ``recv``.
        sh_c_list: indexable collection of buffers providing ``copy_``
            (presumably shared CUDA tensors -- confirm against the caller).
        shm_list: indexable collection whose first entry delivers the
            training targets through ``recv``.
        train_loader: training data loader; only an iterator is created
            from it, the data itself is not read here.
        test_loader: iterable of ``(data, target)`` pairs; only ``target``
            is used.
        model: module owned by this stage; must provide ``init_zero``.
        loss_function: callable ``(output, target) -> loss``.
        rank: index of this stage, also used as the CUDA device index.
        split: number of pipeline stages; also the number of replicas.
        batch_size: unused by the current implementation.
        batch_num: number of training batches per epoch.
        test_batch_num: divisor for the reported test loss.
        test_num: divisor for the reported accuracy.
        epoch_num: number of epochs to run.
        lamda: number of batches between two replica averagings.
        lr: initial SGD learning rate (scaled by 0.1 after epochs 150
            and 225).
        cv: unused; the synchronisation calls that took it are disabled.
    """
    feed_q2 = sh_list[rank - 1]
    grad_q2 = sh_list[rank + split - 2]

    send_grad = sh_c_list[rank + split - 2]

    feed_test = sh_test[rank - 1]
    send_target = shm_list[0]

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    loss_tot = 0
    time_tot = 0
    cuda_time = 0
    num_of_models = split

    # One replica and optimizer per model index.
    models = []
    optim = []
    for _ in range(num_of_models):
        replica = copy.deepcopy(model)
        models.append(replica)
        optim.append(
            torch.optim.SGD(replica.parameters(),
                            lr=lr,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True))
    for replica in models:
        replica.cuda(rank)
    model.cuda(rank)

    steps = int(batch_num / lamda)
    if batch_num % lamda != 0:
        steps += 1

    for epoch in range(epoch_num):
        s_t_u = resource_usage(RUSAGE_SELF)
        s_t = timestamp()
        start.record()
        loss_sum = 0
        model.train()

        # Per-epoch accumulated durations of the four phases reported below.
        td1 = td2 = td3 = td4 = 0

        # NOTE(review): this iterator is never consumed (targets arrive
        # through ``send_target``).  It is kept because creating it may
        # have side effects inside the loader -- confirm and remove.
        train_data = train_loader.__iter__()

        for step in range(1, steps + 1):
            # The last group only holds the batches that are left over.
            chunk = lamda
            if step == steps:
                chunk = batch_num - (step - 1) * lamda

            for tick in range(1, chunk + 1):
                t1 = timestamp()

                # Receive the previous stage's output and the targets.
                x = feed_q2.recv()
                x = x.cuda(rank, non_blocking=True)
                target = send_target.recv()
                target = target.cuda(rank, non_blocking=True).long()
                t2 = timestamp()

                model_idx = (tick - 1) % num_of_models
                input_feat = Variable(x, requires_grad=True)
                output = models[model_idx].forward(input_feat)
                loss = loss_function(output, target)
                t3 = timestamp()

                optimizer = optim[model_idx]
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                t4 = timestamp()

                # Hand the input gradient back to the previous stage.
                grad_q2.send_wait()
                send_grad.copy_(input_feat.grad.data)
                grad_q2.async_send_signal()
                t5 = timestamp()

                loss_sum = loss_sum + loss.data
                td1 += t2 - t1
                td2 += t3 - t2
                td3 += t4 - t3
                td4 += t5 - t4

            # Average the replicas into ``model`` ...
            model.init_zero()

            with torch.cuda.device(rank):

                for replica in models:
                    for avg_param, rep_param in zip(model.parameters(),
                                                    replica.parameters()):
                        avg_param.requires_grad_(False)
                        avg_param.copy_(avg_param.data +
                                        rep_param.data / num_of_models)

                # ... and restart every replica from the average.
                for replica in models:
                    for rep_param, avg_param in zip(replica.parameters(),
                                                    model.parameters()):
                        rep_param.requires_grad_(False)
                        rep_param.copy_(avg_param.data)
                        rep_param.requires_grad_(True)

        loss_tot = loss_sum / batch_num

        e_t_u = resource_usage(RUSAGE_SELF)
        e_t = timestamp()
        u_t = e_t_u.ru_stime - s_t_u.ru_stime
        epoch_time = e_t - s_t
        end.record()
        # elapsed_time() requires both events to have completed.
        torch.cuda.synchronize()
        cuda_time = cuda_time + start.elapsed_time(end)
        print(
            'node3 user time = %f time = %f cuda time = %f cuda tot time = %f loss_tot = %f'
            % (u_t, epoch_time, start.elapsed_time(end), cuda_time, loss_tot))
        time_tot = time_tot + epoch_time
        print('rank =', rank, 'recv output =', td1)
        print('rank =', rank, 'forward =', td2)
        print('rank =', rank, 'backward =', td3)
        print('rank =', rank, 'send grad =', td4)

        model.eval()
        correct = 0
        dev_loss_tot = 0

        # The inputs come from the previous stage; the loader only
        # supplies the matching targets.
        for _data, target in test_loader:
            x = feed_test.recv()
            x = x.cuda(rank)
            target = target.cuda(rank)

            output = model.forward(x)
            _, pred = torch.max(output.data, 1)
            dev_loss = loss_function(output, target)
            dev_loss_tot += dev_loss.item()

            correct += (pred == target).sum()
        print('epoch=', epoch, 'tot_time =', time_tot, 'accuracy =',
              (100 * correct / test_num), 'test_loss',
              dev_loss_tot / test_batch_num)

        # Step-wise learning-rate decay.
        if epoch == 150 or epoch == 225:
            lr = lr * 0.1
            for optimizer in optim:
                for group in optimizer.param_groups:
                    group['lr'] = lr
示例#24
0
def find_best_segmentation(model_funct,
                           folder,
                           validation_frac,
                           metric="choose",
                           raw_vol=raw,
                           gt_vol=gt,
                           aff_vol=aff,
                           top_n_valid=3,
                           gpus=1):
    """Pick a segmentation among those produced by the models in ``folder``.

    Every file in ``folder`` is loaded as weights into a fresh model built
    by ``model_funct``; files that raise ``OSError`` on loading are
    reported and skipped.  The validation loss of each model is computed,
    ``top_n_valid`` models are kept, and
    ``predict_and_watershed_on_list_of_models`` is run on them.  The chosen
    segmentation is finally passed to ``save_segmentation_tifs``.

    Args:
        model_funct: callable accepting ``verbose=0`` and returning a model
            with a ``load_weights`` method.
        folder: directory containing the weight files.
        validation_frac: forwarded to ``process.process``.
        metric: forwarded to the watershed sweep.  With ``"choose"`` the
            split/merge figures of every segmentation are printed and the
            index is read from standard input; otherwise the segmentation
            with the largest metric is taken (the first one on ties).
        raw_vol, gt_vol, aff_vol: volumes forwarded to ``process.process``;
            ``gt_vol`` is also passed to ``save_segmentation_tifs``.
        top_n_valid: number of models kept for the watershed sweep.
        gpus: forwarded to ``process.process``.
    """

    loss = custom_loss.loss()

    models = []
    model_keys = []

    print("\nGrabbing models...\n")
    for file_name in os.listdir(folder):

        model = model_funct(verbose=0)

        try:
            model.load_weights(folder + "/" + file_name)
        except OSError:
            print("\n%s is invalid\n" % file_name)
            continue

        models.append(model)
        model_keys.append(file_name)

    print("\nLoaded %i models.\n" % len(models))
    for key in model_keys:
        print("\t" + key)

    proc = process.process(loss.weighted_cross,
                           raw_vol,
                           gt_vol,
                           aff_vol,
                           model=models[0],
                           validation_frac=validation_frac,
                           gpus=gpus)

    print("\nGetting validation loss for all models...\n")
    valid_loss = []
    for model in models:
        proc.model = model
        valid_loss.append(proc.calc_validation_loss())

    # NOTE(review): argsort is ascending, so this keeps the models with the
    # LARGEST validation values, largest first.  If calc_validation_loss
    # returns a loss (lower is better) this selects the worst models --
    # confirm the intended ordering.
    top_indexs = np.asarray(valid_loss).argsort()[-top_n_valid:][::-1]

    top_models = [models[index] for index in top_indexs]
    top_model_keys = [model_keys[index] for index in top_indexs]

    print("\nFound top %i models.\n" % top_n_valid)

    for key in top_model_keys:
        print("\t%s" % key)

    print("\nWatershed sweep...\n")
    segs, metrics = predict_and_watershed_on_list_of_models(top_models,
                                                            metric=metric)

    if metric != "choose":

        metric_values = np.asarray(metrics)
        best = np.max(metric_values)

        # Take the first position holding the maximum; converting the whole
        # match array with int() fails as soon as two entries tie.
        index = int(np.where(metric_values == best)[0][0])

        print("BEST PERFORMANCE WAS =" + str(best))

    else:

        for n, seg_metric in enumerate(metrics):
            print("\tSegmentation %i: " % n)
            print("\t\tSplit: " + str(seg_metric[0]))
            print("\t\tMerge: " + str(seg_metric[1]))

        index = int(input("\n\nWhich model?"))

    seg = segs[index]

    # Map the segmentation index back onto the model it came from.  Integer
    # arithmetic avoids the float rounding of (index / len(segs)) * n.
    model_key = top_model_keys[(index * len(top_models)) // len(segs)]

    print("Model %s is the choice" % model_key)

    save_segmentation_tifs(gt_vol, seg)
示例#25
0
def parse_args_and_settings():
    """Parse the command line and load the matching settings.

    The phase (``train``, ``deploy`` or ``evaluate``) is taken from
    ``--phase`` or inferred from ``--model`` / ``--answer_file``.  Depending
    on the phase, ``run_name``, ``subdir``, ``model_dir``, ``model`` (a
    sorted list of ``.pt`` paths when deploying), ``conf_file``,
    ``deploy_data``, ``deploy_level`` and ``no_cv`` are filled in on the
    returned namespace.  The config file is read through
    ``config_utils.settings_from_config`` and a deep copy of the resulting
    settings is stored as ``settings.orig``.

    The process exits (via ``quit()``) when training is requested without a
    config file, or when a model directory holds several runs and the run
    name cannot be inferred.

    Returns:
        Tuple ``(args, settings)``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "-p",
        "--phase",
        default=None,
        help=
        "can be train, deploy (by default includes evaluate) or evaluate. Automatically inferred if not specified."
    )

    # Only for training:
    # TODO @Future Make parser give warnings when wrong combination of arguments is given (standard feature of argparse perhaps?)
    parser.add_argument(
        "-c",
        "--conf_file",
        default=None,
        help=
        "Path to config file including .ini; can be left None only in deploy/evaluation, in which case it is derived from model/answers path.",
        metavar="FILE")
    parser.add_argument(
        "-r",
        "--random",
        action=
        'store_true',  # Note: also kinda used, but overwritten, during deploy/evaluate
        help="To sample values randomly from intervals in config file.")
    parser.add_argument(
        "-d",
        "--subdir",
        default=None,  # Note: also used, but overwritten, during deploy
        help=
        "(training phase) use /models/<subdir> for output; by default set to year_month."
    )
    parser.add_argument(
        "-n",
        "--run_name",
        default=None,  # Note: also used, but overwritten, during deploy/evaluate
        help=
        "(training phase) by default set to time stamp; always automatically appended by randomly sampled params."
    )

    # For deployment/evaluation:
    parser.add_argument(
        "-t",
        "--deploy_data",
        default=None,
        help=
        "Default is data on which the model was trained; can be 'train', 'test' or 'trial', abbreviations as defined in config_utils. During evaluation this can be omitted: it will be read from the predictions .csv file."
    )
    parser.add_argument(
        "-l",
        "--deploy_level",
        default=None,
        help=
        "Scene or episode; default is the level on which the model was trained. During evaluation this can be omitted: it will be read from the predictions .csv file."
    )

    # Only for deployment
    parser.add_argument(
        "-m",
        "--model",
        default=None,
        help=
        "(deployment phase) path to model, with base name (though without .pt suffix); if fold number is included, only that fold is considered."
    )
    parser.add_argument(
        "--answers_per_fold",
        action='store_true',
        help=
        "To write an answers.txt file for each fold separately (in addition to their merger)."
    )
    parser.add_argument(
        "--no_cv",
        action='store_true',
        help=
        "Option to prevent respecting cross-validation (which is done by default when deployed on training data)."
    )
    parser.add_argument(
        "-s",
        "--no_eval",
        action='store_true',
        help=
        "If data includes keys, evaluation phase is run automatically after deployment; this option prevents that."
    )

    # Only for evaluation
    parser.add_argument(
        "-a",
        "--answer_file",
        default=None,
        help="Evaluates an answer file, outputting interesting statistics.")
    parser.add_argument(
        "--no_semeval",
        action='store_true',
        help=
        "To turn off the SemEval filter for evaluation (filter which groups infrequent (< 3) entities together as 'other')."
    )

    # Meta:
    parser.add_argument(
        "--no_cuda",
        action='store_true',
        help="Forces not using cuda; otherwise cuda is used whenever available."
    )
    parser.add_argument(
        "-v",
        "--verbosity",
        type=int,
        default=3,
        help=
        "Sets verbosity regarding console output (default 3; lower to print less)."
    )
    parser.add_argument(
        "-f",
        "--no_files",
        action='store_true',
        help="Prevents generation of folders and files (log, model, answer).")

    args = parser.parse_args()

    # If phase is not specified, this can usually be inferred from other arguments:
    if args.phase is None:
        if args.model is not None:
            args.phase = 'deploy'
        elif args.answer_file is not None:
            args.phase = 'evaluate'
        else:
            args.phase = 'train'

    # Use CUDA only if available.  is_available must be *called*: the bare
    # function object is always truthy, which would disable this fallback.
    if not args.no_cuda and not torch.cuda.is_available():
        print(
            'WARNING: CUDA requested but unavailable; running on cpu instead.')
        args.no_cuda = True

    # Deploy either a single model or a set of models (of the same type).
    # Also, from the model file arg.model also extract model_dir and run_name:
    if args.phase == 'deploy':
        if '.pt' in args.model:
            # A single model file .pt was provided, so deploy only on that:
            runs_path, args.run_name = os.path.split(args.model)
            args.model_dir, args.subdir = os.path.split(runs_path)
            args.run_name = args.run_name[:-3]  # removes the .pt
            if '--fold' in args.run_name:
                args.run_name = args.run_name.split('--fold')[0]
            args.model = [args.model]
        else:
            # model name doesn't contain .pt (i.e., either directory, or directory+run_name):
            if os.path.isdir(args.model):
                # model is a directory
                runs_path = args.model
                args.run_name = None  # To be extracted below
            else:
                # model is not a directory, nor .pt; hence only run_name of model is given:
                runs_path, args.run_name = os.path.split(args.model)
            args.model_dir, args.subdir = os.path.split(runs_path)
            # Get all model paths from directory (with run_name):
            models = []
            for file in os.listdir(runs_path):
                if file.endswith(".pt"):
                    if args.run_name is None:
                        # The first .pt file found defines the run name.
                        args.run_name = file[:-3]  # removes the .pt
                        if '--fold' in args.run_name:
                            args.run_name = args.run_name.split('--fold')[0]
                    if file.startswith(args.run_name):
                        models.append(os.path.join(runs_path, file))
                    elif os.path.isdir(args.model):
                        print(
                            "ERROR: run_name could not be inferred; directory contains multiple runs.\n Rerun with more specific --model (i.e., including model file name, minus .pt and minus --fold#)."
                        )
                        quit()
            args.model = sorted(models)

    # When evaluating, obtain run name etcetera from the provided answers .csv file:
    if args.phase == 'evaluate':
        # Drops the last four characters (the file extension).
        args.run_name = os.path.basename(args.answer_file)[:-4]
        if args.run_name.endswith('--ensemble'):
            args.run_name = args.run_name[:
                                          -10]  # removes the --ensemble suffix
        if '--fold' in args.run_name:
            args.run_name = args.run_name.split('--fold')[0]
        if '--cv' in args.run_name:
            args.run_name = args.run_name.split('--cv')[0]
        args.model_dir = None  # This is kinda ugly.

    # For train phase a config file is mandatory; otherwise it can be automatically obtained:
    if args.conf_file is None:
        if args.phase == 'train':
            print(
                'ERROR: training requires a config file (try including -c config.ini)'
            )
            quit()
        elif args.phase == 'deploy':
            args.conf_file = os.path.join(runs_path, args.run_name + '.ini')
        elif args.phase == 'evaluate':
            args.conf_file = os.path.join(os.path.dirname(args.answer_file),
                                          args.run_name + '.ini')

    # Read the config file (either given as argument, or obtained from pre-trained model or its predictions file):
    if args.phase == 'deploy' or args.phase == 'evaluate':
        # Of course don't randomly sample when deploying or evaluating.
        args.random = False
    settings, fixed_params, sampled_params = config_utils.settings_from_config(
        args.conf_file, args.random)
    # NOTE: Which params were fixed or sampled determines the subdir and run_name in case of training.

    # If no level and data for deployment are given, these are taken from training data/level in config file
    if args.phase == 'deploy':
        args.deploy_level = args.deploy_level or settings.data.level
        args.deploy_data = args.deploy_data or settings.data.dataset
        if args.deploy_data in config_utils.data_paths:
            args.deploy_data = config_utils.data_paths[args.deploy_data][
                args.deploy_level]

    # For evaluate, if deploy_data is not provided, attempt to read it from the answer_file:
    # (The alternative, of reading it from directory structure, seems too unsafe.)
    if args.phase == 'evaluate' and args.deploy_data is None:
        with open(args.answer_file) as file:
            firstline = file.readline()
            if firstline.startswith('#'):
                args.deploy_data = firstline.strip('# \n')

    # When deploying on a new dataset (not training data), cross-validation doesn't apply:
    if args.deploy_data != settings.data.dataset:
        args.no_cv = True

    # When training, create runs dir, id and run name if none were given (mostly time stamps).
    if args.phase == 'train':
        args.model_dir = 'models'  # Default for training output.
        args.subdir = args.subdir or time.strftime("%Y_%m")
        args.run_name = args.run_name or time.strftime("%Y_%m_%d_%H_%M_%S")
        if not sampled_params:
            args.run_name = 'fixed--' + args.run_name
        else:
            # TODO @Future: Automatic run naming can be considerably improved wrt. readability.
            sampled_params_strings = sorted([
                k[0:3] + "--" + str(sampled_params[k])[0:5].replace(",", "-")
                for k in sampled_params
            ])
            args.run_name = '{0}--{1}'.format(
                args.run_name, "--".join(sampled_params_strings))

    # Within the settings Namespace, which is subject to overwriting, make sure to include a backup,
    # so that original settings can at any time be saved to a new config file.
    settings.orig = copy.deepcopy(settings)

    print('--------------Setttings---------------------')
    print('[phase] ', args.phase)
    print('[deploy_data] ', args.deploy_data)
    print('[settings.data.datasets] ', settings.data.dataset)
    print('--------------------------------------------')

    return args, settings
示例#26
0
def compile_all_results(scenarios, dir='../data'):
    """
    Compiles the results across multiple scenarios produced by running run_on_cluster on each
    one into a single csv file.

    For every scenario index i in range(scenarios), the files
    '<dir>/v<i>/bad_model_summary.csv' and '<dir>/v<i>/latent_simplex_summary.csv' are read,
    their header row is skipped, and every remaining row is tagged with i and collected.
    The combined table is saved as '<dir>/all_summary_metrics.csv'.

    scenarios -- number of scenarios; sub-directories v0 ... v<scenarios-1> are read.
    dir       -- directory holding the per-scenario sub-directories; also where the
                 combined output file is written.

    Raises IndexError if a data row has fewer than 15 columns.
    """

    # (output column name, index of that value in a summary row), listed in output order.
    # Source column 5 (std_bias) is intentionally absent: it was never written to the
    # combined file.
    # NOTE(review): 'mean_covearge_bycause' looks like a typo of 'mean_coverage_bycause';
    # it is kept as-is because it is the header emitted to the output file -- confirm
    # with downstream consumers before renaming.
    fields = [
        ('model', 0),
        ('time', 2),
        ('true_cf', 3),
        ('true_std', 4),
        ('cause', 1),
        ('mean_abs_err', 6),
        ('median_abs_err', 7),
        ('mean_rel_err', 8),
        ('median_rel_err', 9),
        ('mean_csmf_accuracy', 10),
        ('median_csmf_accuracy', 11),
        ('mean_covearge_bycause', 12),
        ('mean_coverage', 13),
        ('percent_total_coverage', 14),
    ]
    columns = dict((name, []) for name, _ in fields)
    scenario = []

    for i in range(scenarios):
        for j in ['bad_model', 'latent_simplex']:
            # The context manager closes each summary file as soon as it has been read.
            with open('%s/v%s/%s_summary.csv' % (dir, i, j)) as summary_file:
                reader = csv.reader(summary_file)
                # Skip the header row; next() works on both Python 2 and Python 3,
                # unlike the Python 2-only reader.next().
                next(reader)
                for row in reader:
                    for name, index in fields:
                        columns[name].append(row[index])
                    scenario.append(i)

    combined = pl.np.core.records.fromarrays(
        [scenario] + [columns[name] for name, _ in fields],
        names=['scenario'] + [name for name, _ in fields])
    pl.rec2csv(combined, fname='%s/all_summary_metrics.csv' % (dir))