示例#1
0
def train(epoch):
    """Perform one training step on the whole graph and print the metrics.

    The model is run on the module-level ``features``/``adj`` inputs, the
    loss is taken over ``idx_train`` and a single optimizer step is applied.
    Unless ``args.fastmode`` is set, a second forward pass is made in eval
    mode before the test-split loss and F1 scores are computed.  One summary
    line is printed; nothing is returned.

    :param epoch: Zero-based epoch number (reported as ``epoch + 1``).
    """
    started_at = time.time()

    # Optimisation step in training mode.
    model.train()
    optimizer.zero_grad()
    logits = model(features, adj)
    train_loss = criterion(logits[idx_train], labels[idx_train])
    train_micro, train_macro = Evaluation(logits[idx_train], labels[idx_train])
    train_loss.backward()
    optimizer.step()

    if not args.fastmode:
        # Recompute the outputs in eval mode so the metrics below are not
        # taken from the training-mode forward pass.
        model.eval()
        logits = model(features, adj)

    # The validation split is currently not reported; only the test split is.
    test_loss = criterion(logits[idx_test], labels[idx_test])
    test_micro, test_macro = Evaluation(logits[idx_test], labels[idx_test])

    report = [
        f'Epoch: {epoch + 1:04d}',
        f'loss_train: {train_loss.item():.4f}',
        f"f1_train_micro= {train_micro:.4f}",
        f"f1_train_macro= {train_macro:.4f}",
        f'loss_test: {test_loss.item():.4f}',
        f"f1_test_micro= {test_micro:.4f}",
        f"f1_test_macro= {test_macro:.4f}",
        f'time: {time.time() - started_at:.4f}s',
    ]
    print(*report)
def eval_avgs_against_descriptions(embedding=None):
    """
    Compute ROUGE-L and cosine similarity evaluation measures where ideal
    summaries are taken as the TED Talk descriptions.

    Up to 50 records (those kept by ``drop_noconcept_recs`` out of the 75
    read) are summarized, each summary is scored against the record's
    description, and the scores are averaged.

    :param embedding: fastText word embedding. Accepted for interface
        compatibility; this function does not forward it to any call.
    :return: List containing averages of precision, recall, F-score, and cosine
        similarity over the evaluated documents. All four values are NaN when
        no record is available.
    :rtype: list(float)
    """
    # Get records where one or more tag is present in transcript.
    df = read_data(75)
    df = drop_noconcept_recs(df)[:50]

    # ROUGE-L triples and cosine scores are collected separately: packing a
    # 3-element result and a scalar into one array is a ragged construction
    # that recent NumPy versions reject.
    rouge_scores = []
    cosine_scores = []
    for j in range(len(df)):
        # Purely positional lookup, so it keeps working when filtering has
        # left gaps in the dataframe's index labels.
        summary_length = df['d_cnt'].iloc[j]
        s = summarize(df.iloc[[j]], summary_length)
        ideal = preprocess_transcripts(df.iloc[[j]],
                                       clean_transcript_data=False,
                                       df_field='description')
        rl = Evaluation.rouge_l(s, ideal[0][0])
        cs = Evaluation.cos_similarity(s, ideal[0][0])
        rouge_scores.append([rl[0], rl[1], rl[2]])
        cosine_scores.append(cs)

    if not rouge_scores:
        return [float('nan')] * 4

    # Average evaluation scores over number of dataframe records.
    avg_prec = np.average([scores[0] for scores in rouge_scores])
    avg_recall = np.average([scores[1] for scores in rouge_scores])
    avg_fscore = np.average([scores[2] for scores in rouge_scores])
    avg_cossim = np.average(cosine_scores)

    return [avg_prec, avg_recall, avg_fscore, avg_cossim]
示例#3
0
 def fitting(self, **kwargs):
     r"""Fit every candidate model and rank the fits by AIC, AICc and BIC.

     Each entry of ``self._models`` is fitted to ``self._data`` (extra
     keyword arguments are forwarded to its ``fit``).  The criteria are
     averaged over ``self._n_mc_sample[0]`` Monte-Carlo rounds; in each round
     ``Evaluation.make_sample`` draws ``self._n_mc_sample[1]`` points per
     entry of ``self._datas`` (column 0 is used as x, column 1 as pmf, with
     negative pmf values clipped to zero).

     Side effects: prints the best fit per criterion, writes the best-fit
     curves to ``by_aic.txt``, ``by_aicc.txt`` and ``by_bic.txt`` and the
     averaged criteria table to ``AIC_AICc_BIC_LIH.txt`` in the current
     working directory.

     :param kwargs: Forwarded unchanged to every model's ``fit`` call.
     :return: ``(ret, best_aic, best_aicc, best_bic)`` where ``ret`` has
         shape ``(4, n_models)`` with rows AIC, AICc, BIC, LIH and the
         remaining items are the fitted models minimising each criterion.
     """
     x, y = self._data[0], self._data[1]
     _models = [_.fit(x, y, **kwargs) for _ in self._models]
     _eva = [Evaluation(_) for _ in _models]
     _pmfs = np.array([_.T[1] for _ in self._datas])
     _xs = np.array([_.T[0] for _ in self._datas])
     _pmfs[_pmfs < 0] = 0
     _n = len(_eva)
     # AIC AICc BIC LIH: 0 1 2 3
     ret = np.zeros((4, _n))
     _n_mc, _n_sample = self._n_mc_sample
     for __ in range(_n_mc):
         samples = np.array([Evaluation.make_sample(_n_sample, _x, _pmf)
                             for _x, _pmf in zip(_xs, _pmfs)])
         sample = samples.flatten()
         ret[0] += np.array([_.aic(sample) for _ in _eva])
         ret[1] += np.array([_.aicc(sample) for _ in _eva])
         ret[2] += np.array([_.bic(sample) for _ in _eva])
         ret[3] += np.array([_.score(sample) for _ in _eva])
     ret /= _n_mc

     # Locate each winner once and reuse the index for both lookup and report.
     _i_aic = int(np.argmin(ret[0]))
     _i_aicc = int(np.argmin(ret[1]))
     _i_bic = int(np.argmin(ret[2]))
     _best_aic = _models[_i_aic]
     _best_aicc = _models[_i_aicc]
     _best_bic = _models[_i_bic]

     # self._R[0] is added to the index to report the winner
     # (presumably the smallest component count tried -- confirm).
     print("Best estamation by AIC is %d\nThe parameters are: %s" %
           (self._R[0] + _i_aic, _best_aic.Parameters))
     print("Best estamation by AICc is %d\nThe parameters are: %s" %
           (self._R[0] + _i_aicc, _best_aicc.Parameters))
     print("Best estamation by BIC is %d\nThe parameters are: %s" %
           (self._R[0] + _i_bic, _best_bic.Parameters))
     print("The normalization factor is %.4f, the original is %.4f" %
           (self.area_2, self.area_1))

     for _fname, _best in (('by_aic.txt', _best_aic),
                           ('by_aicc.txt', _best_aicc),
                           ('by_bic.txt', _best_bic)):
         np.savetxt(_fname,
                    np.vstack([x, _best.Function(x, *_best.Parameters)]).T)

     # One managed handle for header and table, so the file is closed even
     # if np.savetxt raises.
     with open('AIC_AICc_BIC_LIH.txt', 'w') as o:
         o.write('#n_components\tAIC\tAICc\tBIC\tLIH\n')
         np.savetxt(o, np.hstack([np.arange(*self._R)[:, np.newaxis], ret.T]),
                    fmt=['%d', '%.6f', '%.6f', '%.6f', '%.6f'])
     return ret, _best_aic, _best_aicc, _best_bic
def compute_performance(prediction, target, data):
    """Undo normalisation on predictions/targets and compute MAE, MAPE, RMSE.

    :param prediction: Model output; must provide ``.numpy()``.
    :param target: Ground truth; must provide ``.numpy()``.
    :param data: Either an object exposing the dataset as ``.dataset``
        (e.g. a data loader) or the dataset itself. The dataset must provide
        ``flow_norm``.
    :return: ``(performance, recovered_data)`` where ``performance`` is
        ``[mae, mape, rmse]`` and ``recovered_data`` is
        ``[prediction, target]`` after de-normalisation (kept for
        visualisation).
    """
    # During train+test the data arrives wrapped in a loader, whose dataset
    # is reachable through ``.dataset``; when a saved model is tested the
    # dataset may be passed directly. Only a missing attribute triggers the
    # fallback, so unrelated errors are no longer swallowed.
    dataset = getattr(data, 'dataset', data)

    # De-normalise predictions and targets with LoadData.recover_data.
    # flow_norm is the normalisation base: flow_norm[0] is the maximum and
    # flow_norm[1] the minimum. The tensors are converted with .numpy()
    # because recover_data works on numpy data.
    prediction = LoadData.recover_data(dataset.flow_norm[0],
                                       dataset.flow_norm[1],
                                       prediction.numpy())
    target = LoadData.recover_data(dataset.flow_norm[0], dataset.flow_norm[1],
                                   target.numpy())

    # The three metrics are implemented by the Evaluation class; both arrays
    # are flattened to vectors before being compared.
    mae, mape, rmse = Evaluation.total(
        target.reshape(-1), prediction.reshape(-1))

    performance = [mae, mape, rmse]
    recovered_data = [prediction, target]

    return performance, recovered_data
def score_all_data(df,
                   summary_length,
                   metric=rouge_n,
                   N=2,
                   use_tag_concepts=True,
                   embedding=None):
    """
    Score a generated summary for every record of a dataframe.

    Each record is summarized and compared against its preprocessed
    ``description`` field.

    NOTE(review): the score is obtained from ``Evaluation.metric``; the
    ``metric`` and ``N`` arguments are not referenced by the body. Confirm
    whether the ``metric`` argument was meant to be the scorer.

    :param df: Dataframe containing transcripts, tags, and descriptions.
    :param summary_length: Desired length of returned text summary.
    :param metric: Evaluation metric to be computed (see note above).
    :param N: Parameter for ROUGE-N computation (see note above).
    :param use_tag_concepts: Bool to signal using tags as document concepts.
    :param embedding: fastText word embedding, forwarded to ``summarize``.
    :return: NumPy array with one score per dataframe record.
    :rtype: array_type
    """
    record_count = len(df)
    scores = np.zeros(record_count)
    for position in range(record_count):
        generated = summarize(df.iloc[[position]], summary_length,
                              use_tag_concepts, embedding)
        reference = preprocess_transcripts(df.iloc[[position]],
                                           df_field='description',
                                           clean_transcript_data=False)
        scores[position] = Evaluation.metric(generated, reference[0][0])
    return scores
def eval_against_humangenerated(method, embedding=None):
    """
    Score machine summaries of the first five records against hand-written
    reference summaries using ROUGE-L and cosine similarity.

    :param method: Summarizer object (LSA or TextRank); its
        ``summarize_text`` is called with a summary length of 3.
    :param embedding: fastText word embedding. Not referenced by the body.
    :return results: One ``[rouge_l, cos_similarity]`` pair per record.
    :rtype: list(array_type): float
    """
    # Hand-written reference summaries, in the same order as the records
    # returned by read_data(5).
    human_summaries = [
        ("It's never happened before in software! Remember, the "
         "hard part is not deciding what features to add, it's "
         "The lesson was: simplicity sells."),
        ("This is where I realized that there was really a need to communicate, "
         "because the data of what's happening in the world and the child "
         "health of every country is very well aware."
         "Now, statisticians don't like it, because they say that this will not "
         "show the reality; we have to have statistical, analytical methods. "
         "And it's a new technology coming in, but then amazingly, how well it "
         "fits to the economy of the countries."),
        ("And the interesting thing is: if you do it for love, the money comes "
         "anyway. 'To be successful, put your nose down in something and get "
         "damn good at it.' Persistence is the number one reason for our success."
         ),
        ("So honeybees are important for their role in the economy as well as "
         "in agriculture. We need bees for the future of our cities and urban "
         "living. What can you do to save the bees or to help them or to think "
         "of sustainable cities in the future?"),
        ("So now I want to introduce you to my new hero in the global climate "
         "change war, and that is the eastern oyster. So the oyster was the "
         "basis for a manifesto-like urban design project that I did about the "
         "New York Harbor called oyster-tecture. To conclude, this is just one "
         "cross-section of one piece of city, but my dream is, my hope is, that "
         "when you all go back to your own cities that we can start to work "
         "together and collaborate on remaking and reforming a new urban "
         "landscape towards a more sustainable, a more livable and a more "
         "delicious future.")
    ]

    records = read_data(5)
    scored = []
    for position in range(len(records)):
        generated = method.summarize_text(records.iloc[[position]], 3)
        rouge = Evaluation.rouge_l(generated, human_summaries[position])
        cosine = Evaluation.cos_similarity(generated,
                                           human_summaries[position])
        scored.append([rouge, cosine])
    return scored
示例#7
0
def test():
    """Evaluate the model on the test split and print loss and F1 scores.

    Uses the module-level ``model``, ``features``, ``adj``, ``labels``,
    ``idx_test``, ``criterion`` and ``args``.  Nothing is returned.
    """
    model.eval()
    logits = model(features, adj)

    test_loss = criterion(logits[idx_test], labels[idx_test])
    micro_f1, macro_f1 = Evaluation(logits[idx_test], labels[idx_test])

    print("Dataset: " + args.dataset)
    summary = (
        "Test set results:",
        f"loss= {test_loss.item():.4f}",
        f"f1_test_micro= {micro_f1:.4f}",
        f"f1_test_macro= {macro_f1:.4f}",
    )
    print(*summary)
示例#8
0
def main(args):
    """Compute (or load) a graph-kernel matrix for a TU dataset and evaluate it.

    Steps:

    1. Build ``./src/gkernel`` with ``make`` when the binary is missing.
    2. Load the dataset named ``args.dataset`` below ``args.dir``; when it
       has no node features, attach a one-hot degree transform.
    3. If the kernel file for ``args.kernel`` is not on disk yet, export the
       graphs to GraphML and run ``gkernel`` to create it.
    4. Read the kernel matrix and run ``Evaluation.evaluate``.

    :param args: Namespace providing ``dir``, ``dataset``, ``kernel`` and
        ``parameter`` (plus whatever ``Evaluation`` reads from it).
    :raises subprocess.CalledProcessError: If ``make`` or ``gkernel`` exits
        with a non-zero status (previously ignored, which only surfaced
        later as a missing binary or kernel file).
    """
    gkernel_binary = pathlib.Path('./src/gkernel')
    if not gkernel_binary.is_file():
        # Argument list without shell=True: with a shell, list items after
        # the first are passed to the shell itself, not to the command.
        subprocess.check_call(["make"], cwd="./src")
    dataset = TUDataset(root=f'{args.dir}/Pytorch_geometric/{args.dataset}', name=args.dataset)

    if dataset.num_features == 0:
        # Largest number of times any node appears as an edge source.
        max_degree = 0
        for data in dataset:
            degrees = Counter(map(int, data.edge_index[0]))
            # default=0 keeps graphs without edges from raising in max().
            max_degree = max(max_degree, max(degrees.values(), default=0))

        dataset.transform = OneHotDegree(max_degree=max_degree, cat=False)

    graphml_dir = f'{args.dir}/GraphML/{args.dataset}'
    kernel_file = f'{graphml_dir}/{args.dataset.lower()}_{args.kernel}.kernel'

    if not pathlib.Path(kernel_file).is_file():
        save_to_graphml(dataset, graphml_dir)
        cmd = ['./src/gkernel', '-k', args.kernel]
        if args.parameter:
            cmd += ['-p', args.parameter]
        cmd += [
            '-i', f'{graphml_dir}/{args.dataset.lower()}.list',
            '-g', f'{graphml_dir}/data/',
            '-o', kernel_file,
        ]
        subprocess.check_call(cmd)

    K = read_kernel_matrix(kernel_file)

    y = dataset.data.y.data.numpy()

    ev = Evaluation(K, y, args, verbose=True)

    # The returned accuracies are not used here; Evaluation was created
    # with verbose=True.
    ev.evaluate(dataset)
def compute_performance(prediction, target, data):
    """De-normalise predictions and targets, then compute MAE, MAPE and RMSE.

    :param prediction: Model output; must provide ``.numpy()``.
    :param target: Ground truth; must provide ``.numpy()``.
    :param data: Either an object exposing the dataset as ``.dataset``
        (e.g. a data loader) or the dataset itself. The dataset must provide
        ``flow_norm``.
    :return: ``(performance, recovered_data)`` with
        ``performance = [mae, mape, rmse]`` and
        ``recovered_data = [prediction, target]`` after de-normalisation.
    """
    # Use the wrapped dataset when ``data`` has one, otherwise ``data``
    # itself. Only a missing attribute triggers the fallback, so unrelated
    # errors are no longer swallowed.
    dataset = getattr(data, 'dataset', data)

    # The first two entries of flow_norm are handed to recover_data to undo
    # the normalisation.
    prediction = LoadData.recover_data(dataset.flow_norm[0],
                                       dataset.flow_norm[1],
                                       prediction.numpy())
    target = LoadData.recover_data(dataset.flow_norm[0], dataset.flow_norm[1],
                                   target.numpy())

    # Metrics are computed on flattened vectors.
    mae, mape, rmse = Evaluation.total(target.reshape(-1),
                                       prediction.reshape(-1))

    performance = [mae, mape, rmse]
    recovered_data = [prediction, target]

    return performance, recovered_data
def reference_evaluate_data(ref_id):
    # Evaluate an uploaded spreadsheet against the reference ``ref_id``.
    #
    # Only POST requests are processed. The handler looks up the reference,
    # stores the uploaded workbook in a temporary file, extracts the
    # pressure/uptake columns for two aliquots with two runs each, runs an
    # ``Evaluation`` on them and returns ``evaluation.results`` through
    # ``core_response``.
    #
    # Responses built here: 404 when the reference does not exist, 200 with
    # the results on success, 204 when no file was sent, when the set record
    # was not newly created, or when any exception occurred.
    #
    # NOTE(review): this is Python 2 code (print statements), and the final
    # ``return`` statement is cut off in this excerpt.
    if fk.request.method == 'POST':
        _ref = ReferenceModel.objects.with_id(ref_id)
        if _ref is None:
            return core_response(404, 'Request suggested an empty response',
                                 'Unable to find this reference.')
        else:
            if fk.request.files:
                file_obj = fk.request.files['file']
                file_name = file_obj.filename
                # A throw-away set record, keyed by the current UTC timestamp;
                # its id is used to build the temporary file name.
                _set, created = SetModel.objects.get_or_create(
                    created_at=str(datetime.datetime.utcnow()))
                if created:
                    _set.filename = '{0}-{1}'.format(str(_set.id), file_name)
                    file_path = 'sets/tmp-{0}'.format(_set.filename)
                    try:
                        # Persist the upload so load_workbook can open it.
                        with open(file_path, 'wb') as set_file:
                            set_file.write(file_obj.read())
                        wb = load_workbook(file_path, read_only=True)
                        ws = wb.active
                        # Measurements grouped as [aliquot][run] -> list of cell values.
                        pressure = {
                            'aliq1': {
                                'run1': [],
                                'run2': []
                            },
                            'aliq2': {
                                'run1': [],
                                'run2': []
                            }
                        }
                        uptake = {
                            'aliq1': {
                                'run1': [],
                                'run2': []
                            },
                            'aliq2': {
                                'run1': [],
                                'run2': []
                            }
                        }
                        # The first two rows are skipped (presumably headers --
                        # confirm). Column layout read below:
                        #   0/1   -> aliq1 run1 pressure/uptake
                        #   3/4   -> aliq1 run2 pressure/uptake
                        #   7/8   -> aliq2 run1 pressure/uptake
                        #   10/11 -> aliq2 run2 pressure/uptake
                        # Empty cells (value None) are ignored.
                        for odx, row in enumerate(ws.rows):
                            if odx >= 2:
                                # print "--- row ---"
                                if row[0].value is not None:
                                    pressure['aliq1']['run1'].append(
                                        row[0].value)
                                if row[1].value is not None:
                                    uptake['aliq1']['run1'].append(
                                        row[1].value)

                                if row[3].value is not None:
                                    pressure['aliq1']['run2'].append(
                                        row[3].value)
                                if row[4].value is not None:
                                    uptake['aliq1']['run2'].append(
                                        row[4].value)

                                if row[7].value is not None:
                                    pressure['aliq2']['run1'].append(
                                        row[7].value)
                                if row[8].value is not None:
                                    uptake['aliq2']['run1'].append(
                                        row[8].value)

                                if row[10].value is not None:
                                    pressure['aliq2']['run2'].append(
                                        row[10].value)
                                if row[11].value is not None:
                                    uptake['aliq2']['run2'].append(
                                        row[11].value)

                        evaluation = Evaluation(eval_id=_set.filename,
                                                reference=_ref,
                                                pressure=pressure,
                                                uptake=uptake)
                        evaluation.run()
                        # Success path: drop the temporary file and set record.
                        os.remove(file_path)
                        _set.delete()
                        return core_response(
                            200,
                            'Results of new set {0} evaluated on reference[{1}].'
                            .format(file_name, ref_id), evaluation.results)
                    except:
                        # NOTE(review): bare except; traceback.print_exc()
                        # returns None, so this also prints "None". The
                        # temporary file is not removed on this path.
                        print traceback.print_exc()
                        _set.delete()
                        print "An error occured!!"
                        return core_response(204, 'Nothing created',
                                             'An error occured.')
                else:
                    return core_response(
                        204, 'Already exists',
                        'This should normaly never happened.')
            else:
                return core_response(204, 'Nothing created',
                                     'You must a set file.')
    # else:
    #     return core_response(405, 'Method not allowed', 'This endpoint supports only a POST method.')

    return """
def reference_evaluate_plot(ref_id, aliq):
    # Evaluate an uploaded spreadsheet against the reference ``ref_id`` and
    # send back the file produced by ``evaluation.error(aliq)``.
    #
    # Only POST requests are processed. The upload is parsed into
    # pressure/uptake lists for two aliquots with two runs each, an
    # ``Evaluation`` is run, and the file at the path returned by
    # ``evaluation.error(aliq)`` is sent with mimetype image/png.
    #
    # Responses built here: 404 when the reference does not exist or the
    # result file cannot be read, 204 when no file was sent, when the set
    # record was not newly created, or when any exception occurred.
    #
    # NOTE(review): this is Python 2 code (print statements), and the final
    # ``return`` statement is cut off in this excerpt.
    if fk.request.method == 'POST':
        _ref = ReferenceModel.objects.with_id(ref_id)
        if _ref is None:
            return core_response(404, 'Request suggested an empty response',
                                 'Unable to find this reference.')
        else:
            if fk.request.files:
                file_obj = fk.request.files['file']
                file_name = file_obj.filename
                # A throw-away set record, keyed by the current UTC timestamp;
                # its id is used to build the temporary file name.
                _set, created = SetModel.objects.get_or_create(
                    created_at=str(datetime.datetime.utcnow()))
                if created:
                    _set.filename = '{0}-{1}'.format(str(_set.id), file_name)
                    file_path = '/tmp/{0}'.format(_set.filename)
                    try:
                        # Persist the upload so load_workbook can open it.
                        with open(file_path, 'wb') as set_file:
                            set_file.write(file_obj.read())
                        wb = load_workbook(file_path, read_only=True)
                        ws = wb.active
                        # Measurements grouped as [aliquot][run] -> list of cell values.
                        pressure = {
                            'aliq1': {
                                'run1': [],
                                'run2': []
                            },
                            'aliq2': {
                                'run1': [],
                                'run2': []
                            }
                        }
                        uptake = {
                            'aliq1': {
                                'run1': [],
                                'run2': []
                            },
                            'aliq2': {
                                'run1': [],
                                'run2': []
                            }
                        }
                        # The first two rows are skipped (presumably headers --
                        # confirm). Column layout read below:
                        #   0/1   -> aliq1 run1 pressure/uptake
                        #   3/4   -> aliq1 run2 pressure/uptake
                        #   7/8   -> aliq2 run1 pressure/uptake
                        #   10/11 -> aliq2 run2 pressure/uptake
                        # Empty cells (value None) are ignored.
                        for odx, row in enumerate(ws.rows):
                            if odx >= 2:
                                # print "--- row ---"
                                if row[0].value is not None:
                                    pressure['aliq1']['run1'].append(
                                        row[0].value)
                                if row[1].value is not None:
                                    uptake['aliq1']['run1'].append(
                                        row[1].value)

                                if row[3].value is not None:
                                    pressure['aliq1']['run2'].append(
                                        row[3].value)
                                if row[4].value is not None:
                                    uptake['aliq1']['run2'].append(
                                        row[4].value)

                                if row[7].value is not None:
                                    pressure['aliq2']['run1'].append(
                                        row[7].value)
                                if row[8].value is not None:
                                    uptake['aliq2']['run1'].append(
                                        row[8].value)

                                if row[10].value is not None:
                                    pressure['aliq2']['run2'].append(
                                        row[10].value)
                                if row[11].value is not None:
                                    uptake['aliq2']['run2'].append(
                                        row[11].value)

                        evaluation = Evaluation(eval_id=_set.filename,
                                                reference=_ref,
                                                pressure=pressure,
                                                uptake=uptake)
                        evaluation.run()
                        # print str(evals)
                        # The upload and its set record are no longer needed
                        # once the evaluation has run.
                        os.remove(file_path)
                        _set.delete()
                        # reslt_path = evaluation.plot(aliq)
                        reslt_path = evaluation.error(aliq)
                        file_buffer = None
                        try:
                            # Load the result file into memory so it can be
                            # deleted from disk before the response is sent.
                            # NOTE(review): opened in text mode although it is
                            # served as image/png -- confirm this is intended.
                            with open(reslt_path, 'r') as _file:
                                file_buffer = StringIO(_file.read())
                            file_buffer.seek(0)
                        except:
                            # traceback.print_exc() returns None, so this also
                            # prints "None"; file_buffer stays None on failure.
                            print traceback.print_exc()
                        if file_buffer != None:
                            os.remove(reslt_path)
                            # The download name is the second '/'-separated
                            # component of reslt_path.
                            return fk.send_file(
                                file_buffer,
                                attachment_filename=reslt_path.split('/')[1],
                                mimetype='image/png')
                        else:
                            return core_response(
                                404, 'Request suggested an empty response',
                                'Unable to return plot image.')
                    except:
                        # NOTE(review): bare except; if the failure happens
                        # before os.remove above, the temporary upload is left
                        # on disk.
                        print traceback.print_exc()
                        _set.delete()
                        print "An error occured!!"
                        return core_response(204, 'Nothing created',
                                             'An error occured.')
                else:
                    return core_response(
                        204, 'Already exists',
                        'This should normaly never happened.')
            else:
                return core_response(204, 'Nothing created',
                                     'You must a set file.')
    # else:
    #     return core_response(405, 'Method not allowed', 'This endpoint supports only a POST method.')

    return """
示例#12
0
def main():
    """Train a ResNet-18 based transfer model and report test-set metrics.

    The function

    * wraps a pretrained torchvision ResNet-18 with the project's
      ``Transfer`` helper (14 outputs, ``finetuning=False``),
    * trains it with ``MultiLabelSoftMarginLoss`` and Adam on the loaders
      returned by ``create_balanced_split_loaders``,
    * validates every ``val_every_n`` minibatches, saving a checkpoint each
      time and a copy named ``'best model'`` whenever the validation loss
      improves, and
    * prints accuracy, precision and recall computed by ``Evaluation`` on
      the test loader.

    Checkpoints are written below ``<cwd>/models/new_trans/<timestamp>/``.
    Nothing is returned.

    :raises RuntimeError: If the validation loader yields no batches.
    """
    # Pretrained ImageNet backbone.
    imagenet = torchvision.models.resnet18(pretrained=True)

    # Set up folder for model saving (exist_ok makes a prior check unnecessary).
    model_path = '{}/models/new_trans/{}/'.format(os.getcwd(), time.strftime("%Y%m%d-%H%M%S"))
    pathlib.Path(model_path).mkdir(parents=True, exist_ok=True)

    # Setup GPU optimization if CUDA is supported, otherwise train on the CPU.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        computing_device = torch.device("cuda")
        extras = {"num_workers": 4, "pin_memory": True}
        print("CUDA is supported")
    else:
        computing_device = torch.device("cpu")
        extras = False
        print("CUDA NOT supported")

    # Setup: initialize the hyperparameters/variables
    num_epochs = 1           # Number of full passes through the dataset
    batch_size = 32          # Number of samples in each minibatch
    # np.random.seed() returns None, so the seed value is kept separately
    # and the actual number is what gets passed to the loader factory.
    seed = 1
    np.random.seed(seed)     # Seed the random number generator for reproducibility
    p_val = 0.1              # Percent of the overall dataset to reserve for validation
    p_test = 0.2             # Percent of the overall dataset to reserve for testing
    val_every_n = 50         # Validate (and report) every this many minibatches
    learning_rate = 0.000002  # Step size handed to Adam

    class channelCopy(object):
        """Repeat an image tensor three times along dimension 0."""

        def __call__(self, img):
            return torch.cat([img, img, img], 0)

    transform = transforms.Compose([transforms.Resize(224), transforms.ToTensor(), channelCopy()])

    # Setup the training, validation, and testing dataloaders
    train_loader, val_loader, test_loader, label_weights = create_balanced_split_loaders(batch_size, seed, transform=transform,
                                                                 p_val=p_val, p_test=p_test,
                                                                 shuffle=True, show_sample=False,
                                                                 extras=extras, z_score=True)

    # Build the transfer model and move it to the selected device.
    transfer = Transfer(14, finetuning=False)
    model = transfer(imagenet)
    model = model.to(computing_device)
    print("Model on CUDA?", next(model.parameters()).is_cuda)

    criterion = nn.MultiLabelSoftMarginLoss()

    # Only parameters that still require gradients are optimised.
    optimizer = optim.Adam(filter(lambda param: param.requires_grad, model.parameters()), lr=learning_rate)

    # Track the loss across training
    total_loss = []
    avg_minibatch_loss = []
    avg_minibatch_val_loss = []
    val_loss_min = float('inf')

    # Begin training procedure
    for epoch in range(num_epochs):

        N = val_every_n
        N_minibatch_loss = 0.0

        for minibatch_count, (images, labels) in enumerate(train_loader):

            # Put the minibatch data on the computing device.
            images, labels = images.to(computing_device), labels.to(computing_device)

            # Zero out the stored gradient (buffer) from the previous iteration
            optimizer.zero_grad()

            # Forward pass, loss, backward pass and weight update.
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Accumulate plain floats: summing the loss tensors themselves
            # would keep every minibatch's autograd graph alive.
            batch_loss = loss.item()
            total_loss.append(batch_loss)
            N_minibatch_loss += batch_loss

            print('training on {0} minibatch'.format(minibatch_count))

            # Holdout-set validation every val_every_n minibatches.
            if not minibatch_count % val_every_n:
                model.eval()
                with torch.no_grad():
                    val_loss = 0
                    num_val_batches = 0

                    for val_batch_count, (val_image, val_labels) in enumerate(val_loader):

                        print('validating on {0} minibatch'.format(val_batch_count))
                        val_image, val_labels = val_image.to(computing_device), val_labels.to(computing_device)
                        val_outputs = model(val_image)
                        val_loss += criterion(val_outputs, val_labels)
                        num_val_batches += 1

                    if num_val_batches == 0:
                        raise RuntimeError('validation loader produced no batches')

                    # Average over the number of batches (the last enumerate
                    # index is one less than that).
                    val_loss /= num_val_batches
                    print('validation loss: {0}'.format(val_loss))
                    avg_minibatch_val_loss.append(val_loss)
                    model_name = "epoch_{}-batch_{}-loss_{}-{}.pt".format(epoch, minibatch_count, val_loss, time.strftime("%Y%m%d-%H%M%S"))
                    torch.save(model.state_dict(), os.path.join(model_path, model_name))
                    if val_loss < val_loss_min:
                        torch.save(model.state_dict(), os.path.join(model_path, 'best model'))
                        val_loss_min = val_loss
                    print('val: ', [l.item() for l in avg_minibatch_val_loss])

                # Validation switched the model to eval mode; restore training
                # mode so the remaining minibatches are not trained in eval mode.
                model.train()

            if minibatch_count % N == 0:
                # Print the loss averaged over the last N mini-batches
                # (the very first report covers a single minibatch).
                if minibatch_count > 0:
                    N_minibatch_loss /= N
                print('Epoch %d, average minibatch %d loss: %.3f' %
                    (epoch + 1, minibatch_count, N_minibatch_loss))

                # Add the averaged loss over N minibatches and reset the counter
                avg_minibatch_loss.append(N_minibatch_loss)
                N_minibatch_loss = 0.0
                print('train: ', avg_minibatch_loss)

        print("Finished", epoch + 1, "epochs of training")
    # Report the number of completed epochs, not the last zero-based index.
    print("Training complete after", num_epochs, "epochs")

    # Begin testing
    labels_all = []
    predictions_all = []
    model.eval()
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            images, labels = images.to(computing_device), labels.to(computing_device)
            labels_all.append(labels)
            output = model(images)
            # NOTE(review): the threshold is applied to the raw model output;
            # if the model emits logits, 0.5 is not the 50% probability
            # cut-off -- confirm against the Transfer model.
            predictions = output > 0.5
            predictions_all.append(predictions)

    labels = torch.cat(labels_all, 0)
    predictions = torch.cat(predictions_all, 0)

    evaluator = Evaluation(predictions.float(), labels)
    print('acc: ', evaluator.accuracy())
    print('acc: ', evaluator.accuracy().mean())
    print('pre: ', evaluator.precision())
    print('pre: ', evaluator.precision().mean())
    print('rec: ', evaluator.recall())
    print('rec: ', evaluator.recall().mean())
示例#13
0
                      outputWidth=128,
                      outputHeight=128)
# NOTE(review): `options` and `dataset` are defined earlier in the script,
# outside this excerpt.
# Point the data directory at the globals entry selected by the dataset value.
options['data']['dir'] = options["globals"][dataset.value]
# Two datasets are returned: datasetHC is used for configuration and training,
# datasetPC for evaluation (presumably healthy/patient cohorts -- confirm).
datasetHC, datasetPC = get_datasets(options, dataset=dataset)
config = get_config(trainer=ConstrainedAAE,
                    options=options,
                    optimizer='ADAM',
                    intermediateResolutions=[16, 16],
                    dropout_rate=0.1,
                    dataset=datasetHC)

# Model-specific settings set directly on the config object.
config.kappa = 1.0
config.scale = 10.0
config.rho = 1.0

# Create an instance of the model and train it
# (tf.Session is the TensorFlow 1.x session API).
model = ConstrainedAAE(tf.Session(),
                       config,
                       network=constrained_adversarial_autoencoder_Chen)

# Train it
model.train(datasetHC)

# Evaluate on datasetPC; the description combines the training dataset's
# class name with options['threshold'], and the epoch label is the configured
# number of training epochs.
Evaluation.evaluate(
    datasetPC,
    model,
    options,
    description=f"{type(datasetHC).__name__}-{options['threshold']}",
    epoch=str(options['train']['numEpochs']))
def main():
    """Train a BasicCNN, checkpoint it on validation improvement, then test it.

    The function builds the train/validation/test loaders, trains the model
    with Adam and binary cross-entropy, and runs a full pass over the
    validation loader every ``val_every_n`` minibatches.  Whenever the mean
    validation loss improves, the model's ``state_dict`` is written to
    ``<cwd>/models/baseline/<timestamp>/``.  After training, predictions on
    the test loader are thresholded at 0.5 and the accuracy reported by
    ``Evaluation`` is printed (per-entry values first, then their mean).
    """
    conf = {}
    conf['z_score'] = True

    # Setup: initialize the hyperparameters/variables
    num_epochs = 1           # Number of full passes through the dataset
    batch_size = 16          # Number of samples in each minibatch
    learning_rate = 0.00001
    # np.random.seed() returns None, so keep the seed value itself: the loader
    # helper below must receive the actual seed, not None.
    seed = 1
    np.random.seed(seed)     # Seed the random number generator for reproducibility
    p_val = 0.1              # Fraction of the overall dataset to reserve for validation
    p_test = 0.2             # Fraction of the overall dataset to reserve for testing
    val_every_n = 100        # Run validation every this many minibatches
    N = 50                   # Report the running training loss every N minibatches

    # Set up folder for model saving (exist_ok makes a prior existence check
    # unnecessary).
    model_path = '{}/models/baseline/{}/'.format(os.getcwd(), time.strftime("%Y%m%d-%H%M%S"))
    pathlib.Path(model_path).mkdir(parents=True, exist_ok=True)

    # Resize, then convert to Tensor - other transformations (cropping,
    # rotation, scaling) can be added to this pipeline later.
    transform = transforms.Compose([transforms.Resize(512), transforms.ToTensor()])

    # Setup GPU optimization if CUDA is supported
    if torch.cuda.is_available():
        computing_device = torch.device("cuda")
        extras = {"num_workers": 0, "pin_memory": True}
        print("CUDA is supported")
    else:  # Otherwise, train on the CPU
        computing_device = torch.device("cpu")
        # NOTE(review): False is what the loader helper is given on CPU;
        # presumably it treats a falsy `extras` as "no extra DataLoader args".
        extras = False
        print("CUDA NOT supported")

    # Setup the training, validation, and testing dataloaders.  The fourth
    # return value (label weights) is not needed by the BCE criterion below.
    train_loader, val_loader, test_loader, _ = create_balanced_split_loaders(
        batch_size, seed, transform=transform,
        p_val=p_val, p_test=p_test,
        shuffle=True, show_sample=False,
        extras=extras, z_score=conf['z_score'])

    # Instantiate a BasicCNN to run on the GPU or CPU based on CUDA support
    model = BasicCNN()
    model = model.to(computing_device)
    print("Model on CUDA?", next(model.parameters()).is_cuda)

    # BCELoss requires the model outputs to already be probabilities in [0, 1].
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Track the loss across training
    total_loss = []
    avg_minibatch_loss = []
    val_loss_min = float('inf')

    # Begin training procedure
    for epoch in range(num_epochs):
        model.train()
        N_minibatch_loss = 0.0

        # Get the next minibatch of images, labels for training
        for minibatch_count, (images, labels) in enumerate(train_loader):

            # Put the minibatch data in CUDA Tensors and run on the GPU if supported
            images, labels = images.to(computing_device), labels.to(computing_device)

            # Zero out the stored gradient (buffer) from the previous iteration
            optimizer.zero_grad()

            # Perform the forward pass through the network and compute the loss
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Compute the gradients and update the weights
            loss.backward()
            optimizer.step()

            # Work with the Python float from here on: accumulating the loss
            # tensor itself would keep every minibatch's autograd graph alive.
            loss_value = loss.item()
            print('training', minibatch_count, loss_value)
            total_loss.append(loss_value)
            N_minibatch_loss += loss_value

            # Holdout-set validation
            if minibatch_count % val_every_n == 0:
                model.eval()
                val_loss_sum = 0.0
                val_batches = 0
                with torch.no_grad():
                    for val_images, val_labels in val_loader:
                        val_images = val_images.to(computing_device)
                        val_labels = val_labels.to(computing_device)
                        val_outputs = model(val_images)
                        val_loss_sum += criterion(val_outputs, val_labels).item()
                        val_batches += 1
                        print('val', val_batches, val_loss_sum / val_batches)
                # Restore training mode; otherwise every later minibatch would
                # be trained with dropout/batch-norm in inference behaviour.
                model.train()

                # Skip checkpointing when the validation loader is empty.
                if val_batches:
                    # Mean over exactly the number of batches seen.
                    val_loss = val_loss_sum / val_batches
                    if val_loss < val_loss_min:
                        model_name = "epoch_{}-batch_{}-{}-loss_{:.4f}.pt".format(
                            epoch, minibatch_count,
                            time.strftime("%Y%m%d-%H%M%S"), val_loss)
                        torch.save(model.state_dict(), os.path.join(model_path, model_name))
                        val_loss_min = val_loss

            # Report once N losses have actually been accumulated, so the
            # division by N yields a true average (including the first report).
            if (minibatch_count + 1) % N == 0:
                N_minibatch_loss /= N
                print('Epoch %d, average minibatch %d loss: %.3f' %
                      (epoch + 1, minibatch_count, N_minibatch_loss))

                # Add the averaged loss over N minibatches and reset the counter
                avg_minibatch_loss.append(N_minibatch_loss)
                N_minibatch_loss = 0.0

        print("Finished", epoch + 1, "epochs of training")
    print("Training complete after", num_epochs, "epochs")

    # Begin testing
    labels_all = []
    predictions_all = []
    model.eval()
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(computing_device), labels.to(computing_device)
            labels_all.append(labels)
            # Threshold the model outputs at 0.5 to obtain binary predictions
            predictions_all.append(model(images) > 0.5)

    labels = torch.cat(labels_all, 0)
    predictions = torch.cat(predictions_all, 0)

    evaluation = Evaluation(predictions.float(), labels)
    accuracy = evaluation.accuracy()
    print(accuracy)
    print(accuracy.mean())
def main(model_name, model_path):
    """Load a trained CNN checkpoint and evaluate it on the test split.

    Args:
        model_name: Which architecture to build: ``'intensive'`` for
            ``IntensiveCNN`` or ``'baseline'`` for ``BasicCNN``.
        model_path: Path of the checkpoint file.  For ``'intensive'`` the file
            must hold a dict with a ``'model_state_dict'`` entry; for
            ``'baseline'`` it must hold the state dict itself.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Fail fast with a clear message instead of hitting an unbound `model`
    # after the (potentially slow) data loaders have been built.
    if model_name not in ('intensive', 'baseline'):
        raise ValueError(
            "Unknown model_name {!r}; expected 'intensive' or 'baseline'".format(model_name))

    conf = {}
    conf['z_score'] = True

    # Setup: initialize the hyperparameters/variables
    batch_size = 128  # Number of samples in each minibatch
    # np.random.seed() returns None, so keep the seed value itself: the loader
    # helper below must receive the actual seed, not None.
    seed = 1
    np.random.seed(seed)  # Seed the random number generator for reproducibility
    p_val = 0.1  # Fraction of the overall dataset to reserve for validation
    p_test = 0.2  # Fraction of the overall dataset to reserve for testing

    # Resize, then convert to Tensor
    transform = transforms.Compose(
        [transforms.Resize(512), transforms.ToTensor()])

    # Setup GPU optimization if CUDA is supported
    if torch.cuda.is_available():
        computing_device = torch.device("cuda")
        extras = {"num_workers": 0, "pin_memory": True}
        print("CUDA is supported")
    else:  # Otherwise, evaluate on the CPU
        computing_device = torch.device("cpu")
        # NOTE(review): False is what the loader helper is given on CPU;
        # presumably it treats a falsy `extras` as "no extra DataLoader args".
        extras = False
        print("CUDA NOT supported")

    # Only the test loader is needed; the split arguments are kept as they
    # are so the helper is called exactly as before, apart from `seed`.
    _, _, test_loader, _ = create_balanced_split_loaders(
        batch_size,
        seed,
        transform=transform,
        p_val=p_val,
        p_test=p_test,
        shuffle=True,
        show_sample=False,
        extras=extras,
        z_score=conf['z_score'])

    # map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
    # machine (and vice versa).
    checkpoint = torch.load(model_path, map_location=computing_device)
    if model_name == 'intensive':
        model = IntensiveCNN()
        state_dict = checkpoint['model_state_dict']
    else:  # 'baseline'
        model = BasicCNN()
        state_dict = checkpoint
    model = model.to(computing_device)
    model.load_state_dict(state_dict)
    model.eval()

    labels_all = []
    predictions_all = []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(computing_device), labels.to(
                computing_device)
            labels_all.append(labels)
            # Threshold the model outputs at 0.5 to obtain binary predictions
            predictions_all.append(model(images) > 0.5)

    labels = torch.cat(labels_all, 0)
    predictions = torch.cat(predictions_all, 0)

    evaluation = Evaluation(predictions.float(), labels)
    evaluation.evaluate()
示例#16
0
                dpi=450,
                transparent=True)


# load utils classes
# Build the parameter object from params.json, then override the fields this
# run depends on (data/model locations, flags, and the "test" mode).
params = Params("params.json")
params.data_dir = "./data"
params.model_directory = "./output"
params.continuing_training = False
params.batchnorm = True
params.is_training = True
params.shuffle = True
params.mode = "test"

# instantiate eval class
# NOTE(review): the variable name `evalutaion` is misspelled; rename it only
# together with every later use in this script.
evalutaion = Evaluation(params)

# get train ops
trainops = TrainOps(params)

# get model
model = mt.get_deep_unet(params)
'''train and save model'''
# Weights are read from <model_directory>/weights.hdf5.
save_model_path = os.path.join(params.model_directory, "weights.hdf5")
'''Load models trained weights'''
# NOTE(review): presumably a Keras model - there, by_name=True matches weights
# to layers by layer name and skip_mismatch=True skips layers whose weight
# shapes differ instead of raising; confirm against the model type.
model.load_weights(save_model_path, by_name=True, skip_mismatch=True)

# store predictions
if not os.path.exists(
        os.path.join(params.model_directory, params.mode + "_predictions")):
    os.makedirs(
示例#17
0
def home_reference_evaluate_data():
    if fk.request.method == 'POST':
        refs = [r for r in ReferenceModel.objects()]
        _ref = refs[-1]
        if _ref is None:
            return core_response(404, 'Request suggested an empty response', 'Unable to find the newest reference.')
        else:
            if fk.request.files:
                file_obj = fk.request.files['file']
                file_name = file_obj.filename
                _set, created = SetModel.objects.get_or_create(created_at=str(datetime.datetime.utcnow()))
                if created:
                    _set.filename = '{0}-{1}'.format(str(_set.id), file_name)
                    file_path = '/tmp/{0}'.format(_set.filename)
                    try:
                        with open(file_path, 'wb') as set_file:
                            set_file.write(file_obj.read())
                        wb = load_workbook(file_path, read_only=True)
                        ws = wb.active
                        pressure = {'aliq1':{'run1':[], 'run2':[]}, 'aliq2':{'run1':[], 'run2':[]}}
                        uptake = {'aliq1':{'run1':[], 'run2':[]}, 'aliq2':{'run1':[], 'run2':[]}}
                        for odx, row in enumerate(ws.rows):
                            if odx >= 2:
                                # print "--- row ---"
                                if row[0].value is not None:
                                    pressure['aliq1']['run1'].append(row[0].value)
                                if row[1].value is not None:
                                    uptake['aliq1']['run1'].append(row[1].value)
                                
                                if row[3].value is not None:
                                    pressure['aliq1']['run2'].append(row[3].value)
                                if row[4].value is not None:
                                    uptake['aliq1']['run2'].append(row[4].value)
                                

                                if row[7].value is not None:
                                    pressure['aliq2']['run1'].append(row[7].value)
                                if row[8].value is not None:
                                    uptake['aliq2']['run1'].append(row[8].value)
                                
                                if row[10].value is not None:
                                    pressure['aliq2']['run2'].append(row[10].value)
                                if row[11].value is not None:
                                    uptake['aliq2']['run2'].append(row[11].value)

                        evaluation = Evaluation(eval_id=_set.filename, reference=_ref, pressure=pressure, uptake=uptake)
                        evaluation.run()
                        # print str(evals)
                        os.remove(file_path)
                        _set.delete()
                        reslt_path = evaluation.error()
                        file_buffer = None
                        try:
                            with open(reslt_path, 'r') as _file:
                                file_buffer = StringIO(_file.read())
                            file_buffer.seek(0)
                        except:
                            print traceback.print_exc()
                        if file_buffer != None:
                            # os.remove(reslt_path)
                            return fk.send_file(file_buffer, as_attachment=True, attachment_filename=reslt_path.split('/')[1], mimetype='image/png')
                        else:
                            return core_response(404, 'Request suggested an empty response', 'Unable to return plot image.')
                    except:
                        print traceback.print_exc()
                        _set.delete()
                        print "An error occured!!"
                        return core_response(204, 'Nothing created', 'An error occured.')
                else:
                    return core_response(204, 'Already exists', 'This should normaly never happened.')
            else:
                return core_response(204, 'Nothing created', 'You must a set file.')

    return """