Пример #1
0
    def _batch_optimize(self,
                        batch_graph,
                        data_container,
                        alpha=0.5,
                        regularize=False):
        """Optimize embeddings over a batch graph via Label Spreading.

        Args:
            batch_graph: tuple (word2idx, idx2word, adjacency, _) describing
                the vocabulary graph for this batch.
            data_container: holds ``emb_shape``, ``init_emb`` and ``lexicon``.
            alpha: spreading coefficient in (0, 1); larger values weight the
                graph structure more than the initial embeddings.
            regularize: if True, L2-normalize each optimized embedding row.

        Returns:
            dict mapping each word to its optimized embedding vector.

        Raises:
            ValueError: if the graph contains an isolated (zero-degree) node.
        """
        _word2idx, _idx2word, _adjacency, _ = batch_graph
        _emb_shape = data_container.emb_shape
        _vocab_size = len(_word2idx)
        _init_emb = data_container.init_emb
        _init_emb_mat = self.build_init_emb_mat(shape=(_vocab_size,
                                                       _emb_shape[0]),
                                                word2idx=_word2idx,
                                                init_emb=_init_emb)

        if self.verbose:
            print('--Before Optimization:')
            evaluate(data_container.init_emb, data_container.lexicon)

        print('Building Degree Matrix ...')
        # Vectorized degree computation: one row-sum instead of the original
        # O(V^2) Python loop over a dense V x V matrix.
        _degrees = np.sum(np.absolute(_adjacency), axis=1)
        # Every node needs at least one edge or D^{-1/2} is undefined.
        # Raise instead of assert: asserts are stripped under `python -O`.
        if not np.all(_degrees > 0):
            raise ValueError('Graph contains isolated nodes (zero degree).')

        print('Calculating Graph Laplacian ...')
        # D^{-1/2} of a diagonal matrix is the elementwise reciprocal square
        # root on the diagonal (equivalent to matrix_power(sqrt(D), -1)).
        _reg_degree = np.diag(1.0 / np.sqrt(_degrees))
        _Laplacian = np.matmul(np.matmul(_reg_degree, _adjacency), _reg_degree)

        print('Starting Label Spreading optimization with **alpha =', alpha,
              '** ...')

        # Closed-form solution: (I - alpha * L)^{-1} * (1 - alpha) * F0.
        _optimized = np.matmul(
            np.linalg.inv(np.identity(_vocab_size) - alpha * _Laplacian),
            (1 - alpha) * _init_emb_mat)

        if regularize:
            # L2-normalize every row in one vectorized step.
            optimized = _optimized / np.linalg.norm(
                _optimized, axis=1, keepdims=True)
        else:
            optimized = _optimized

        trained_emb = {word: optimized[idx] for word, idx in _word2idx.items()}

        if self.verbose:
            print('--After Optimization:')
            evaluate(trained_emb, data_container.lexicon)

        return trained_emb
Пример #2
0
    def _batch_optimize(self,
                        batch_graph,
                        data_container,
                        alpha=0.5,
                        regularize=False):
        """Optimize embeddings over a batch graph with iterative retrofitting.

        Args:
            batch_graph: tuple (word2idx, idx2word, adjacency, _).
            data_container: holds ``emb_shape``, ``init_emb`` and ``lexicon``.
            alpha: weight of the graph-neighbour term versus the initial
                embedding term, in (0, 1).
            regularize: if True, L2-normalize each optimized embedding row.

        Returns:
            dict mapping each word to its optimized embedding vector.
        """
        _word2idx, _idx2word, _adjacency, _ = batch_graph
        _emb_shape = data_container.emb_shape
        _vocab_size = len(_word2idx)
        _init_emb = data_container.init_emb
        _init_emb_mat = self.build_init_emb_mat(shape=(_vocab_size,
                                                       _emb_shape[0]),
                                                word2idx=_word2idx,
                                                init_emb=_init_emb)

        if self.verbose:
            print('--Before Optimization:')
            evaluate(data_container.init_emb, data_container.lexicon)

        print('Starting Retrofit optimization with **alpha =', alpha, '** ...')

        _num_iters = 15
        _optimized = np.copy(_init_emb_mat)

        for _ in range(_num_iters):
            # Allocate a fresh buffer each sweep. The original assigned
            # `_optimized = _update` without copying, which aliased the two
            # arrays: from the second sweep onward, updating row i read rows
            # already modified in the same sweep (an unintended mix of
            # Jacobi and Gauss-Seidel iteration). With a fresh buffer every
            # sweep is a pure Jacobi step over the previous iterate.
            _update = np.empty_like(_optimized)
            for i in range(len(_optimized)):
                # Weighted average of graph neighbours and the initial vector.
                _temp = alpha * np.dot(_adjacency[i], _optimized) \
                        + (1 - alpha) * _init_emb_mat[i]

                _update[i] = _temp / ((alpha * np.sum(_adjacency[i])) +
                                      (1 - alpha))

            _optimized = _update

        if regularize:
            # L2-normalize every row in one vectorized step.
            optimized = _optimized / np.linalg.norm(
                _optimized, axis=1, keepdims=True)
        else:
            optimized = _optimized

        trained_emb = {word: optimized[idx] for word, idx in _word2idx.items()}

        if self.verbose:
            print('--After Optimization:')
            evaluate(trained_emb, data_container.lexicon)

        return trained_emb
Пример #3
0
def eval_(result_path='pytorch_result.mat'):
    """Evaluate re-identification results stored in a .mat file.

    Loads query/gallery features and metadata from *result_path*, accumulates
    per-query average precision and CMC counts, prints a summary line, and
    returns the averaged CMC curve ('acc') and mAP ('mAP').
    """
    mat = scipy.io.loadmat(result_path)

    q_feat = torch.FloatTensor(mat['query_f']).cuda()
    q_cam = mat['query_cam'][0]
    q_label = mat['query_label'][0]

    g_feat = torch.FloatTensor(mat['gallery_f']).cuda()
    g_cam = mat['gallery_cam'][0]
    g_label = mat['gallery_label'][0]

    cmc_sum = torch.IntTensor(len(g_label)).zero_()
    ap_sum = 0.0

    for idx, label in enumerate(q_label):
        ap_tmp, cmc_tmp = evaluate(q_feat[idx], label, q_cam[idx],
                                   g_feat, g_label, g_cam)
        # A leading -1 marks a query with no valid gallery match; skip it.
        if cmc_tmp[0] == -1:
            continue
        cmc_sum = cmc_sum + cmc_tmp
        ap_sum += ap_tmp

    # Average over all queries (including skipped ones, as before).
    mean_cmc = cmc_sum.float() / len(q_label)
    mean_ap = ap_sum / len(q_label)
    res_string = 'Rank@1:%f Rank@5:%f Rank@10:%f mAP:%f' % (
        mean_cmc[0], mean_cmc[4], mean_cmc[9], mean_ap)
    print(res_string)
    return {'acc': mean_cmc, 'mAP': mean_ap}
Пример #4
0
def main():
    """Parse CLI args, load a trained Deeplabv2 checkpoint and evaluate it.

    Prints the parameter count, runs ``evaluate`` over the ground-truth list
    and reports total wall-clock time plus the per-image average time.
    """
    print("Testing...")

    # args parsing: the size options arrive as 'W,H' strings.
    args = get_arguments()
    w, h = map(int, args.input_size.split(','))
    args.input_size = (w, h)
    w, h = map(int, args.gt_size.split(','))
    args.gt_size = (w, h)

    # Create the result dir; exist_ok avoids the check-then-create race of
    # the original os.path.exists() guard.
    os.makedirs(args.result_dir, exist_ok=True)

    # load model weights onto the requested GPU
    model = Deeplabv2(num_classes=args.num_classes, backbone=args.backbone)
    saved_state_dict = torch.load(args.restore_from)
    model.load_state_dict(saved_state_dict)
    model.cuda(args.gpu_id)

    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print("Number of parameters: " + str(pytorch_total_params))

    # evaluate
    tt = time.time()
    _, avg_time = evaluate(args, args.gt_dir, args.gt_list, args.result_dir,
                           model)
    print('Time used: {} sec'.format(time.time() - tt))
    print(avg_time)
Пример #5
0
def individualKNNPrediction(similarityMatrix,
                            predictionMatrix,
                            kRange,
                            validOrTestMatrix,
                            itemBased=False):
    """Sweep KNN neighbourhood sizes and collect MAP@10 for each k.

    Declaration for kRange = range(50,120,10)

    Args:
        similarityMatrix: precomputed user/item similarity matrix.
        predictionMatrix: interaction matrix used for scoring.
        kRange: iterable of k values to evaluate.
        validOrTestMatrix: held-out matrix used by ``evaluate``.
        itemBased: use item-based (True) or user-based (False) similarity.

    Returns:
        dict mapping each k in kRange to its MAP@10 score.
    """
    MAP10 = {}
    for kValue in kRange:
        # The original duplicated this call in two branches that differed
        # only in the item_similarity_en flag; pass the flag through instead.
        user_item_prediction_score = predict(predictionMatrix,
                                             kValue,
                                             similarityMatrix,
                                             item_similarity_en=itemBased)
        user_item_predict = prediction(user_item_prediction_score, 50,
                                       predictionMatrix)
        user_item_res = evaluate(user_item_predict, validOrTestMatrix)

        MAP10[kValue] = user_item_res.get('MAP@10')

    return MAP10
Пример #6
0
def train(dataset, x, y, rho, alpha, alpha_ban, lam2):
    """5-fold cross-validated training/evaluation of the CAMEL model.

    For each fold: build a per-label correlation matrix via ADMM regressions,
    fit CAMEL, predict on the held-out fold and collect the metrics; finally
    report the mean metrics via ``evaluate_mean``.

    Note: fold label arrays are remapped in place from {-1, 1} to {0, 1}
    before scoring.
    """
    num_labels = y.shape[1]
    S = cp.zeros((num_labels, num_labels))
    # shuffle=True is required for random_state to take effect: recent
    # scikit-learn raises ValueError when random_state is set with
    # shuffle=False (where it would silently do nothing).
    kf = KFold(n_splits=5, shuffle=True, random_state=1)
    kf_metrics = []
    for train_idx, test_idx in kf.split(x):
        x_train = x[train_idx]
        y_train = y[train_idx]
        x_test = x[test_idx]
        y_test = y[test_idx]

        # Label correlation matrix: regress each label on all the others
        # and re-insert a zero coefficient at its own position.
        for j in range(num_labels):
            y_j = cp.array(y_train[:, j].reshape(-1, 1))
            y_not_j = cp.array(np.delete(y_train, j, axis=1))
            S_j = ADMM(y_not_j, y_j, rho, alpha_ban)
            S[j, :] = cp.array(np.insert(np.array(S_j.tolist()), j, 0))

        # Fit CAMEL parameters on this fold.
        G, A, gamma, b_T = CAMEL(S, x_train, y_train, alpha, lam2)

        # Evaluate on the held-out fold.
        test_output, test_predict = predict(G, A, gamma, x_train, x_test, b_T,
                                            lam2)
        y_test[y_test == -1] = 0
        test_predict[test_predict == -1] = 0
        metric = evaluate(y_test, np.array(test_output.tolist()),
                          np.array(test_predict.tolist()))
        kf_metrics.append(metric)
    evaluate_mean(kf_metrics)
Пример #7
0
    def Evaluate(self, batches, eval_file=None, answer_file=None):
        """Predict answers for *batches* and score them against gold data.

        Args:
            batches: iterable of raw batches accepted by ``prepare_data``.
            eval_file: path to the gold-evaluation JSON file.
            answer_file: path where remapped predictions are dumped as JSON.

        Returns:
            (exact_match, f1) scores produced by ``evaluate``.
        """
        print('Start evaluate...')

        # Keep the path argument and the parsed JSON separate; the original
        # rebound `eval_file` to the loaded dict, shadowing the path.
        with open(eval_file, 'r') as f:
            eval_data = json.load(f)

        answer_dict = {}
        remapped_dict = {}

        for batch in batches:
            batch_data = self.prepare_data(batch)
            und_passage_states, p_mask, und_ques_states, q_mask = self.encoding_forward(
                batch_data)
            logits1, logits2 = self.decoding_forward(und_passage_states,
                                                     p_mask, und_ques_states,
                                                     q_mask)
            y1, y2 = self.get_predictions(logits1, logits2)
            qa_id = batch_data['id']
            answer_dict_, remapped_dict_ = self.convert_tokens(
                eval_data, qa_id, y1, y2)
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            # Drop per-batch tensors promptly to keep memory bounded.
            del und_passage_states, p_mask, und_ques_states, q_mask, y1, y2, answer_dict_, remapped_dict_

        metrics = evaluate(eval_data, answer_dict)
        with open(answer_file, 'w') as f:
            json.dump(remapped_dict, f)
        print("Exact Match: {}, F1: {}".format(metrics['exact_match'],
                                               metrics['f1']))

        return metrics['exact_match'], metrics['f1']
Пример #8
0
def train_image(dataset, x_train, y_train, x_test, y_test, rho, alpha,
                alpha_ban, lam2):
    """Train CAMEL on a fixed train/test split and print evaluation metrics.

    Builds a per-label correlation matrix via ADMM regressions, fits CAMEL,
    predicts on the test split and prints the metrics from ``evaluate``.
    Note: remaps ``y_test`` in place from {-1, 1} to {0, 1}.
    """
    # Log path/name are prepared but not used in the visible code.
    train_log_path = '../train_log/' + dataset + '/'
    log_name = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(
        time.time())) + '.log'
    num_labels = y_train.shape[1]
    S = cp.zeros((num_labels, num_labels))

    # get label correlation matrix: regress each label on all the others
    # (ADMM) and re-insert a zero coefficient at its own position.
    # NOTE(review): this fills columns S[:, j], while the sibling CV `train`
    # function fills rows S[j, :] — confirm which orientation CAMEL expects.
    for j in range(num_labels):
        y_j = cp.array(y_train[:, j].reshape(-1, 1))
        y_not_j = cp.array(np.delete(y_train, j, axis=1))
        S_j = ADMM(y_not_j, y_j, rho, alpha_ban)
        S[:, j] = cp.array(np.insert(np.array(S_j.tolist()), j, 0))

    # get caml parameter
    G, A, gamma, b_T = CAMEL(S, x_train, y_train, alpha, lam2)

    # evalue: map {-1, 1} labels to {0, 1} before metric computation.
    output, test_predict = predict(G, A, gamma, x_train, x_test, b_T, lam2)
    test_predict[test_predict == -1] = 0
    y_test[y_test == -1] = 0
    metrics = evaluate(y_test, np.array(output.tolist()),
                       np.array(test_predict.tolist()))
    print(metrics)
Пример #9
0
    def run_evaluate(self, sess, val_set, lr_schedule=None, path_results=None):
        """
        Performs an epoch of evaluation

        Args:
            sess: (tf.Session)
            val_set: Dataset instance
            lr_schedule: (instance of Lr schedule) optional
            path_results: (string) where to write the results
        Returns:
            scores dict with bleu, exact match and perplexity entries
        """
        vocab = load_vocab(self.config.path_vocab)
        # dict.items() works on both Python 2 and 3; the original used
        # iteritems(), which raises AttributeError under Python 3.
        rev_vocab = {idx: word for word, idx in vocab.items()}

        references, hypotheses = [], []
        # For perplexity: total word count and the running cross-entropy sum.
        n_words, ce_words = 0, 0

        for img, formula in minibatches(val_set, self.config.batch_size):
            fd = self.get_feed_dict(img, training=False, formula=formula,
                                    dropout=1)
            ce_words_eval, n_words_eval, ids_eval = sess.run(
                [self.ce_words, self.n_words, self.pred_test.ids],
                feed_dict=fd)

            # Normalize prediction shape to (batch, n_hypotheses, time).
            if self.config.decoding == "greedy":
                ids_eval = np.expand_dims(ids_eval, axis=1)
            elif self.config.decoding == "beam_search":
                ids_eval = np.transpose(ids_eval, [0, 2, 1])

            n_words += n_words_eval
            ce_words += ce_words_eval
            for form, pred in zip(formula, ids_eval):
                # pred is of shape (number of hypotheses, time)
                references.append([form])
                hypotheses.append(pred)

        if path_results is None:
            path_results = self.config.path_results

        scores = evaluate(references, hypotheses, rev_vocab,
                          path_results, self.config.id_END)

        # Perplexity = exp(mean cross-entropy per word).
        ce_mean = ce_words / float(n_words)
        scores["perplexity"] = np.exp(ce_mean)

        if lr_schedule is not None:
            lr_schedule.update(score=scores["perplexity"])

        return scores
Пример #10
0
    def evaluate(self, filename):
        """Annotate the uploaded image in place and report its classes.

        Resolves *filename* under ~/oxdnn/temp, runs the module-level
        ``evaluate`` on it, overwrites the file with the annotated image,
        and returns the saved path together with the detected classes.
        """
        image_path = os.path.expanduser(
            os.path.join('~', 'oxdnn', 'temp', filename))

        logger.info('Received evaluation request for {}'.format(image_path))

        result = evaluate(image_path, model, return_image=True)

        # Overwrite the original upload with the annotated version.
        plt.imsave(image_path, result['image'])

        logger.info('Saved annotated image at {}'.format(image_path))

        return {'outimage': image_path, 'classes': result['classes']}
Пример #11
0
def KNNPrediction(similarityMatrix,
                  predictionMatrix,
                  kValue,
                  validOrTestMatrix,
                  itemBased=False):
    """Run a single KNN prediction pass and return its MAP@10 score.

    Args:
        similarityMatrix: precomputed user/item similarity matrix.
        predictionMatrix: interaction matrix used for scoring.
        kValue: neighbourhood size.
        validOrTestMatrix: held-out matrix used by ``evaluate``.
        itemBased: use item-based (True) or user-based (False) similarity.

    Returns:
        The MAP@10 value reported by ``evaluate``.
    """
    # The original duplicated this call in two branches that differed only
    # in the item_similarity_en flag; pass the flag through instead.
    user_item_prediction_score = predict(predictionMatrix,
                                         kValue,
                                         similarityMatrix,
                                         item_similarity_en=itemBased)
    user_item_predict = prediction(user_item_prediction_score, 50,
                                   predictionMatrix)
    user_item_res = evaluate(user_item_predict, validOrTestMatrix)

    return user_item_res.get('MAP@10')
Пример #12
0
def test_model(folder: str, trainer, model, normal, dataloaders, device):
    """Run the model over the test loader and dump scores and artifacts.

    Loads ``best_model.pkl`` from *folder*, runs the trainer in 'test' mode
    over ``dataloaders['test']``, prints and saves evaluation scores, saves
    learned graphs (when the model exposes them), and stores the raw
    predictions/targets as an .npz archive.
    """
    save_path = os.path.join(folder, 'best_model.pkl')
    # NOTE(review): the checkpoint is loaded but never applied — the
    # load_state_dict / eval() calls below are commented out, so the model
    # runs with whatever weights it currently holds. Confirm this is intended.
    save_dict = torch.load(save_path)
    # model.load_state_dict(save_dict['model_state_dict'])

    # model.eval()
    steps, predictions, running_targets = 0, list(), list()
    tqdm_loader = tqdm(enumerate(dataloaders['test']))
    for step, (inputs, targets) in tqdm_loader:
        # Keep CPU copies of the targets for scoring after the loop.
        running_targets.append(targets.numpy())

        with torch.no_grad():
            inputs = inputs.to(device)
            targets = targets.to(device)
            # trainer.train(..., 'test') presumably performs a forward pass
            # only when phase == 'test' — verify against the Trainer class.
            outputs, loss = trainer.train(inputs, targets, 'test')
            predictions.append(outputs.cpu().numpy())

    # Stack the per-batch arrays into full (num_samples, ...) arrays.
    running_targets, predictions = np.concatenate(
        running_targets, axis=0), np.concatenate(predictions, axis=0)

    scores = evaluate(running_targets, predictions, normal)

    print('test results:')
    print(json.dumps(scores, cls=MyEncoder, indent=4))
    with open(os.path.join(folder, 'test-scores.json'), 'w+') as f:
        json.dump(scores, f, cls=MyEncoder, indent=4)
    # Persist learned adjacency graphs when the model exposes them.
    if trainer.model.graph0 is not None:
        np.save(os.path.join(folder, 'graph0'),
                trainer.model.graph0.detach().cpu().numpy())
        np.save(os.path.join(folder, 'graph1'),
                trainer.model.graph1.detach().cpu().numpy())
        np.save(os.path.join(folder, 'graph2'),
                trainer.model.graph2.detach().cpu().numpy())

    np.savez(os.path.join(folder, 'test-results.npz'),
             predictions=predictions,
             targets=running_targets)
Пример #13
0
 def evaluate(self):
     """Run SQuAD-style evaluation over the eval dataloader.

     Predicts start/end logits for every feature, writes n-best answers to
     ``predictions_eval.json`` in the output dir, then scores them against
     the gold dataset and logs the official metrics as JSON.
     """
     self.load_evaluating_data()
     self.model.eval()
     all_results = []
     logging.info("Start evaluating")
     for input_ids, segment_ids, input_mask, example_indices, _, _ in tqdm(
             self.eval_dataloader, desc="Evaluating", ascii=True,
             ncols=100):
         input_ids = input_ids.to(self.device)
         segment_ids = segment_ids.to(self.device)
         input_mask = input_mask.to(self.device)
         with torch.no_grad():
             # Span-prediction model: per-token start and end logits.
             batch_start_logits, batch_end_logits = self.model(
                 input_ids, segment_ids, input_mask)
         for i, example_index in enumerate(example_indices):
             start_logits = batch_start_logits[i].detach().cpu().tolist()
             end_logits = batch_end_logits[i].detach().cpu().tolist()
             eval_feature = self.eval_features[example_index.item()]
             unique_id = int(eval_feature.unique_id)
             all_results.append(
                 RawResult(unique_id=unique_id,
                           start_logits=start_logits,
                           end_logits=end_logits))
     if not os.path.exists(self.args.output_dir):
         os.makedirs(self.args.output_dir)
     output_prediction_file = os.path.join(self.args.output_dir,
                                           "predictions_eval.json")
     write_predictions(self.eval_examples, self.eval_features, all_results,
                       self.args.n_best_size, self.args.max_answer_length,
                       output_prediction_file, self.args.verbose_logging)
     # Score the written predictions against the gold dataset, which lives
     # next to eval_file with a .json extension.
     with open(os.path.splitext(self.args.eval_file)[0] +
               '.json') as dataset_file:
         dataset = json.load(dataset_file)['data']
     with open(output_prediction_file) as prediction_file:
         predictions = json.load(prediction_file)
     logging.info(json.dumps(evaluate(dataset, predictions)))
Пример #14
0
    callbacks = []
    callbacks.append(
        EarlyStopping('val_loss',
                      patience=patience,
                      verbose=0,
                      mode='auto',
                      restore_best_weights=True))

    new_iter_path = Path(args.out_path, str(iter + 1))
    if not new_iter_path.exists():
        new_iter_path.mkdir()
    model, history = train(model,
                           train_path_patches.parent,
                           args.val_path,
                           new_iter_path,
                           batch_size=batch_size,
                           patch_size=patch_size,
                           callbacks=callbacks)

    if args.test_path and Path(args.test_path).exists():
        test_path = Path(args.test_path)
        predict_bayes(model,
                      Path(test_path, 'images'),
                      Path(new_iter_path, 'eval'),
                      batch_size=batch_size,
                      patch_size=patch_size,
                      cutoff=cutoff)
        evaluate(Path(test_path, 'masks'), Path(new_iter_path, 'eval'))
    img_path = Path(iter_path, 'unlabeled')
    iter_path = new_iter_path
Пример #15
0
                start_time = time.time()
                loss, global_step, lr = model.train_one_batch(*zip(*batch))
                step_time += (time.time() - start_time)
                checkpoint_loss += loss

                if global_step % stats_per_step == 0:
                    print(
                        "# epoch - {3}, global step - {0}, step time - {1}, loss - {2}, lr - {4}"
                        .format(global_step, step_time / stats_per_step,
                                checkpoint_loss / stats_per_step, epoch, lr))
                    checkpoint_loss = 0.0
                    step_time = 0.0

                if global_step % (5 * stats_per_eval) == 0:
                    print("Eval train data")
                    train_f1 = evaluate(model, "train", train_data_batch,
                                        word_vocab, tag_vocab)

                if global_step % stats_per_eval == 0 or global_step == config.num_train_steps:
                    words, length, segments, target = batch[0]

                    decode, _ = model.inference([words], [length], [segments])
                    print("Sentence:")
                    print(" ".join(word_vocab[w] for w in words[:length]))
                    print("Gold:")
                    print(" ".join([tag_vocab[t] for t in target[:length]]))
                    print("Predict:")
                    print(" ".join([tag_vocab[p] for p in decode[0][:length]]))

                    print("Eval dev data")
                    dev_f1 = evaluate(model, "dev", dev_data_batch, word_vocab,
                                      tag_vocab)
Пример #16
0
                              preprocessor=preprocessor,
                              second_stage=True)

        # predict 1st Stage
        if not args.no_predict:
            print("2nd stage prediction")
            predict(model_1st,
                    Path(test_path, 'images'),
                    Path(out_path, '1stStage'),
                    batch_size=batch_size,
                    patch_size=patch_size,
                    preprocessor=preprocessor,
                    cutoff=cutoff_1st,
                    mc_iterations=args.mc_iterations)
        if not args.no_evaluate:
            evaluate.evaluate(Path(test_path, 'masks'),
                              Path(out_path, '1stStage'))

            # predict 2ndStage
            predict(model,
                    Path(test_path, 'images'),
                    out_path,
                    uncert_path=Path(out_path, '1stStage/uncertainty'),
                    batch_size=batch_size,
                    patch_size=patch_size,
                    preprocessor=preprocessor,
                    cutoff=cutoff,
                    mc_iterations=args.mc_iterations)
            if not args.no_evaluate:
                evaluate.evaluate(Path(test_path, 'masks'), out_path)

    # single stage mode
Пример #17
0
def searchThreshold(domain, model_pb, threshold_dir,
                    test_file, tag_file, vocab_file):
    """Grid-search per-label decision thresholds on a held-out test set.

    Searches with a granularity of 0.1 (a finer grid overfits); proper
    cross-validation would be better but is too expensive for BERT, so a
    single split is used.

    :param domain: dataset category: divorce, labor or loan
    :param model_pb: frozen .pb model file
    :param threshold_dir: directory for the search-result JSON files
    :param test_file: test file used to search the thresholds
    :param tag_file: label tags file
    :param vocab_file: BERT vocabulary file
    :return: writes the chosen thresholds to threshold.json inside
        threshold_dir and the full search trace to search.json
    """
    # Candidate thresholds 0.1 .. 0.9.
    thresholds = [round(i * 0.1, 1) for i in range(1, 10)]

    all_sentences, all_labels = load_file(test_file)

    logging.info("———— 开始加载模型 ————\n")
    model = BERTModel(task=domain, pb_model=model_pb, tagDir=tag_file, threshold=None, vocab_file=vocab_file)
    logging.info("———— 模型加载结束 ————\n")
    logging.info("———— 开始生成预测概率metric ————\n")
    probas = model.getProbs(all_sentences)
    logging.info("———— 预测概率metric生成结束 ————\n")

    result = {}
    result["domain"] = domain
    result["label_score"] = []
    logging.info("———— 开始搜索 %s 的最优阈值 ————\n" % domain)
    best_threshold = [0.5] * 20
    threshold_init = [0.5] * 20
    for i in range(20):
        best_score = 0
        label_result = {}
        scoreOfthreshold = {}
        label_result["label"] = i
        # Start from the best thresholds found so far for the other labels.
        threshold_init = list(best_threshold)
        # Try each candidate threshold (0.1 .. 0.9) for label i, keeping the
        # other labels' thresholds fixed.
        for threshold in thresholds:
            threshold_init[i] = threshold
            predicts = getPreLab(probas, model.id2label, threshold_init)
            score, f1 = evaluate(predict_labels=predicts, target_labels=all_labels, tag_dir=tag_file)
            scoreOfthreshold[threshold] = score
            if score > best_score:
                best_threshold[i] = threshold
                best_score = score
        label_result["score"] = scoreOfthreshold
        result["label_score"].append(label_result)
        logging.info(best_threshold)
        logging.info(label_result)
        logging.info("\n")
    result["best_threshold"] = best_threshold
    logging.info("搜索出来的阈值: %s \n" % best_threshold)
    logging.info("————开始将结果写入文件————\n")
    # exist_ok avoids the check-then-create race of the original guard.
    os.makedirs(threshold_dir, exist_ok=True)
    threshold_file = os.path.join(threshold_dir, "threshold.json")
    search_file = os.path.join(threshold_dir, "search.json")

    # Context managers guarantee the files are closed even if dump raises;
    # the original left both handles open on any exception.
    with open(threshold_file, "w", encoding="utf-8") as ouf_t:
        json.dump(best_threshold, ouf_t, ensure_ascii=False)
    with open(search_file, "w", encoding="utf-8") as ouf_s:
        json.dump(result, ouf_s, ensure_ascii=False)
Пример #18
0
def predict(ACTIVATION='ReLU',
            dropout=0.1,
            batch_size=32,
            repeat=4,
            minimum_kernel=32,
            epochs=200,
            iteration=3,
            crop_size=128,
            stride_size=3,
            DATASET='DRIVE'):
    """Run patch-based vessel-segmentation inference and evaluate it.

    Loads a trained iterative U-Net checkpoint for DATASET, predicts on the
    test images patch-by-patch, saves per-image probability maps (.pickle
    and .png) for every refinement output, and finally evaluates the last
    output against ground truth. Arguments mirror the training
    configuration so the matching checkpoint file can be located.
    """
    prepare_dataset.prepareDataset(DATASET)
    # test_data = [images, segmentations (ground truth), masks (FOV)].
    test_data = [
        prepare_dataset.getTestData(0, DATASET),
        prepare_dataset.getTestData(1, DATASET),
        prepare_dataset.getTestData(2, DATASET)
    ]

    # Native (width, height) per dataset, used to resize predictions back
    # to full resolution.
    IMAGE_SIZE = None
    if DATASET == 'DRIVE':
        IMAGE_SIZE = (565, 584)
    elif DATASET == 'CHASEDB1':
        IMAGE_SIZE = (999, 960)
    elif DATASET == 'STARE':
        IMAGE_SIZE = (700, 605)

    # One output directory plus gt/pred accumulators per refinement output.
    gt_list_out = {}
    pred_list_out = {}
    for out_id in range(iteration + 1):
        # makedirs(exist_ok=True) already tolerates existing directories;
        # the try/except additionally swallows any other OS error.
        try:
            os.makedirs(
                f"./output/{DATASET}/crop_size_{crop_size}/out{out_id + 1}/",
                exist_ok=True)
            gt_list_out.update({f"out{out_id + 1}": []})
            pred_list_out.update({f"out{out_id + 1}": []})
        except:
            pass

    # Resolve the activation class by name and rebuild the architecture,
    # then load the matching trained weights.
    activation = globals()[ACTIVATION]
    model = define_model.get_unet(minimum_kernel=minimum_kernel,
                                  do=dropout,
                                  activation=activation,
                                  iteration=iteration)
    model_name = f"Final_Emer_Iteration_{iteration}_cropsize_{crop_size}_epochs_{epochs}"
    print("Model : %s" % model_name)
    load_path = f"trained_model/{DATASET}/{model_name}.hdf5"
    model.load_weights(load_path, by_name=False)

    imgs = test_data[0]
    segs = test_data[1]
    masks = test_data[2]

    for i in tqdm(range(len(imgs))):

        img = imgs[i]
        seg = segs[i]
        if masks:
            mask = masks[i]

        # Split the image into overlapping crops, predict, then reassemble.
        patches_pred, new_height, new_width, adjustImg = crop_prediction.get_test_patches(
            img, crop_size, stride_size)
        preds = model.predict(patches_pred)

        # The model returns one prediction per refinement output.
        out_id = 0
        for pred in preds:
            pred_patches = crop_prediction.pred_to_patches(
                pred, crop_size, stride_size)
            pred_imgs = crop_prediction.recompone_overlap(
                pred_patches, crop_size, stride_size, new_height, new_width)
            # Crop away the padding added for patching.
            pred_imgs = pred_imgs[:, 0:prepare_dataset.DESIRED_DATA_SHAPE[0],
                                  0:prepare_dataset.DESIRED_DATA_SHAPE[0], :]
            probResult = pred_imgs[0, :, :, 0]
            pred_ = probResult
            # Save the raw probability map before resizing.
            with open(
                    f"./output/{DATASET}/crop_size_{crop_size}/out{out_id + 1}/{i + 1:02}.pickle",
                    'wb') as handle:
                pickle.dump(pred_, handle, protocol=pickle.HIGHEST_PROTOCOL)
            # Resize prediction / mask / ground truth to native resolution
            # (IMAGE_SIZE is (w, h); resize expects (h, w), hence [::-1]).
            pred_ = resize(pred_, IMAGE_SIZE[::-1])
            if masks:
                mask_ = mask
                mask_ = resize(mask_, IMAGE_SIZE[::-1])
            seg_ = seg
            seg_ = resize(seg_, IMAGE_SIZE[::-1])
            gt_ = (seg_ > 0.5).astype(int)
            gt_flat = []
            pred_flat = []
            for p in range(pred_.shape[0]):
                for q in range(pred_.shape[1]):
                    if not masks or mask_[
                            p, q] > 0.5:  # Inside the mask pixels only
                        gt_flat.append(gt_[p, q])
                        pred_flat.append(pred_[p, q])

            # Accumulate per-output flattened pixels for global evaluation.
            gt_list_out[f"out{out_id + 1}"] += gt_flat
            pred_list_out[f"out{out_id + 1}"] += pred_flat

            # Min-max scale to [0, 255] for the PNG visualization.
            pred_ = 255. * (pred_ - np.min(pred_)) / (np.max(pred_) -
                                                      np.min(pred_))
            cv2.imwrite(
                f"./output/{DATASET}/crop_size_{crop_size}/out{out_id + 1}/{i + 1:02}.png",
                pred_)
            out_id += 1

    # Evaluate only the final refinement output ([-1:]).
    for out_id in range(iteration + 1)[-1:]:
        print('\n\n', f"out{out_id + 1}")
        evaluate(gt_list_out[f"out{out_id + 1}"],
                 pred_list_out[f"out{out_id + 1}"], DATASET)
Пример #19
0
def main():
    """Train an imitation policy on expert trajectories, roll it out, and
    report RMSE between generated and expert trajectories."""
    print(args)
    # =====extract data and build network=====
    # Pick the expert-trajectory file matching the requested sampling mode.
    if args.interpolated == "interpolated":
        traj_filename = "data/expert_trajs/{0}/{1}/traj_interpolated.pkl".format(
            args.memo, args.scenario)
    elif args.interpolated == "sparse":
        traj_filename = "data/expert_trajs/{0}/{1}/traj_sparse.pkl".format(
            args.memo, args.scenario)
    else:
        traj_filename = "data/expert_trajs/{0}/{1}/traj_sample.pkl".format(
            args.memo, args.scenario)

    print("Loading {}".format(traj_filename))
    traj_exp_df = pd.read_pickle(traj_filename)
    # list_x_name = ['interval', 'speed', 'pos_in_lane', 'lane_max_speed', 'if_exit_lane', 'dist_to_signal', 'phase',
    #                'if_leader', 'leader_speed', 'dist_to_leader']
    list_x_name = ['speed', 'if_leader', 'leader_speed', 'dist_to_leader']
    list_y_name = ['action']
    x_array = traj_exp_df[list_x_name].values
    y_array = traj_exp_df[list_y_name].values

    # np.float (an alias of builtin float, i.e. float64) was removed in
    # NumPy 1.24; np.float64 is the identical dtype.
    x = torch.from_numpy(x_array.astype(np.float64))
    y = torch.from_numpy(y_array.astype(np.float64))

    full_dataset = TensorDataset(x, y)

    dataloaders = {}
    dataloaders['train'] = DataLoader(dataset=full_dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=0)

    # build policy network
    net = FCNetwork(len(list_x_name), len(list_y_name), args.hidden_size)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(net.parameters(), args.lr)

    # =====training=====
    model_train(args, dataloaders, optimizer, net, criterion)

    # =====testing=====
    # load trained policy network
    print('Testing...')
    print("Loading model {}".format(
        os.path.join(args.data_dir, args.memo, args.scenario,
                     args.model_name)))
    net = torch.load(
        os.path.join(args.data_dir, args.memo, args.scenario, args.model_name))
    net.eval()

    gen_traj_with_policy(args, policy_model=net)

    print('Calculating RMSE...')

    # Lane-length lookup needed by the evaluator; the original leaked this
    # file handle — close it deterministically with a context manager.
    path_to_len_lane = "data/{0}/roadnet_len.json".format(args.scenario)
    with open(path_to_len_lane, encoding='utf-8') as f:
        len_lane = json.load(f)

    # RMSE against the raw and the sparse expert trajectories.
    evaluate(path_exp_traj="data/expert_trajs/{0}/{1}/traj_raw.pkl".format(
        args.memo, args.scenario),
             path_lrn_traj=os.path.join(args.data_dir, args.memo,
                                        args.scenario, 'traj_lrn_raw.pkl'),
             len_lane=len_lane,
             max_episode_len=args.max_episode_len)
    evaluate(path_exp_traj="data/expert_trajs/{0}/{1}/traj_sparse.pkl".format(
        args.memo, args.scenario),
             path_lrn_traj=os.path.join(args.data_dir, args.memo,
                                        args.scenario, 'traj_lrn_raw.pkl'),
             len_lane=len_lane,
             max_episode_len=args.max_episode_len,
             sparse=True)

    print('Done!')
def AlgoTrainPredict(name,
                     algorithm,
                     algo_particuliers,
                     X_train,
                     X_test,
                     y_test,
                     config,
                     scale="MinMax",
                     var1=10,
                     var2=20):
    """Fit an anomaly-detection ``algorithm`` and report its performance.

    After every branch the label convention is ``1`` = outlier and
    ``-1`` = normal, which is what ``evaluate`` / ``evaluation_detection``
    expect.

    Parameters
    ----------
    name : str
        Display name of the algorithm; selects the fit/predict protocol.
    algorithm : estimator
        Un-fitted model (pyod/sklearn estimator, or a compiled Keras
        autoencoder for "Deep MLP").
    algo_particuliers : container of str
        Names handled by one of the dedicated branches; any name NOT in
        this container goes through the generic fit/predict branch.
    X_train, X_test : array-like
        Train / test feature matrices.
    y_test : array-like
        Ground-truth test labels.
    config : dict
        Deep-MLP settings: "nb_epoch", "batch_size", "outlier_prop".
    scale : str, optional
        "StandardScaler" or anything else for MinMaxScaler (Deep MLP only).
    var1, var2 : int, optional
        Forwarded to ``evaluation_detection``.

    Returns
    -------
    tuple
        ``(y_pred, eval_)`` from the last branch that ran.
        NOTE(review): if ``name`` matches no dedicated branch AND is in
        ``algo_particuliers``, neither local is ever bound and an
        UnboundLocalError is raised — confirm callers only pass
        supported names.
    """
    # pyod-style detectors: predict() returns 0 for inliers.
    if name in ("KNN", "ABOD", "HBOS"):
        algorithm.fit(X_train)
        y_pred = algorithm.predict(X_test)
        y_pred[y_pred == 0] = -1  # remap pyod's 0 (normal) to -1
        print('---------' + name + '-----------')
        eval_ = evaluate(y_test, y_pred)
        print(eval_)
        evaluation_detection(X_test, y_test, y_pred, var1, var2)
    if name == "Local Outlier Factor":
        algorithm.fit(X_train)
        y_pred = algorithm.fit_predict(X_test)
        y_pred = y_pred * -1  # sklearn marks outliers -1; flip so outlier = 1
        print('---------' + name + '-----------')
        eval_ = evaluate(y_test, y_pred)
        print(eval_)
        evaluation_detection(X_test, y_test, y_pred, var1, var2)
    if name == "Deep MLP":
        # Autoencoder: needs scaled input, reconstruction error thresholded
        # by deep_predict using config["outlier_prop"].
        if scale == "StandardScaler":
            scaler = StandardScaler()
        else:
            scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        algorithm.fit(X_train_scaled,
                      X_train_scaled,
                      epochs=config["nb_epoch"],
                      batch_size=config["batch_size"],
                      shuffle=True,
                      validation_split=0.33,
                      verbose=0)
        y_pred = deep_predict(algorithm, X_test_scaled, config["outlier_prop"],
                              y_test)  #outlier = 1
        y_pred = y_pred * -1
        print('---------' + name + '-----------')
        eval_ = evaluate(y_test, y_pred)
        print(eval_)
        evaluation_detection(X_test_scaled, y_test, y_pred, var1, var2)

    if name == "Robust covariance":
        print("EllipticEnvelope")
        algorithm.fit(X_train)
        y_pred = -algorithm.predict(X_test)  #outlier = 1
        # If "outliers" outnumber "normals" the sign convention is flipped.
        if np.sum(y_pred == -1) < np.sum(y_pred == 1):
            print("adjust outlier definition")
            y_pred = -y_pred
        print('---------' + name + '-----------')
        eval_ = evaluate(y_test, y_pred)
        print(eval_)
        evaluation_detection(X_test, y_test, y_pred, var1, var2)

    # Generic sklearn-style branch for every algorithm without a
    # dedicated protocol above.
    if name not in algo_particuliers:
        algorithm.fit(X_train)
        y_pred = -algorithm.predict(X_test)  #outlier = 1
        print('---------' + name + '-----------')
        eval_ = evaluate(y_test, y_pred)
        print(eval_)
        evaluation_detection(X_test, y_test, y_pred, var1, var2)

    return y_pred, eval_
Пример #21
0
def hcoh(train_data,
         train_targets,
         query_data,
         query_targets,
         database_data,
         database_targets,
         code_length,
         lr,
         num_hadamard,
         device,
         topk,
         ):
    """Hadamard Codebook based Online Hashing (HCOH).

    Learns a linear hash projection W by streaming over the training set
    once, pulling each sample's relaxed code toward the (projected)
    Hadamard codeword of its class, then scores retrieval quality.

    Args:
        train_data (Tensor): training samples, one row per sample.
        train_targets (Tensor): class label per training sample.
        query_data (Tensor): query samples.
        query_targets (Tensor): query labels.
        database_data (Tensor): database samples.
        database_targets (Tensor): database labels.
        code_length (int): hash code length.
        lr (float): SGD learning rate.
        num_hadamard (int): number of Hadamard codebook columns.
        device (str): 'cpu' or a cuda device.
        topk (int): compute mAP over the top-k retrieved items.

    Returns:
        tuple: (mAP, precision) as computed by ``evaluate``.
    """
    # Hadamard codebook with randomly shuffled rows; each class label
    # indexes one codebook row.
    codebook = torch.from_numpy(hadamard(num_hadamard)).float().to(device)
    codebook = codebook[torch.randperm(num_hadamard), :]

    n_samples, n_features = train_data.shape

    def _column_normalize(mat, rows, cols):
        # Scale every column of `mat` to unit L2 norm.
        norms = torch.diag(torch.sqrt(mat.t() @ mat)).t()
        return mat / norms.expand(rows, cols)

    # Random projection, column-normalized.
    W = _column_normalize(torch.randn(n_features, code_length).to(device),
                          n_features, code_length)

    if code_length == num_hadamard:
        # Codeword length already matches: no second projection needed.
        W_prime = torch.eye(num_hadamard).to(device)
    else:
        W_prime = _column_normalize(
            torch.randn(num_hadamard, code_length).to(device),
            num_hadamard, code_length)

    # Online SGD pass over the training stream, one sample at a time.
    for idx in range(n_samples):
        sample = train_data[idx, :].reshape(1, -1)
        target_code = (codebook[train_targets[idx], :].view(1, -1) @ W_prime).sign()
        relaxed = torch.tanh(sample @ W)
        # Gradient of the tanh-relaxed squared loss w.r.t. W.
        grad = sample.t() @ ((relaxed - target_code) * (1 - relaxed * relaxed))
        W = W - lr * grad

    # Hash query/database sets and score retrieval quality.
    query_code = generate_code(query_data, W)
    database_code = generate_code(database_data, W)

    mAP, precision = evaluate(
        query_code,
        database_code,
        query_targets,
        database_targets,
        device,
        topk,
    )

    return mAP, precision
Пример #22
0
def generate_result(evaluating=False, use_n_subjects='pred', senti_url=None):
    """ generate result

    Builds a (content_id, subject, sentiment) submission csv from the
    per-subject probabilities and, optionally, scores it against gold data.

    Args:
        evaluating: evaluate on the preliminary (gold-labelled) data
            instead of producing a submission for the final test set
        use_n_subjects: how to pick the number of subjects per content id:
            'gold' (count occurrences in the example submission),
            'pred' (sum of the multi-label prediction) or 'one'
        senti_url: optional url to a pickled sentiment result; only
            loaded when ``evaluating`` is True

    """
    train_url = from_project_root("data/train_2_ex.csv")
    test_url = from_project_root("data/test_public_2v3_ex.csv")
    senti = None
    if evaluating:
        train_url = from_project_root("data/preliminary/train_ex.csv")
        test_url = from_project_root("data/preliminary/test_gold_ex.csv")
        senti = joblib.load(senti_url) if senti_url else None

    X, y, X_test = generate_vectors(train_url,
                                    test_url,
                                    column='article',
                                    max_n=3,
                                    min_df=3,
                                    max_df=0.8,
                                    max_features=20000,
                                    trans_type='hashing',
                                    sublinear_tf=True,
                                    balanced=True,
                                    multilabel_out=False,
                                    label_col='subjects',
                                    only_single=True,
                                    shuffle=True)
    # NOTE(review): the vectors computed above are immediately replaced by
    # this cached load, so the generate_vectors call looks dead — confirm
    # whether it is kept for a side effect before removing it.
    X, y, X_test = joblib.load(
        from_project_root('data/vector/stacked_one_XyX_val_32_subjects.pk'))

    clf = LinearSVC()
    clf.fit(X, y)
    # pred, probas = clf.predict(X_test), predict_proba(clf, X_test)
    pred, probas = gen_10bi_result(train_url,
                                   test_url,
                                   validating=True,
                                   evaluating=True)
    # pred, probas = gen_multi_result(X, y, X_test)
    # One output row per (content_id, predicted subject).
    result_df = pd.DataFrame(
        columns=["content_id", "subject", "sentiment_value", "sentiment_word"])
    cids = pd.read_csv(test_url, usecols=['content_id']).values.ravel()
    for i, cid in enumerate(cids):
        k = 1  # number of subjects to emit for this content id
        if use_n_subjects == 'gold':
            # Count how many rows the example submission has for this id.
            cid_list = pd.read_csv(
                from_project_root(
                    'data/submit_example_2.csv'))['content_id'].tolist()
            k = cid_list.count(cid)
        elif use_n_subjects == 'pred':
            k = max(1, pred[i].sum())
        # Emit the k most probable subjects for this content id.
        for j in probas[i].argsort()[-k:]:
            senti_val = 0 if senti is None else senti[i]
            result_df = result_df.append(
                {
                    'content_id': cid,
                    'subject': id2sub(j),
                    'sentiment_value': senti_val
                },
                ignore_index=True)

    save_url = from_project_root('data/result/tmp.csv')
    result_df.to_csv(save_url, index=False)
    if evaluating:
        # Columns '0'..'9' hold per-subject codes; values < 2 appear to
        # mark a present subject — TODO confirm against the data spec.
        y_true = pd.read_csv(test_url, usecols=list(map(str,
                                                        range(10)))).values < 2
        calc_metrics(y_true, pred, probas)
        print("metrics on %s subjects: " % use_n_subjects)
        evaluate(save_url, use_senti=False)
        evaluate(save_url, use_senti=True)
Пример #23
0
def DCMF(g1,g2,original_to_new,layer=3,q=0.2,alpha=5,c=0.5):
    """Align the nodes of graphs g1 and g2 via a cross-graph random walk.

    Builds a joint transition matrix over the union of both node sets
    (within-graph edges plus cross-graph structural-similarity edges),
    factorizes the resulting random-walk matrix, and matches nodes by
    cosine similarity of the two embedding halves.

    Args:
        g1, g2: networkx graphs whose nodes are labelled 0..N-1.
        original_to_new: ground-truth alignment dict consumed by ``evaluate``.
        layer: depth of the degree context used by ``structing``.
        q: base probability of switching to the other graph during the walk.
        alpha, c: structural-similarity hyper-parameters for ``structing``.
    """

    Node1 = nx.number_of_nodes(g1)
    Node2 = nx.number_of_nodes(g2)
    attribute = None
    adj1 = nx.to_numpy_array(g1, nodelist=list(range(Node1)))
    adj2 = nx.to_numpy_array(g2, nodelist=list(range(Node2)))

    # Per-node multi-layer degree contexts feed the structural matching.
    G1_degree_dict = cal_degree_dict(list(g1.nodes()), g1, layer)
    G2_degree_dict = cal_degree_dict(list(g2.nodes()), g2, layer)
    struc_neighbor1, struc_neighbor2, struc_neighbor_sim1, struc_neighbor_sim2, \
    struc_neighbor_sim1_score, struc_neighbor_sim2_score = \
        structing(layer, g1, g2, G1_degree_dict, G2_degree_dict, attribute, alpha, c, 10)

    # Build the transition matrices over the joint node set
    # (g2 nodes are offset by Node1).
    # P_TRANS1: within-graph moves; P_TRANS2: cross-graph moves.
    P_TRANS1 = np.zeros((Node1 + Node2, Node1 + Node2))
    P_TRANS2 = np.zeros((Node1 + Node2, Node1 + Node2))
    D1 = np.sum(adj1, axis=1).reshape(-1, 1)
    D2 = np.sum(adj2, axis=1).reshape(-1, 1)
    adj1_hat = adj1 / D1
    adj2_hat = adj2 / D2

    for edge in g1.edges():
        P_TRANS1[edge[0], edge[1]] = adj1_hat[edge[0], edge[1]]
        P_TRANS1[edge[1], edge[0]] = adj1_hat[edge[1], edge[0]]
    for edge in g2.edges():
        P_TRANS1[edge[0] + Node1, edge[1] + Node1] = adj2_hat[edge[0], edge[1]]
        P_TRANS1[edge[1] + Node1, edge[0] + Node1] = adj2_hat[edge[1], edge[0]]
    # Cross-graph edges weighted by structural similarity scores.
    for key in struc_neighbor_sim1.keys():

        for index, neighbor in enumerate(struc_neighbor1[key]):
            P_TRANS2[key, neighbor + Node1] = struc_neighbor_sim1[key][index]

    for key in struc_neighbor_sim2.keys():

        for index, neighbor in enumerate(struc_neighbor2[key]):
            P_TRANS2[key + Node1, neighbor] = struc_neighbor_sim2[key][index]

    # Per-node probability of switching graphs: raised above the base q
    # when the best structural match clearly dominates the runner-up.
    cross_switch_alpha = np.zeros_like(P_TRANS2)
    for key in struc_neighbor_sim1.keys():
        if struc_neighbor_sim1[key][0] - struc_neighbor_sim1[key][1] >= 0.15:
            new_q = min(struc_neighbor_sim1[key][0] + q, 1)
        else:
            new_q = q
        cross_switch_alpha[key, :Node1] = 1 - new_q
        cross_switch_alpha[key, Node1:] = new_q

    for key in struc_neighbor_sim2.keys():
        if struc_neighbor_sim2[key][0] - struc_neighbor_sim2[key][1] >= 0.15:
            new_q = min(struc_neighbor_sim2[key][0] + q, 1)
        else:
            new_q = q
        cross_switch_alpha[key + Node1, :Node1] = new_q
        cross_switch_alpha[key + Node1, Node1:] = 1 - new_q

    P_TRANS = (P_TRANS1 + P_TRANS2) * cross_switch_alpha

    # Symmetrize (elementwise max kept over the averaged alternative below).
    P_TRANS = np.maximum(P_TRANS, P_TRANS.T)
    # P_TRANS = (P_TRANS + P_TRANS.T)/2.0

    # Row-normalize back into a stochastic matrix.
    P_TRANS = P_TRANS / np.sum(P_TRANS, axis=1, keepdims=True)
    # Compute the stationary distribution:
    tmp = (P_TRANS.sum(axis=0) / np.sum(P_TRANS)).tolist()

    D_R = np.zeros_like(P_TRANS)
    Node = Node1 + Node2
    # Initial sampling probabilities: diagonal matrix of inverse
    # stationary probabilities.
    for i in range(len(tmp)):
        D_R[i, i] = 1 / tmp[i]

    # window size below 5
    M = get_random_walk_matrix(P_TRANS, D_R, 20, 5)

    print("M计算完成")
    # for cora
    res = svd_deepwalk_matrix(M, dim=64)

    print("得到向量")
    # Split the joint embedding back into per-graph halves and match
    # nodes by cosine similarity.
    e1, e2 = split_embedding(res, Node1)
    sim = cosine_similarity(e1, e2)
    acc = evaluate(sim, ans_dict=original_to_new)
    print(acc)
Пример #24
0
def _evaluate(eval_fn, input_fn, path, config, save_path):
    """Run one pass over the eval set and score text-detection output.

    Restores the latest checkpoint from ``path``, reconstructs contour
    predictions per image, accumulates count-weighted recall/precision,
    and writes a debug visualisation per batch.

    Args:
        eval_fn: builds the prediction tensor from the feature dict.
        input_fn: returns the feature dict (input_img, lens, cnts, care).
        path: checkpoint directory to restore from.
        config: tf session config proto.
        save_path: directory where debug images are written.

    Returns:
        Average F-measure over the whole evaluation set.
    """
    graph = tf.Graph()
    with graph.as_default():
        features = input_fn()
        prediction = eval_fn(features)
        results = {
            'prediction': prediction,
            'input_img': features['input_img'],
            'lens': features['lens'],
            'cnts': features['cnts'],
            'care': features['care']
        }
        sess_creator = tf.train.ChiefSessionCreator(checkpoint_dir=path,
                                                    config=config)

        # Count-weighted running sums so the final averages are per-contour,
        # not per-image.
        recall_sum, gt_n_sum, precise_sum, pred_n_sum = 0, 0, 0, 0
        with tf.train.MonitoredSession(session_creator=sess_creator) as sess:
            tf.logging.info('start evaluation')
            time = 0  # batch counter, also used to name the debug images
            while not sess.should_stop():
                time += 1
                print('time', time)
                outputs = sess.run(results)
                img = outputs['input_img']
                prediction = outputs['prediction']
                lens = outputs['lens']
                cnts = outputs['cnts']
                # Ground-truth contours appear stored at 2x scale and padded
                # to equal length — rescale, then strip the padding.
                # TODO confirm the 2x convention against the input pipeline.
                cnts = [(x / 2).astype(np.int32) for x in cnts]
                cnts = _depad(cnts, lens)
                care = outputs['care']
                # imname = outputs['imname']
                # print(imname)
                for i in range(img.shape[0]):
                    re_cnts = reconstruct(img[i], prediction[i])
                    TR, TP, T_gt_n, T_pred_n, PR, PP, P_gt_n, P_pred_n = \
                        evaluate(img[i],cnts,re_cnts,care)
                    tf.logging.info(' recall: ' + str(TR) + '; precise: ' +
                                    str(TP))
                    recall_sum += TR * T_gt_n
                    precise_sum += TP * T_pred_n
                    gt_n_sum += T_gt_n
                    pred_n_sum += T_pred_n

                    # 2x2 debug mosaic: input image (top-right) and the
                    # softmaxed prediction heat map (bottom-right), with
                    # GT contours in red and reconstructed ones in green.
                    height, width = prediction.shape[1], prediction.shape[2]
                    imgoutput = np.zeros(shape=(height * 2, width * 2, 3),
                                         dtype=np.uint8)
                    # NOTE(review): uses img[0] rather than img[i] inside the
                    # per-image loop — looks like a bug; confirm intended.
                    imgoutput[0:height, width:width * 2, :] = cv2.resize(
                        img[0], (width, height))
                    imgoutput[height:height * 2, width:width *
                              2, :] = (_softmax(prediction[i, :, :, 0:2]) *
                                       255).astype(np.uint8)
                    cv2.drawContours(imgoutput, cnts, -1, (0, 0, 255))
                    cv2.drawContours(imgoutput, re_cnts, -1, (0, 255, 0))
                    cv2.imwrite(
                        os.path.join(save_path,
                                     'output_{:03d}.png'.format(time)),
                        imgoutput)

        # Guard every division so an empty eval set yields 0.0 instead of
        # a ZeroDivisionError.
        if int(gt_n_sum) != 0:
            ave_r = recall_sum / gt_n_sum
        else:
            ave_r = 0.0
        if int(pred_n_sum) != 0:
            ave_p = precise_sum / pred_n_sum
        else:
            ave_p = 0.0
        if ave_r != 0.0 and ave_p != 0.0:
            ave_f = 2 / (1 / ave_r + 1 / ave_p)  # harmonic mean (F1)
        else:
            ave_f = 0.0
        tf.logging.info('ave recall:{}, precise:{}, f:{}'.format(
            ave_r, ave_p, ave_f))
        tf.logging.info('end evaluation')
        return ave_f
Пример #25
0
def main():
    """Create the model and start the (supervised) segmentation training.

    Trains ``AD_Trainer`` on Cityscapes, optionally logging to tensorboard,
    and checkpoints whenever validation mIoU improves on ``args.mIOU``.
    """
    print("NUMBER OF CLASSES: ", str(args.num_classes))

    # Parse "W,H" command-line strings into tuples.  NOTE: crop_size is
    # stored as (h, w) while input_size / gt_size are (w, h) — this is the
    # order the dataset classes expect, do not "fix" it.
    w, h = map(int, args.input_size.split(','))
    args.input_size = (w, h)

    w, h = map(int, args.crop_size.split(','))
    args.crop_size = (h, w)

    w, h = map(int, args.gt_size.split(','))
    args.gt_size = (w, h)

    cudnn.enabled = True
    cudnn.benchmark = True

    # create result dir
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    # Parse the requested GPU ids; they only determine the multi-GPU flag
    # (the original had identical if/else branches constructing Trainer).
    gpu_ids = [int(str_id) for str_id in args.gpu_ids.split(',') if int(str_id) >= 0]
    args.multi_gpu = len(gpu_ids) > 1
    Trainer = AD_Trainer(args)

    # Per-channel (BGR) means subtracted by the dataset loader.
    TARGET_IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32)

    trainloader = data.DataLoader(
        cityscapesDataSet(args.data_dir, args.data_list,
                max_iters=args.num_steps * args.batch_size,
                resize_size=args.input_size,
                crop_size=args.crop_size,
                set=args.set, scale=False, mirror=args.random_mirror, mean=TARGET_IMG_MEAN, autoaug = args.autoaug_target, source_domain=args.source_domain),
    batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, pin_memory=True, drop_last=True)
    trainloader_iter = enumerate(trainloader)

    # set up tensor board
    writer = None
    if args.tensorboard:
        args.log_dir += '/'+ os.path.basename(args.snapshot_dir)
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)

        writer = SummaryWriter(args.log_dir)

    # Best validation mIoU seen so far; checkpoints only when beaten.
    best_mIoUs = args.mIOU

    for i_iter in range(args.num_steps):

        loss_seg_value = 0

        adjust_learning_rate(Trainer.gen_opt , i_iter, args)

        _, batch = trainloader_iter.__next__()
        images, labels, _, _ = batch
        images = images.cuda()
        labels = labels.long().cuda()

        with Timer("Elapsed time in update: %f"):
            loss_seg = Trainer.gen_update(images, labels, i_iter)
            loss_seg_value += loss_seg.item()

        if args.tensorboard and i_iter % 100 == 0:
            writer.add_scalar('loss_seg', loss_seg_value, i_iter)

        print('\033[1m iter = %8d/%8d \033[0m loss_seg = %.3f' %(i_iter, args.num_steps, loss_seg_value))

        del loss_seg

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            mIoUs, _ = evaluate(args, args.gt_dir, args.gt_list, args.result_dir, Trainer.G)
            current_mIoU = round(np.nanmean(mIoUs) * 100, 2)
            # BUGFIX: `writer` only exists when tensorboard is enabled; the
            # original called add_scalar here unconditionally (NameError).
            if args.tensorboard:
                writer.add_scalar('mIOU', current_mIoU, int(i_iter/args.save_pred_every)) # (TB)
            if current_mIoU > best_mIoUs:
                print('save model ...')
                best_mIoUs = current_mIoU
                torch.save(Trainer.G.state_dict(), osp.join(args.snapshot_dir, 'supervised_seg_' + str(i_iter) + '.pth'))

    if args.tensorboard:
        writer.close()
Пример #26
0
def train_net(args):
    """Train a TRADE dialogue-state-tracking model with early stopping.

    Trains for up to 200 epochs, evaluating every ``args.eval_period``
    epochs; saves the model whenever dev accuracy improves and stops
    after ``args.patience`` evaluations without improvement (or on a
    perfect score).
    """
    # Configure models and load data
    # avg_best: best dev accuracy so far; cnt: patience counter;
    # acc: most recent dev accuracy.
    avg_best, cnt, acc = 0.0, 0, 0.0
    train, dev, test, test_special, lang, SLOTS_LIST, gating_dict, max_word = prepare_data_seq(
        True, False, batch_size=args.batch_size)

    # Load model
    model = TRADE(hidden_size=args.hidden_size,
                  lang=lang,
                  lr=args.lr,
                  dropout=args.dropout,
                  slots=SLOTS_LIST,
                  gating_dict=gating_dict).to(args.device)

    # Configure criterion, optimizer, scheduler
    # Two losses: pointer-generator loss over slot values and a
    # cross-entropy loss over the slot gates.
    criterion = masked_cross_entropy_for_value
    criterion_gate = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # mode='max': the scheduler steps on dev accuracy, not loss.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                               mode='max',
                                               factor=0.5,
                                               patience=1,
                                               min_lr=0.0001,
                                               verbose=True)

    # print("[Info] Slots include ", SLOTS_LIST)
    # print("[Info] Unpointable Slots include ", gating_dict)

    for epoch in range(200):
        model.train()
        print("Epoch:{}".format(epoch))
        # Run the train function
        pbar = tqdm(enumerate(train), total=len(train))
        for i, data in pbar:
            optimizer.zero_grad()

            # Encode and Decode
            # Scheduled sampling: feed gold tokens with the configured
            # probability, model outputs otherwise.
            use_teacher_forcing = random.random() < args.teacher_forcing_ratio
            all_point_outputs, gates, words_point_out, words_class_out = model(
                data, use_teacher_forcing, SLOTS_LIST[1])

            loss_ptr = criterion(
                all_point_outputs.transpose(0, 1).contiguous(),
                data["generate_y"].contiguous(
                ),  # [:,:len(self.point_slots)].contiguous(),
                data["y_lengths"])  # [:,:len(self.point_slots)])
            loss_gate = criterion_gate(
                gates.transpose(0, 1).contiguous().view(-1, gates.size(-1)),
                data["gating_label"].contiguous().view(-1))

            loss = loss_ptr + loss_gate
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            pbar.set_description(print_loss(loss_ptr, loss_gate))

        if (epoch + 1) % int(args.eval_period) == 0:

            acc = evaluate(model, dev, avg_best, SLOTS_LIST[2], gating_dict)
            scheduler.step(acc)

            if acc >= avg_best:
                avg_best = acc
                cnt = 0
                save_model(model)
            else:
                cnt += 1

            if cnt == args.patience or acc == 1.0:
                print("Ran out of patient, early stop...")
                break
Пример #27
0
def main():
    """Train a Faster R-CNN detector, periodically evaluating/checkpointing.

    Sets up a run directory with tensorboard logging, builds the chosen
    backbone + FPN-less FasterRCNN, trains for ``args.epochs`` epochs,
    saves weights every ``args.save_epoch`` epochs and evaluates (plus a
    single-image smoke test) every ``args.eval_epoch`` epochs.
    """
    args = parse_args()

    working_dir_name = get_working_dir_name(args.results_root, args)
    working_dir = os.path.join(args.results_root, working_dir_name)
    check_dir(working_dir)
    print(f'Working Dir : {working_dir}')
    make_results_folders(working_dir) # 'weights' / 'test_img'

    # update args with run-local paths
    args.working_dir = working_dir
    args.weights_dir = os.path.join(args.working_dir, 'weights')
    args.test_img_dir = os.path.join(args.working_dir, 'test_img')

    # init writer for tensorboard
    writer = SummaryWriter(working_dir)
    print(f'Tensorboard info will be saved in \'{working_dir}\'')

    # save args in run folder for reproducibility
    with open(os.path.join(working_dir, 'args.txt'), 'w') as f:
        json.dump(args.__dict__, f, indent=4)

    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(args)

    # Dataset
    if args.dataset == 'sixray':
        train_dataset = sixrayDataset('../SIXray', mode='train')
        eval_dataset = sixrayDataset('../SIXray', mode='eval')
    elif args.dataset ==  'coco' :
        train_dataset = cocoDataset('../coco', mode='train')
        eval_dataset = cocoDataset('../coco', mode='eval')
    else :
        raise RuntimeError('Invalid dataset type')


    # Dataloader
    train_loader = DataLoader(dataset=train_dataset, batch_size=args.batch_size,
                            shuffle=True, num_workers=0,
                            collate_fn=collate_sixray)

    eval_loader = DataLoader(dataset=eval_dataset, batch_size=1,
                            shuffle=False, num_workers=0,
                            collate_fn=collate_sixray)

    # Models
    if args.model == 'res34':
        backbone = torchvision.models.resnet34(pretrained=True) # res 34
        backbone = get_resent_features(backbone)
        out_ch = 512 # resnet18,34 : 512
    elif args.model == 'res50':
        backbone = torchvision.models.resnet50(pretrained=True) # res 50
        backbone = get_resent_features(backbone)
        out_ch = 2048 # resnet50~152 : 2048
    elif args.model == 'res34AAA':
        backbone = AAA('res34', True, args)
        out_ch = 512 # resnet18,34 : 512
    else :
        # BUGFIX: was a bare `assert()` (always-raising, opaque
        # AssertionError, and stripped under -O); raise explicitly.
        raise RuntimeError('Invalid model type')

    # Anchor size : ((size, size*2, size*4, size*8, size*16), )
    anchor_size = (tuple(int(args.anchor_init_size * math.pow(2, i)) for i in range(5)), )

    backbone.out_channels = out_ch

    anchor_generator = AnchorGenerator(sizes=anchor_size,
                                        aspect_ratios=((0.5, 1.0, 2.0),))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    model = FasterRCNN(backbone=backbone,
                    num_classes=7, # 6 class + 1 background
                    rpn_anchor_generator=anchor_generator,
                    box_roi_pool=roi_pooler,
                    min_size=args.img_min_size,
                    max_size=args.img_max_size).to(args.device)

    # Optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    schedular = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[90, 120], gamma=0.1)

    # train
    global_step = 0
    accuracies = {}
    # Initialised so the tensorboard logging below never hits unbound
    # locals when the train loader yields no batches.
    loss_cls = loss_reg = loss_obj = loss_rpn_reg = 0.0
    losses = 0.0
    for epoch in range(1, args.epochs+1):
        progress = tqdm.tqdm(train_loader)
        for images, targets, _ in progress:
            model.train()

            images = list(image.to(args.device) for image in images)
            targets = [{k: v.to(args.device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            loss_cls = loss_dict['loss_classifier'].item()
            loss_reg = loss_dict['loss_box_reg'].item()
            loss_obj = loss_dict['loss_objectness'].item()
            loss_rpn_reg = loss_dict['loss_rpn_box_reg'].item()

            progress.set_description(f'Train {epoch} / {args.epochs}, lr : {optimizer.param_groups[0]["lr"]} ' +
                                    f'Loss : [TT]{losses:.3f}, [HC]{loss_cls:.3f}, [HR]{loss_reg:.3f}, ' +
                                    f'[RO]{loss_obj:.3f}, [RR]{loss_rpn_reg:.3f} ')

        if epoch % args.save_epoch == 0 :
            torch.save(model.state_dict(),
                       os.path.join(args.weights_dir, f'{args.model}_{epoch}.ckpt'))

        if epoch % args.eval_epoch == 0 :
            accuracies = evaluate(model, eval_loader, args, epoch, accs=accuracies, update_acc=True)
            # BUGFIX: `image_path` was only bound when test_img_name was
            # empty, so a user-supplied name crashed with a NameError at
            # the test() call — always test on args.test_img_name.
            if args.test_img_name == '':
                args.test_img_name = os.path.join(
                    args.test_img_folder,
                    random.sample(os.listdir(args.test_img_folder), 1)[0])
            test(model, args.test_img_name, args, epoch)

        ## Tensor Board
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step)
        writer.add_scalar('loss_classifier', loss_cls, global_step)
        writer.add_scalar('loss_box_reg', loss_reg, global_step)
        writer.add_scalar('loss_objectness', loss_obj, global_step)
        writer.add_scalar('loss_rpn_box_reg', loss_rpn_reg, global_step)
        global_step += 1

        schedular.step()

    ## Evaluate rankings
    accuracies = sorted(accuracies.items(), key=lambda x: x[1], reverse=True)
    print('##### TOP 3 models by iou 0.5 value #####')
    # BUGFIX: guard against fewer than 3 recorded evaluations.
    for i in range(min(3, len(accuracies))):
        print(f'TOP {i+1} : epoch {accuracies[i][0]}, accuracy {accuracies[i][1]}')
Пример #28
0
def gbdt_solver(train_x, train_y, validation_x, test_x, filepath, validation_y = np.array([]), feature_names = [], validation_artist_id=None, validation_month=None, validation_label_day=None, transform_type=0, validation_song_id=None) :
    """Train a warm-started GBDT in batches of trees, keeping the best round.

    Grows the ensemble ``batch`` trees at a time, scores the validation
    set after every batch with the competition metric, and dumps the
    best-scoring model to ``filepath + '/model/gbdt.pkl'``.

    Parameters
    ----------
    train_x, train_y : ndarray
        Training features / (possibly transformed) targets.
    validation_x, test_x : ndarray
        Validation and test features.
    filepath : str
        Output directory (must contain a ``model`` sub-directory).
    validation_y : ndarray, optional
        Validation targets (play counts).  NOTE: a mutable default —
        callers must never mutate it.
    feature_names : list of str, optional
        Names used for the (currently disabled) importance plot.
    validation_artist_id, validation_month, validation_label_day,
    validation_song_id : optional
        Metadata forwarded to the evaluation / prediction post-processing.
    transform_type : int, optional
        Target transform id understood by Transform / Convert2Plays.

    Returns
    -------
    tuple
        (validation prediction in plays, transformed test prediction).
    """
    logging.info('start training the gbdt model')

    """
    col_last_month_plays=None
    for i in xrange(len(feature_names)):
        if feature_names[i]=='last_month_plays':
            col_last_month_plays=i
    assert col_last_month_plays is not None, 'No feature last_month_plays found!'

    train_last_month_plays=train_x[:, col_last_month_plays]
    validation_last_month_plays=validation_x[:, col_last_month_plays]
    test_last_month_plays=test_x[:, col_last_month_plays]

    
    train_y_ratio=(train_y-train_last_month_plays)*1.0/train_last_month_plays
    validation_y_ratio=(validation_y-validation_last_month_plays)*1.0/validation_last_month_plays

    #Transform predict

    tmp_train_y = train_y
    tmp_validation_y = validation_y

    train_y = Transform(train_y, transform_type, train_last_month_plays)
    validation_y = Transform(validation_y, transform_type, validation_last_month_plays)
    """

    # warm_start + growing n_estimators lets us add trees incrementally.
    params = {
        'n_estimators': 0,
        'learning_rate': 0.03,
        'random_state': 1000000007,
        'max_depth': 3,
        'verbose' : 2,
        'warm_start': True
    }

    max_num_round = 100
    batch = 10
    best_val = -1e60
    history_validation_val = []
    best_num_round = -1
    curr_round = 0

    assert max_num_round % batch == 0
    gb = GradientBoostingRegressor(**params)
    # BUGFIX: Python 3 — xrange is gone and `/` yields a float; use
    # range with integer division.
    for step in range(max_num_round // batch):
        train_x = train_x.copy(order='C')
        train_y = train_y.copy(order='C')
        gb.n_estimators += batch
        logging.info('current round is: %d' % curr_round)
        gb.fit(train_x, train_y)
        curr_round += batch
        predict = gb.predict(validation_x)
        # de-transform back to play counts before scoring
        predict = Convert2Plays(predict, transform_type)
        predict = HandlePredict(predict.tolist(), validation_song_id)
        curr_val = evaluate.evaluate(predict, validation_y.tolist(), validation_artist_id, validation_month, validation_label_day)
        history_validation_val.append(curr_val)
        logging.info('the current score is %.10f' % curr_val)
        # Only checkpoint once at least 100 rounds have been grown.
        if curr_round >= 100 and curr_val > best_val:
            best_num_round = curr_round
            best_val = curr_val
            joblib.dump(gb, filepath + '/model/gbdt.pkl')

    logging.info('the best round is %d, the score is %.10f' % (best_num_round, best_val))
    gb = joblib.load(filepath + '/model/gbdt.pkl')
    predict = gb.predict(validation_x)
    # de-transform back to play counts
    predict = Convert2Plays(predict, transform_type)

    with open(filepath + '/parameters.param', 'w') as out :
        for key, val in params.items():
            out.write(str(key) + ': ' + str(val) + '\n')
        # BUGFIX: these summary lines were inside the loop above and were
        # therefore written once per parameter; write them once, after.
        out.write('max_num_round: ' + str(max_num_round) + '\n')
        out.write('best_num_round: ' + str(best_num_round) + '\n')
        out.write('transform_type: ' + str(transform_type) + '\n')

    # unable to use matplotlib if used multiprocessing (kept disabled)
    if validation_y.shape[0] and False :
        logging.info('the loss in Training set is %.4f' % loss_function(train_y, gb.predict(train_x)))
        logging.info('the loss in Validation set is %.4f' % loss_function(validation_y, gb.predict(validation_x)))

        plt.figure(figsize=(12, 6))
        # Plot feature importance
        plt.subplot(1, 2, 1)
        # BUGFIX: was `if (feature_names) == 0:` (never true for a list).
        if len(feature_names) == 0:
            feature_names = [str(i + 1) for i in range(validation_x.shape[0])]
        feature_names = np.array(feature_names)
        feature_importance = gb.feature_importances_
        feature_importance = 100.0 * (feature_importance / feature_importance.max())
        sorted_idx = np.argsort(feature_importance)
        pos = np.arange(sorted_idx.shape[0]) + .5
        plt.barh(pos, feature_importance[sorted_idx], align='center')
        plt.yticks(pos, feature_names[sorted_idx])
        plt.xlabel('Relative Importance')
        plt.title('Variable Importance')


        # Plot training deviance
        plt.subplot(1, 2, 2)
        # BUGFIX: params['n_estimators'] stays 0 (only gb.n_estimators is
        # grown); size the arrays off the fitted estimator instead.
        test_score = np.zeros((gb.n_estimators,), dtype=np.float64)
        for i, y_pred in enumerate(gb.staged_predict(validation_x)):
            test_score[i] = loss_function(validation_y, y_pred)
        plt.title('Deviance')
        plt.plot(np.arange(gb.n_estimators) + 1, gb.train_score_, 'b-',
                          label='Training Set Deviance')
        plt.plot(np.arange(gb.n_estimators) + 1, test_score, 'r-',
                          label='Test Set Deviance')
        plt.legend(loc='upper right')
        plt.xlabel('Boosting Iterations')
        plt.ylabel('Deviance')

        plt.savefig(filepath + '/statistics.jpg')

    return predict, Transform(gb.predict(test_x), transform_type)
Пример #29
0
import json
import os

import numpy as np

from utils.evaluate import evaluate
from utils.helper import JsonEncoder

# Directory holding one sub-directory of saved test results per baseline model.
res_dir = 'run'
models = ('dcrnn', 'fclstm', 'gwnet', 'stgcn')

# Collect metrics per model, split by output channel:
# channel 0 is indexed as 'speed', channel 1 as 'available'
# (assumed from the slicing below — TODO confirm against the training code).
unified_results = {'speed': dict(), 'available': dict()}

# Baselines: load each model's saved predictions/targets and score both channels.
for model in models:
    results = np.load(os.path.join(res_dir, model, 'test-results.npz'))
    predictions, targets = results['predictions'], results['targets']

    unified_results['speed'][model] = evaluate(predictions[:, :, 0], targets[:, :, 0])
    unified_results['available'][model] = evaluate(predictions[:, :, 1], targets[:, :, 1])

# Historical-average and VAR baselines come pre-scored as JSON.
ha_res = json.load(open('run/ha_results.json'))
unified_results['speed']['ha'] = ha_res['speed']
unified_results['available']['ha'] = ha_res['available']

var_res = json.load(open('run/var_results.json'))
unified_results['speed']['var'] = var_res['speed']
unified_results['available']['var'] = var_res['available']

# Our models were trained per-channel, so each run carries a single channel.
speed_results = np.load(os.path.join(res_dir, 'ours_speed', 'test-results.npz'))
avail_results = np.load(os.path.join(res_dir, 'ours_avail', 'test-results.npz'))
unified_results['speed']['sp'] = evaluate(speed_results['predictions'], speed_results['targets'])
unified_results['available']['ap'] = evaluate(avail_results['predictions'], avail_results['targets'])
def predict(ACTIVATION='ReLU',
            dropout=0.2,
            minimum_kernel=32,
            epochs=50,
            crop_size=64,
            stride_size=3,
            DATASET='DRIVE'):
    """Run the trained Res-unet over the DATASET test split and evaluate it.

    Loads the weights saved under ``./trained_model/<model_name>/``, predicts a
    vessel-probability map for every test image patch-wise, writes per-image
    pickles and PNGs under ``./output/<model_name>/crop_size_<crop_size>/out/``,
    and finally calls ``evaluate`` on all pixels that fall inside the FOV mask.

    Args:
        ACTIVATION: Name of an activation class resolvable via ``globals()``.
        dropout: Dropout rate passed to the network builder.
        minimum_kernel: Base number of convolution kernels.
        epochs: Training epoch count (used only to locate the weight file).
        crop_size: Side length of the square patches fed to the network.
        stride_size: Stride between consecutive patches.
        DATASET: Dataset identifier; only 'DRIVE' has a known output size.

    Raises:
        ValueError: If DATASET has no configured output image size.
    """
    print('-' * 40)
    print('Loading and preprocessing test data...')
    print('-' * 40)

    network_name = "Res-unet"
    model_name = f"{network_name}_cropsize_{crop_size}_epochs_{epochs}"

    prepare_dataset.prepareDataset(DATASET)
    test_data = [
        prepare_dataset.getTestData(0, DATASET),
        prepare_dataset.getTestData(1, DATASET),
        prepare_dataset.getTestData(2, DATASET)
    ]
    IMAGE_SIZE = None
    if DATASET == 'DRIVE':
        IMAGE_SIZE = (565, 584)
    if IMAGE_SIZE is None:
        # Previously this fell through and crashed inside resize() with an
        # opaque TypeError; fail early with a clear message instead.
        raise ValueError(f"No output image size configured for dataset {DATASET!r}")

    out_dir = f"./output/{model_name}/crop_size_{crop_size}/out/"
    # exist_ok=True already makes this a no-op for an existing directory, so
    # the previous bare `except: pass` only hid genuine failures (permissions
    # etc.) that would resurface less clearly at the first file write below.
    os.makedirs(out_dir, exist_ok=True)
    gt_list_out = {"out": []}
    pred_list_out = {"out": []}

    print('-' * 30)
    print('Loading saved weights...')
    print('-' * 30)

    activation = globals()[ACTIVATION]
    model = get_res_unet(minimum_kernel=minimum_kernel,
                         do=dropout,
                         size=crop_size,
                         activation=activation)
    print("Model : %s" % model_name)
    load_path = f"./trained_model/{model_name}/{model_name}.hdf5"
    model.load_weights(load_path, by_name=False)

    imgs = test_data[0]
    segs = test_data[1]
    masks = test_data[2]

    print('-' * 30)
    print('Predicting masks on test data...')
    print('-' * 30)
    print('\n')

    for i in tqdm(range(len(imgs))):

        img = imgs[i]  # (576,576,3)
        seg = segs[i]  # (576,576,1)
        mask = masks[i]  # (584,565,1)

        patches_pred, new_height, new_width, adjustImg = crop_prediction.get_test_patches(
            img, crop_size, stride_size)
        pred = model.predict(patches_pred)  # per-patch network output

        pred_patches = crop_prediction.pred_to_patches(pred, crop_size,
                                                       stride_size)
        # Stitch overlapping patch predictions back into one full-size map,
        # then crop away the padding added to fit an integer number of strides.
        pred_imgs = crop_prediction.recompone_overlap(pred_patches, crop_size,
                                                      stride_size, new_height,
                                                      new_width)
        pred_imgs = pred_imgs[:, 0:prepare_dataset.DESIRED_DATA_SHAPE[0],
                              0:prepare_dataset.DESIRED_DATA_SHAPE[0], :]
        probResult = pred_imgs[0, :, :, 0]  # (576,576)
        pred_ = probResult
        with open(f"{out_dir}{i + 1:02}.pickle", 'wb') as handle:
            pickle.dump(pred_, handle, protocol=pickle.HIGHEST_PROTOCOL)
        # Resize prediction, mask and ground truth back to the dataset's
        # native resolution before scoring.
        pred_ = resize(pred_, IMAGE_SIZE[::-1])  # (584,565)
        mask_ = resize(mask, IMAGE_SIZE[::-1])  # (584,565)
        seg_ = resize(seg, IMAGE_SIZE[::-1])  # (584,565)
        gt_ = (seg_ > 0.5).astype(int)
        gt_flat = []
        pred_flat = []
        for p in range(pred_.shape[0]):
            for q in range(pred_.shape[1]):
                if mask_[p, q] > 0.5:  # Inside the mask pixels only
                    gt_flat.append(gt_[p, q])
                    pred_flat.append(pred_[p, q])

        gt_list_out["out"] += gt_flat
        pred_list_out["out"] += pred_flat

        # Min-max normalise to [0, 255] for the PNG dump.
        pred_ = 255. * (pred_ - np.min(pred_)) / (np.max(pred_) -
                                                  np.min(pred_))
        cv2.imwrite(f"{out_dir}{i + 1:02}.png", pred_)

    print('-' * 30)
    print('Prediction finished')
    print('-' * 30)
    print('\n')

    print('-' * 30)
    print('Evaluate the results')
    print('-' * 30)

    evaluate(gt_list_out["out"], pred_list_out["out"], epochs, crop_size,
             DATASET, network_name)

    print('-' * 30)
    print('Evaluate finished')
    print('-' * 30)
Пример #31
0
from config import *
import torch
import os

from utils.dataloader import get_data_iterator
from utils.evaluate import evaluate

# Pin RNG state so repeated test runs are reproducible.
torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

if __name__ == '__main__':
    device = torch.device('cuda', gpuid)

    # Rebuild the train/valid/test iterators from the serialized fields.
    print('start getting data_iterator of train/valid/test...')
    text_field_obj = torch.load(os.path.join(data_path, text_field))
    label_field_obj = torch.load(os.path.join(data_path, label_field))
    iterators = get_data_iterator(device, text_field_obj, label_field_obj,
                                  batch_size, data_path, train_file,
                                  valid_file, test_file)
    _, _, test_iterator = iterators
    print('=================== success ===================')

    # Restore the trained model and build the loss used for evaluation.
    print('start loading model...')
    model = torch.load(os.path.join('saved_models', f'model_{mark}.pkl'))
    # NOTE(review): `nn` is not imported here explicitly — presumably it is
    # re-exported by `from config import *`; confirm against config.py.
    criterion = nn.CrossEntropyLoss().to(device)
    print('=================== success ===================')

    # Score the model on the held-out test set.
    print('start testing...')
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc * 100:.2f}%')