def plot_time_boxplot(dataset_id):
    # data 1st level = technique, 2nd = list of revisions, 3rd = list of observations
    data = []
    for i, technique_id in enumerate(technique_list):
        technique_results = []
        history = Parser.parse_rectangles(technique_id, dataset_id)
        for revision in range(len(history) - 1):
            un_mov = Metrics.compute_unavoidable_movement(
                history[revision], history[revision + 1])
            delta_vis = Metrics.compute_delta_vis(history[revision],
                                                  history[revision + 1])

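            # diff equals 1 when all visual movement is unavoidable
            # (delta_vis == un_mov) and decreases as excess movement grows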
            diff = 1 - (delta_vis - un_mov)
            technique_results.append(diff)

        data.append(technique_results)

    TimeBoxplot.plot(data,
                     technique_list,
                     title="Unavoidable Movement - " + dataset_id)

    TimeBoxplot.plot(data,
                     technique_list,
                     median_sorted=True,
                     title="Unavoidable Movement - " + dataset_id)
Example #2
    def do_eval(self, phase):
        eval_batch_gen = self.pipe.batch_gen(phase=phase)
        eval_size, eval_n_acc = 0, 0.0
        y_list = []
        y_list_ = []
        #eval_step=0
        with torch.no_grad():
            for eval_batch_dict in eval_batch_gen:
                # eval_step+=1
                # if eval_step>3:
                #     break
                eval_result = self.model(eval_batch_dict, 0, phase)
                eval_batch_y, eval_batch_y_ = eval_result["y"], eval_result[
                    "y_"]
                eval_batch_n_acc = metrics.n_accurate(eval_batch_y,
                                                      eval_batch_y_)
                eval_n_acc += eval_batch_n_acc
                eval_size += float(eval_batch_dict["batch_size"])
                y_list.extend(eval_batch_y)
                y_list_.extend(eval_batch_y_)

        y_list = torch.stack(y_list, 0).detach().cpu().numpy()
        y_list_ = torch.stack(y_list_, 0).detach().cpu().numpy()
        acc = metrics.eval_acc(eval_n_acc, eval_size)
        tp, fp, tn, fn = metrics.create_confusion_matrix(y_list, y_list_, True)
        self.train_logger.info(
            "eval tp = {}, fp = {}, tn = {}, fn = {}".format(tp, fp, tn, fn))
        mcc = metrics.eval_mcc(tp, fp, tn, fn)
        res = {"acc": acc, "mcc": mcc, "size": eval_size}
        return res
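
For reference, the Matthews correlation coefficient that metrics.eval_mcc reports can be derived directly from the four confusion counts. A minimal sketch, assuming the standard definition (the project's implementation may differ):

import math

def eval_mcc(tp, fp, tn, fn):
    # MCC = (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
    denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return (tp * tn - fp * fn) / denom if denom else 0.0
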
Example #3
def run_clustering_city(filepath, filename, k, eps, latitude, longitude):
    """
    The function clusters data for a given city and draws the result obtained on the map.
    :param filepath: path of file .csv
    :param filename: name of file .csv
    :param k: the value of k
    :param eps: the value of eps
    :param latitude: latitude of city
    :param longitude: longitude of city
    :return: None
    """
    d = Cluster.ClusterGreatCircles(filepath, filename)
    # note: the fixed grids below shadow the k and eps arguments
    for k in [7]:
        for eps in [50]:
            c = Clustering.K_MXTGreatCircle(eps, k, d)
            c()
            m = Metrics.Modularity(c)
            print(f'k-MXT k={k} eps={eps} Modularity={m()}')
            c.cluster.view_at_map(latitude=latitude,
                                  longitude=longitude,
                                  filename_of_map=f'{k}-MXT-eps{eps}')
            c = Clustering.K_MXTGaussGreatCircle(eps, k, d)
            c()
            c.cluster.view_at_map(latitude=latitude,
                                  longitude=longitude,
                                  filename_of_map=f'{k}-MXTGauss-eps{eps}')
            m = Metrics.Modularity(c)
            print(f'k-MXT-Gauss k = {k} eps = {eps} Modularity = {m()}')
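
A hypothetical invocation (the path, file name, and Saint Petersburg coordinates are illustrative only; as noted above, the hard-coded grids inside the function shadow the k and eps arguments):

run_clustering_city(filepath='data/', filename='city.csv', k=7, eps=50,
                    latitude=59.94, longitude=30.31)
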
Example #4
def plot_mean_boxplot(dataset_id):
    data = []
    for i, technique_id in enumerate(technique_list):
        print(Globals.acronyms[technique_id], end=' ', flush=True)
        technique_data = []
        history = Parser.parse_rectangles(technique_id, dataset_id)
        for revision in range(len(history) - 1):
            delta_vis = Metrics.compute_delta_vis(history[revision],
                                                  history[revision + 1])
            delta_data = Metrics.compute_delta_data(history[revision],
                                                    history[revision + 1])
            un_mov = Metrics.compute_unavoidable_movement(
                history[revision], history[revision + 1])

            ratios = (1 - delta_vis) / (1 - delta_data)
            diffs = 1 - abs(delta_vis - delta_data)
            unavoidable = 1 - (delta_vis - un_mov)

            mean = (ratios + diffs + unavoidable) / 3
            technique_data.append(mean)
        data.append(technique_data)

    TimeBoxplot.plot(data, technique_list, title='Mean - ' + dataset_id)

    TimeBoxplot.plot(data,
                     technique_list,
                     median_sorted=True,
                     title='Mean - ' + dataset_id)
Example #5
    def evaluate_sentences(self, test_data, test_relations):

        gold = []
        detects = []
        for i, data in enumerate(test_data):
            if self.use_dependency_features:
                _, ne, _, _ = data
            else:
                _, ne, _ = data

            #Get all combinations of named entities:
            ne_combinations = map(list, itertools.product(ne, repeat=2))

            disc = [
                get_match(n[0], n[1], test_relations[i])
                for n in ne_combinations
            ]

            gold.extend(disc)
            detects.append([g != 0 for g in disc])

        sentence_pred = self.predict_sentences(test_data, detects)
        pred = list(itertools.chain(*sentence_pred))

        return (Metrics.precision(pred, gold, 2),
                Metrics.recall(pred, gold, 2),
                Metrics.f1(pred, gold, 2))
Example #6
def process_tweet_credibility_vectors(rows):
    tweets = Metrics.groupby_element(rows, 1, 2, 5)
    score_vectors = get_credibility_scores(tweets)
    score_matrix = get_score_matrix(score_vectors)
    scores_to_csv('Output/scores_t2.csv', score_matrix)
    tweet_vectors = [(tweet, Metrics.get_group_vector(tweet_matrix))
                     for tweet, tweet_matrix in tweets]
    return append_rows_with_cosine(rows, 1, tweet_vectors, 2, 5)
Example #7
    def _loss(y_true, y_pred):
        embeds_ao = []
        _len = 0
        for i in range(len(e_len)):
            embed_a = y_pred[:, _len:(_len+e_len[i])]
            embed_o = y_pred[:, (_len+e_len[i]):(_len+(e_len[i]*2))]
            embeds_ao.append((embed_a, embed_o))
            _len += e_len[i]*2

        output_a = y_pred[:, _len:(_len+n_cls)]
        output_o = y_pred[:, (_len+n_cls):(_len+(n_cls*2))]

        true_a = y_true[:, :n_cls]
        true_o = y_true[:, n_cls:(n_cls*2)]

        def __loss(anc, oth):

            # _dist_l2 = Metrics.squared_l2_distance(anc, oth)

            """
            Symmetrised Kullback and Leibler
            Kullback, S.; Leibler, R.A. (1951).
            "On information and sufficiency".
            Annals of Mathematical Statistics. 22 (1): 79–86.
            doi:10.1214/aoms/1177729694. MR 0039968.
            """
            # _dist_kl = Metrics.kullback_leibler(anc, oth) +\
            #     Metrics.kullback_leibler(oth, anc)

            """
            Squared Jensen-Shannon distance
            Endres, D. M.; J. E. Schindelin (2003).
            "A new metric for probability distributions".
            IEEE Trans. Inf. Theory. 49 (7): 1858–1860.
            doi:10.1109/TIT.2003.813506.
            """
            _dist_js = K.sqrt(Metrics.jensen_shannon(anc, oth))

            """
            Squared Hellinger distance
            Nikulin, M.S.
            (2001) [1994], "Hellinger distance"
            in Hazewinkel, Michiel, Encyclopedia of Mathematics, Springer Science+Business Media B.V.
            Kluwer Academic Publishers, ISBN 978-1-55608-010-4
            """
            # _dist_hl = Metrics.squared_hellinger(anc, oth)

            _loss = \
                -K.tanh(_dist_js)*K.log(K.maximum(K.tanh(_dist_js), K.epsilon()))
            return _loss

        loss = 0
        for i in range(len(e_len)):
            loss += __loss(*embeds_ao[i])
        loss += \
            Metrics.cross_entropy(true_a, output_a) +\
            Metrics.cross_entropy(true_o, output_o)
        return loss
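
The loss above treats Metrics.jensen_shannon as a black box. For orientation, a minimal Jensen-Shannon divergence in Keras backend ops; this is an assumed implementation, not necessarily the project's:

from keras import backend as K

def jensen_shannon(p, q):
    # JS(P, Q) = 0.5 * KL(P || M) + 0.5 * KL(Q || M), with M = (P + Q) / 2
    p = K.clip(p, K.epsilon(), 1.0)
    q = K.clip(q, K.epsilon(), 1.0)
    m = 0.5 * (p + q)
    return 0.5 * K.sum(p * K.log(p / m), axis=-1) + \
        0.5 * K.sum(q * K.log(q / m), axis=-1)
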
Example #8
    def _loss(y_true, y_pred):
        output_a = y_pred[:, :(e_len)]
        output_p = y_pred[:, (e_len):(e_len*2)]
        output_n = y_pred[:, (e_len*2):(e_len*3)]

        loss = \
            K.sqrt(Metrics.jensen_shannon(output_a, output_p)) -\
            K.log(K.tanh(K.sqrt(Metrics.jensen_shannon(output_a, output_n))))
        return loss
Example #9
def pearson_matrix(dataset_ids):
    matrix = []
    for dataset_id in dataset_ids:
        dataset_values = []
        for technique_id in technique_list:
            # print(Globals.acronyms[technique_id], dataset_id)
            history = Parser.parse_rectangles(technique_id, dataset_id)
            # Compute all delta_vis and delta_data values for a dataset (1 pair per cell)
            all_delta_data = np.array([])
            all_delta_vis = np.array([])
            for revision in range(len(history) - 1):
                delta_data = Metrics.compute_delta_data(
                    history[revision], history[revision + 1])
                all_delta_data = np.append(all_delta_data, delta_data)

                delta_vis = Metrics.compute_delta_vis(history[revision],
                                                      history[revision + 1])
                all_delta_vis = np.append(all_delta_vis, delta_vis)

            # Compute linear regression statistics
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                all_delta_data, all_delta_vis)

            dataset_values.append(r_value)
            print(Globals.acronyms[technique_id], dataset_id, r_value)
        matrix.append(dataset_values)

    matrix = np.array(matrix).transpose()

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=False,
                    cell_text=True,
                    title='Pearson')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=True,
                    cell_text=True,
                    title='Pearson')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=False,
                    cell_text=False,
                    title='Pearson')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=True,
                    cell_text=False,
                    title='Pearson')
Example #10
    def evalFitness(self, predictions, expected):
        if self.isClassification:
            # use accuracy as fitness measure
            result = mt.Metrics().confusion_matrix(expected, predictions)
            fitness = result[0]
        else:
            # invert rmse to deal with maximization problem
            result = mt.Metrics().RootMeanSquareError(expected, predictions)
            fitness = -result
        return fitness
Example #11
def CalcMCCF1(pred=None, truth=None, probCutoff=0.5, contactCutoff=8.0):
    if pred is None:
        print('please provide a predicted contact matrix')
        exit(-1)

    if truth is None:
        print('please provide a true distance matrix')
        exit(-1)

    assert pred.shape == truth.shape

    ## in case the matrix is not square, e.g., interfacial contact matrix
    seqLen = pred.shape[0]
    seqLen2 = pred.shape[1]

    pred_binary = (pred > probCutoff)
    truth_binary = (0 < truth) & (truth < contactCutoff)
    pred_truth = pred_binary * 2 + truth_binary
    numPredicted = np.sum(pred_binary)
    numTruths = np.sum(truth_binary)
    #print "#predicted=", numPredicted, "#natives=", numTruths

    mask_LR = np.triu_indices(seqLen, 24, m=seqLen2)
    mask_MLR = np.triu_indices(seqLen, 12, m=seqLen2)
    mask_SMLR = np.triu_indices(seqLen, 6, m=seqLen2)

    metrics = []
    for mask in [mask_LR, mask_MLR, mask_SMLR]:

        res = pred_truth[mask]
        total = res.shape[0]
        count = np.bincount(res, minlength=4)
        assert (total == np.sum(count))

        ## pred=0, truth=0
        TN = count[0]

        ## pred=0, truth=1
        FN = count[1]

        ## pred=1, truth=0
        FP = count[2]

        ## pred=1, truth=1
        TP = count[3]

        #print TP, FP, TN, FN

        MCC = Metrics.MCC(TP, FP, TN, FN)
        F1, precision, recall = Metrics.F1(TP, FP, TN, FN)

        metrics.extend([MCC, TP, FP, TN, FN, F1, precision, recall])

    return np.array(metrics)
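
The encoding pred_truth = pred_binary * 2 + truth_binary maps each residue pair to {0: TN, 1: FN, 2: FP, 3: TP}, so a single np.bincount yields all four confusion counts. A plausible sketch of the Metrics.F1 helper consistent with the call site above (an assumption; it must return F1, precision, recall in that order):

def F1(TP, FP, TN, FN):
    # precision, recall and F1 from raw confusion counts
    precision = TP / float(TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / float(TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    return f1, precision, recall
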
Example #12
    def _loss(y_true, y_pred):
        embeds_apn = []
        for i in range(len(e_len)):
            _len = i * (e_len[i] * 3)
            embed_a = y_pred[:, _len:(_len + e_len[i])]
            embed_p = y_pred[:, (_len + e_len[i]):(_len + (e_len[i] * 2))]
            embed_n = y_pred[:,
                             (_len + (e_len[i] * 2)):(_len + (e_len[i] * 3))]
            embeds_apn.append((embed_a, embed_p, embed_n))

        out_len = 0
        for i in range(len(e_len)):
            out_len += (e_len[i] * 3)

        output_a = y_pred[:, out_len:(out_len + n_cls)]
        output_p = y_pred[:, (out_len + n_cls):(out_len + (n_cls * 2))]
        output_n = y_pred[:, (out_len + (n_cls * 2)):(out_len + (n_cls * 3))]

        true_a = y_true[:, :n_cls]
        true_p = y_true[:, n_cls:(n_cls * 2)]
        true_n = y_true[:, (n_cls * 2):(n_cls * 3)]

        zero = K.constant(0, dtype=K.floatx())
        one = K.constant(1, dtype=K.floatx())

        def __loss(anc, pos, neg):
            pos_dist_l2 = Metrics.squared_l2_distance(anc, pos)
            neg_dist_l2 = Metrics.squared_l2_distance(anc, neg)

            pos_dist_kl = Metrics.kullback_leibler(anc, pos) +\
                Metrics.kullback_leibler(pos, anc)
            neg_dist_kl = Metrics.kullback_leibler(anc, neg) +\
                Metrics.kullback_leibler(neg, anc)

            _loss = \
                Metrics.entropy(K.tanh(pos_dist_kl)) +\
                Metrics.entropy(K.tanh(neg_dist_kl)) +\
                Metrics.entropy(K.tanh(pos_dist_l2)) +\
                Metrics.entropy(K.tanh(neg_dist_l2)) +\
                Metrics.cross_entropy(zero, K.tanh(pos_dist_kl)) +\
                Metrics.cross_entropy(one, K.tanh(neg_dist_kl)) +\
                Metrics.cross_entropy(zero, K.tanh(pos_dist_l2)) +\
                Metrics.cross_entropy(one, K.tanh(neg_dist_l2))
            return _loss

        loss = 0
        for i in range(len(e_len)):
            loss += __loss(*embeds_apn[i])
        loss += \
            Metrics.cross_entropy(true_a, output_a) +\
            Metrics.cross_entropy(true_p, output_p) +\
            Metrics.cross_entropy(true_n, output_n)
        return loss
Example #13
    def make_prediction(self):
        x, y, pred = self.OpticDiscPrediction()
        self.x = x
        self.y = y
        copy = self.currentImg.copy()
        drawCopy = self.currentImg.copy()
        drawCopy = moil.stackImageChannels(drawCopy)
        w, h, c = moil.getWidthHeightChannels(copy)
        xShift = int(80 * w / 600)
        yShift = int(80 * (w * 0.75) / 450)

        xExitShift = int(40 * w / 600)
        yExitShift = int(40 * (w * 0.75) / 450)
        roi = moil.getRegionOfInterest(copy, x, y, xShift, yShift)
        roiExit = moil.getRegionOfInterest(copy, x, y, xExitShift, yExitShift)
        atrophyRate, atrophyMap = self.AtrophyPrediction(roi)
        self.atrophyRate = atrophyRate
        self.label.configure(text="Atrophy rate (arteriovenous phase only): " + str(atrophyRate))

        xExit, yExit = self.ExitPrediction(roiExit, xExitShift, yExitShift, x, y)
        self.xOut = xExit
        self.yOut = yExit
        dist = np.linalg.norm(
            np.asarray([xExit / w * 600, yExit / (w * 0.75) * 450]) - np.asarray([x / w * 600, y / (w * 0.75) * 450]))
        if dist > 16:
            self.labelExit.configure(
                text='Vessel displacement (arteriovenous or late phase): ' + str(dist) + ', SIGNIFICANT!')
        else:
            self.labelExit.configure(
                text='Vessel displacement (arteriovenous or late phase): ' + str(dist))
        wA, hA, cA = moil.getWidthHeightChannels(atrophyMap)

        mask = np.zeros((h, w), drawCopy.dtype)
        mask = moil.addToRegionOfInterest(mask, x, y, round(wA / 2 + 0.00001), round(hA / 2 + 0.00001), atrophyMap)

        # mask[y-round(hA/2+0.00001):y+round(hA/2+0.00001), x-round(wA/2+0.00001):x+round(wA/2+0.00001)] = atrophyMap
        redImg = np.zeros(drawCopy.shape, drawCopy.dtype)
        redImg[:, :] = (255, 0, 0)
        redMask = cv2.bitwise_and(redImg, redImg, mask=mask)
        drawCopy = cv2.addWeighted(redMask, 1, drawCopy, 1, 0)

        # moil.show(atrophyMap)
        # drawCopy[mask] = (255, 0, 0)
        cv2.rectangle(drawCopy, (x - xShift, y - yShift), (x + xShift, y + yShift), (127, 0, 127), int(5 / 1387 * w))
        cv2.circle(drawCopy, (x, y), int(12 / 1387 * w), (127, 0, 127), thickness=int(5 / 1387 * w))

        met.draw(pred, drawCopy, thickness=int(4 / 1387 * w))
        cv2.circle(drawCopy, (xExit, yExit), int(12 / 1387 * w), (0, 127, 0), thickness=int(5 / 1387 * w))
        self.updateGuiImage(drawCopy)
        self.predicted = True
Example #14
def unavoidable_matrix(dataset_ids):
    matrix = []
    for dataset_id in dataset_ids:
        dataset_values = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            all_unavoidable = np.array([])
            for revision in range(len(history) - 1):
                un_mov = Metrics.compute_unavoidable_movement(
                    history[revision], history[revision + 1])
                delta_vis = Metrics.compute_delta_vis(history[revision],
                                                      history[revision + 1])

                diff = 1 - (delta_vis - un_mov)
                all_unavoidable = np.append(all_unavoidable, diff.values)

            dataset_values.append(all_unavoidable.mean())
            print(Globals.acronyms[technique_id], dataset_id,
                  all_unavoidable.mean())
        matrix.append(dataset_values)

    matrix = np.array(matrix).transpose()

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=False,
                    cell_text=True,
                    title='Unavoidable')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=True,
                    cell_text=True,
                    title='Unavoidable')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=False,
                    cell_text=False,
                    title='Unavoidable')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=True,
                    cell_text=False,
                    title='Unavoidable')
Example #15
def testModel(name, mode, XS, YS, YS_multi):
    print('Model Testing Started ...', time.ctime())
    print('TIMESTEP_IN, TIMESTEP_OUT', TIMESTEP_IN, TIMESTEP_OUT)
    XS_torch, YS_torch = torch.Tensor(XS).to(device), torch.Tensor(YS).to(
        device)
    test_data = torch.utils.data.TensorDataset(XS_torch, YS_torch)
    test_iter = torch.utils.data.DataLoader(test_data,
                                            BATCHSIZE,
                                            shuffle=False)
    model = getModel(name)
    model.load_state_dict(torch.load(PATH + '/' + name + '.pt'))
    criterion = nn.MSELoss()
    torch_score = evaluateModel(model, criterion, test_iter)
    YS_pred_multi = predictModel_multi(model, test_iter)
    print('YS_multi.shape, YS_pred_multi.shape,', YS_multi.shape,
          YS_pred_multi.shape)
    YS_multi, YS_pred_multi = np.squeeze(YS_multi), np.squeeze(YS_pred_multi)
    for i in range(YS_multi.shape[1]):
        YS_multi[:, i, :] = scaler.inverse_transform(YS_multi[:, i, :])
        YS_pred_multi[:, i, :] = scaler.inverse_transform(YS_pred_multi[:, i, :])
    print('YS_multi.shape, YS_pred_multi.shape,', YS_multi.shape,
          YS_pred_multi.shape)
    np.save(PATH + '/' + MODELNAME + '_prediction.npy', YS_pred_multi)
    np.save(PATH + '/' + MODELNAME + '_groundtruth.npy', YS_multi)
    MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS_multi, YS_pred_multi)
    print('*' * 40)
    print("%s, %s, Torch MSE, %.10e, %.10f\n" %
          (name, mode, torch_score, torch_score))
    f = open(PATH + '/' + name + '_prediction_scores.txt', 'a')
    f.write("%s, %s, Torch MSE, %.10e, %.10f\n" %
            (name, mode, torch_score, torch_score))
    print(
        "all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f\n"
        % (name, mode, MSE, RMSE, MAE, MAPE))
    f.write(
        "all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f\n"
        % (name, mode, MSE, RMSE, MAE, MAPE))
    for i in [2, 5, 11]:
        MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS_multi[:, i, :],
                                                YS_pred_multi[:, i, :])
        print(
            "%d step, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f\n"
            % (i, name, mode, MSE, RMSE, MAE, MAPE))
        f.write(
            "%d step, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f\n"
            % (i, name, mode, MSE, RMSE, MAE, MAPE))
    f.close()
    print('Model Testing Ended ...', time.ctime())
Example #16
def testModel(name, mode, XS, YS):
    if LOSS == "GraphWaveNetLoss":
        criterion = Metrics.masked_mae
    if LOSS == 'MSE':
        criterion = nn.MSELoss()
    if LOSS == 'MAE':
        criterion = nn.L1Loss()
    print('Model Testing Started ...', time.ctime())
    print('TIMESTEP_IN, TIMESTEP_OUT', TIMESTEP_IN, TIMESTEP_OUT)
    XS_torch, YS_torch = torch.Tensor(XS).to(device), torch.Tensor(YS).to(
        device)
    test_data = torch.utils.data.TensorDataset(XS_torch, YS_torch)
    test_iter = torch.utils.data.DataLoader(test_data,
                                            BATCHSIZE,
                                            shuffle=False)
    model = getModel(name)
    model.load_state_dict(torch.load(PATH + '/' + name + '.pt'))

    torch_score = evaluateModel(model, criterion, test_iter)
    YS_pred = predictModel(model, test_iter)
    print('YS.shape, YS_pred.shape,', YS.shape, YS_pred.shape)
    YS, YS_pred = scaler.inverse_transform(
        np.squeeze(YS)), scaler.inverse_transform(np.squeeze(YS_pred))
    print('YS.shape, YS_pred.shape,', YS.shape, YS_pred.shape)
    np.save(PATH + '/' + MODELNAME + '_prediction.npy', YS_pred)
    np.save(PATH + '/' + MODELNAME + '_groundtruth.npy', YS)
    MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS, YS_pred)
    print('*' * 40)
    print("%s, %s, Torch MSE, %.10e, %.10f" %
          (name, mode, torch_score, torch_score))
    f = open(PATH + '/' + name + '_prediction_scores.txt', 'a')
    f.write("%s, %s, Torch MSE, %.10e, %.10f\n" %
            (name, mode, torch_score, torch_score))
    print(
        "all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f"
        % (name, mode, MSE, RMSE, MAE, MAPE))
    f.write(
        "all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f\n"
        % (name, mode, MSE, RMSE, MAE, MAPE))
    for i in range(TIMESTEP_OUT):
        MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS[:, i, :], YS_pred[:, i, :])
        print(
            "%d step, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f"
            % (i + 1, name, mode, MSE, RMSE, MAE, MAPE))
        f.write(
            "%d step, %s, %s, MSE, RMSE, MAE, MAPE, %.10f, %.10f, %.10f, %.10f\n"
            % (i + 1, name, mode, MSE, RMSE, MAE, MAPE))
    f.close()
    print('Model Testing Ended ...', time.ctime())
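
Both testModel variants delegate the error summary to Metrics.evaluate. A minimal NumPy sketch, assuming it returns (MSE, RMSE, MAE, MAPE) over the inverse-transformed arrays:

import numpy as np

def evaluate(y_true, y_pred):
    err = y_true - y_pred
    mse = np.mean(err ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(err))
    mape = np.mean(np.abs(err / y_true)) * 100.0  # assumes nonzero ground truth
    return mse, rmse, mae, mape
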
Example #17
def delta_ratio_matrix(dataset_ids):
    matrix = []
    for dataset_id in dataset_ids:
        dataset_values = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            all_ratios = np.array([])
            for revision in range(len(history) - 1):
                delta_vis = Metrics.compute_delta_vis(history[revision],
                                                      history[revision + 1])
                delta_data = Metrics.compute_delta_data(
                    history[revision], history[revision + 1])
                ratio = (1 - delta_vis) / (1 - delta_data)
                all_ratios = np.append(all_ratios, ratio.values)

            dataset_values.append(all_ratios.mean())
            print(Globals.acronyms[technique_id], dataset_id,
                  all_ratios.mean())
        matrix.append(dataset_values)

    matrix = np.array(matrix).transpose()

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=False,
                    cell_text=True,
                    title='Delta ratio')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=True,
                    cell_text=True,
                    title='Delta ratio')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=False,
                    cell_text=False,
                    title='Delta ratio')

    MatrixPlot.plot(matrix,
                    dataset_ids,
                    technique_list,
                    shared_cm=True,
                    cell_text=False,
                    title='Delta ratio')
Example #18
def get_score_matrix(score_vectors):
    matrix = []
    for tweet_combi, vector in score_vectors:
        tweet_split = tweet_combi.split('/')
        matrix.append([tweet_split[0], (tweet_split[1], sum(vector))])
    grouped = Metrics.groupby_one_element(matrix, 0, 1)

    table = []
    for group in grouped:
        aggr = Metrics.groupby_one_element(group[1], 0, 1)
        aggr = [(id, sum(counts)) for (id, counts) in aggr]
        aggr.append((group[0], 'x'))
        table.append([group[0], aggr])

    return table
Example #19
def validate(model, device, dataset, batch_size=64):
    batches = len(dataset)
    model.train(False)
    total = 0
    ground_truths = []
    predictions = []
    loss = 0
    criterion = nn.CrossEntropyLoss()
    # dataset.switch_mode(training=False)
    # dataset.update_batchsize(batch_size)
    with torch.no_grad():
        for data in tqdm(dataset):
            #data=dataset[i]
            X = data['data'].to(device).float()
            #X=torch.nn.functional.one_hot(X,num_classes=4)
            Y = data['labels'].to(device).long()
            output = model(X)
            del X
            loss += criterion(output, Y)
            classification_predictions = torch.argmax(output, dim=1).squeeze()
            for pred in classification_predictions:
                predictions.append(pred.cpu().numpy())
            for truth in Y:
                ground_truths.append(truth.cpu().numpy())
            del output
    ground_truths = np.asarray(ground_truths)
    torch.cuda.empty_cache()
    val_loss = (loss / batches).cpu()
    predictions = np.asarray(predictions)
    binary_predictions = predictions.copy()
    binary_predictions[binary_predictions == 2] = 1
    binary_ground_truths = ground_truths.copy()
    binary_ground_truths[binary_ground_truths == 2] = 1
    #print(predictions)
    #print(ground_truths)
    #score=metrics.cohen_kappa_score(ground_truths,predictions,weights='quadratic')
    val_acc = Metrics.accuracy(predictions, ground_truths)
    val_sens = Metrics.sensitivity(predictions, ground_truths)
    val_spec = Metrics.specificity(predictions, ground_truths)
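    # note: sklearn's precision_score/recall_score expect (y_true, y_pred);
    # the arguments below appear reversed, which swaps the two metrics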
    val_precision = precision_score(predictions, ground_truths)
    val_recall = recall_score(predictions, ground_truths)
    binary_acc = np.sum(
        binary_predictions == binary_ground_truths) / len(binary_ground_truths)
    val_f1 = f1_score(ground_truths, predictions)
    val_mcc = matthews_corrcoef(ground_truths, predictions)
    print('Accuracy: {}, Binary Accuracy: {} Val F1: {} Val Loss: {}'.format(
        val_acc, binary_acc, val_f1, val_loss))
    return val_loss, val_acc, val_precision, val_recall, val_f1, val_mcc
Example #20
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()

    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            target = target.cuda()
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        # s_mAP = Metrics.calculate_mAP(output.data.cpu().numpy(), target_var.data.cpu().numpy())
        # s_mAP = Metrics.meanAP(output.data.cpu().numpy(), target_var.data.cpu().numpy())

        # prec1, prec5 = Metrics.accuracy(output.data, target, topk=(1, 5))
        prec = Metrics.match_accuracy(output.data, target)
        top5.update(prec, input.size(0))
        losses.update(loss.data[0], input.size(0))

        # mAP.update(s_mAP, input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print(
                'Epoch: [{0}][{1}/{2}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\tLR {lr:.3f}\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Prec {mAP.val:.3f} ({mAP.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    lr=optimizer.param_groups[0]['lr'],
                    loss=losses,
                    mAP=top5))

    return top5.avg, losses.avg
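
train relies on the AverageMeter helper familiar from the PyTorch ImageNet example; for completeness, a standard version (assumed, not taken from this repository). Note also that torch.autograd.Variable and loss.data[0] date this snippet to PyTorch 0.3 or earlier; in current PyTorch, loss.item() replaces loss.data[0].

class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
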
Example #21
    def setupMetric(self, metric_string, dataset):
        currentMetric = None
        if metric_string == 'Constant':
            currentMetric = Metrics.ConstantMetric(dataset, self.rankingSize,
                                                   1.0)
        elif metric_string == 'Revenue':
            currentMetric = Metrics.Revenue(dataset, self.rankingSize)
        else:
            print("Experiment:setupMetric [ERR] Metric ",
                  metric_string,
                  "currently not supported.",
                  flush=True)
            sys.exit(0)

        self.metric = currentMetric
        print("Experiment:setupMetric [INFO] ", metric_string, flush=True)
Example #22
        def __loss(anc, oth):

            # _dist_l2 = Metrics.squared_l2_distance(anc, oth)

            """
            Symmetrised Kullback and Leibler
            Kullback, S.; Leibler, R.A. (1951).
            "On information and sufficiency".
            Annals of Mathematical Statistics. 22 (1): 79–86.
            doi:10.1214/aoms/1177729694. MR 0039968.
            """
            # _dist_kl = Metrics.kullback_leibler(anc, oth) +\
            #     Metrics.kullback_leibler(oth, anc)

            """
            Squared Jensen-Shannon distance
            Endres, D. M.; J. E. Schindelin (2003).
            "A new metric for probability distributions".
            IEEE Trans. Inf. Theory. 49 (7): 1858–1860.
            doi:10.1109/TIT.2003.813506.
            """
            _dist_js = K.sqrt(Metrics.jensen_shannon(anc, oth))

            """
            Squared Hellinger distance
            Nikulin, M.S.
            (2001) [1994], "Hellinger distance"
            in Hazewinkel, Michiel, Encyclopedia of Mathematics, Springer Science+Business Media B.V.
            Kluwer Academic Publishers, ISBN 978-1-55608-010-4
            """
            # _dist_hl = Metrics.squared_hellinger(anc, oth)

            _loss = \
                -K.tanh(_dist_js)*K.log(K.maximum(K.tanh(_dist_js), K.epsilon()))
            return _loss
Example #23
 def predict_doses(self, similarity, data):
     flip_args = Metrics.get_flip_args()
     adjacency = TJaccardModel().get_adjacency_lists(
         data.organ_distances, np.arange(Constants.num_organs))
     normal_distances = data.tumor_distances
     flipped_distances = data.tumor_distances[:, flip_args]
     flipped_doses = data.doses[:, flip_args]
     dose_predictions = np.zeros(
         (data.get_num_patients(), Constants.num_organs))
     for p1 in range(data.get_num_patients()):
         matches = []
         for p2 in range(0, data.get_num_patients()):
             if p1 == p2:
                 continue
             match = self.get_patient_similarity(p1, p2, normal_distances,
                                                 flipped_distances,
                                                 data.doses, flipped_doses,
                                                 adjacency)
             matches.append(match)
         matches = sorted(matches, key=lambda x: -x[0])
         n_matches = self.get_num_matches(p1, matches, data.classes)
         prediction = np.array([x[1] for x in matches[0:n_matches]])
         weights = np.array([x[0]
                             for x in matches[0:n_matches]]).reshape(-1, 1)
         if weights.mean() <= 0:
             print(weights, p1, [x[0] for x in matches])
         dose_predictions[p1, :] = np.mean(prediction * weights,
                                           axis=0) / np.mean(weights)
     return (dose_predictions)
Example #24
    def AtrophyPrediction(self, roi):
        img = self.ModAtrophy.predict(roi)
        atrophyRate = met.atrophyRate(img)
        w, h, c = moil.getWidthHeightChannels(self.currentImg)
        img = cv2.resize(img, (round(160 * w / 600), round(160 * (w * 0.75) / 450)))
        img = moil.getBinaryThreshold(img)

        return atrophyRate, img
Example #25
 def tsim(self, d1, d2, adjacency):
     scores = []
     if d2.sum() == np.inf:
         return 0
     for organ_set in adjacency:
         scores.append(
             Metrics.jaccard_distance(d1[organ_set], d2[organ_set]))
     return np.mean(scores)
Example #26
 def feed_forward(self, input_, biases=[], weights=[]):
     if not biases and not weights:
         biases = self.biases
         weights = self.weights
     #for a input run the network and get the output activation layer unit vector.
     activation = input_
     for b, w in zip(biases, weights):
         activation = mt.Metrics().sigmoid(np.dot(w, activation) + b)
     return activation
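
The only Metrics facility used here is sigmoid, presumably the standard logistic function; a minimal assumed implementation:

import numpy as np

class Metrics:
    def sigmoid(self, z):
        # elementwise logistic activation
        return 1.0 / (1.0 + np.exp(-z))
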
Example #27
    def performance_measure(self, predicted, labels, dataset, isClassification, k, method):
        mtrx = Metrics.Metrics()
        if isClassification:
            acc, prec, recall = mtrx.confusion_matrix(labels.values, predicted)
            self.update_result(dataset, isClassification, k, method, acc, prec, recall, 0)
        else:
            rmse = mtrx.RootMeanSquareError(labels.values, predicted)
            self.update_result(dataset, isClassification, k, method, 0, 0, 0, rmse)
Example #28
def find_best_move(board, history, player):
    if board.tostring() not in history:
        history[board.tostring()] = Metrics()
    print("Deciding best move...")
    run_simulations(board, history, player)
    boards = get_possible_moves(board, player)
    print([history[b.tostring()].count for b in boards])
    values = [history[b.tostring()].get_expected_value(player) for b in boards]
    return boards[np.argmax(values)]
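
history maps serialized boards to Metrics records of simulation statistics; only count and get_expected_value(player) are exercised above. A minimal assumed shape (purely illustrative):

class Metrics:
    def __init__(self):
        self.count = 0     # simulations that passed through this state
        self.rewards = {}  # player -> accumulated reward

    def get_expected_value(self, player):
        # mean reward for player; 0.0 for unvisited states
        return self.rewards.get(player, 0.0) / self.count if self.count else 0.0
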
Example #29
    def test_validation(self, validate_set, isClassification, epoch):
        # test the model with the validation set; return accuracy or RMSE
        # depending on classification/regression
        predicted = []
        label = []
        if isClassification:
            for x, y in validate_set:
                predicted.append(np.argmax(self.feed_forward(x)))
                label.append(np.argmax(y))
            acc, prec, recall = mt.Metrics().confusion_matrix(label, predicted)
            print('Epoch {0} completed with acc::: {1}'.format(epoch, acc))
            return acc
        else:
            for x, y in validate_set:
                predicted.append(self.feed_forward(x)[0][0])
                label.append(y)
            rmse = mt.Metrics().RootMeanSquareError(np.asarray(label), predicted)
            print('Epoch {0} completed with rmse::: {1}'.format(epoch, rmse))
            return rmse
Example #30
    def __init__(self,
                 sizeOfBuffer,
                 timeout,
                 numberOfThreads,
                 numberOfCores,
                 timeQuantum,
                 contextSwitchTime,
                 numberOfClients,
                 randomSeed,
                 arrivalTimeDistributionLambda,
                 thinkTimeDistribution,
                 serviceTimeDistribution,
                 paramThinkTime1,
                 paramServiceTime1,
                 paramThinkTime2=None,
                 paramServiceTime2=None):
        random.seed(randomSeed)
        self.eventList = EventList.EventList()
        Request.Request.initRequestId()
        self.simulationTime = 0
        self.departureCount = 0
        self.clients = []

        for y in list(range(numberOfClients)):
            self.clients.append(
                Client.Client(y, thinkTimeDistribution, paramThinkTime1, 0,
                              paramThinkTime2))  #0 - thinking

        self.requestList = RequestList.RequestList()

        for index in list(range(numberOfClients)):
            if serviceTimeDistribution == 1 or serviceTimeDistribution == 2:  # Uniform or Normal distribution
                request = Request.Request(index, arrivalTimeDistributionLambda,
                                          serviceTimeDistribution, timeout,
                                          paramServiceTime1, paramServiceTime2)
            else:
                request = Request.Request(index, arrivalTimeDistributionLambda,
                                          serviceTimeDistribution, timeout,
                                          paramServiceTime1)

            self.requestList.addToRequestList(request)
            #print (self.requestList.requestList[index].arrivalTime)
            newEvent = Event.Event(self.simulationTime + request.arrivalTime,
                                   0, request.requestId)
            self.eventList.enqueueEvent(newEvent)
            #schedule timeout of the request
            newEvent1 = Event.Event(self.simulationTime + request.arrivalTime +
                                    request.timeout, 4,
                                    request.requestId)  #4 - timeout
            self.eventList.enqueueEvent(newEvent1)

        self.system = System.System(sizeOfBuffer, numberOfCores,
                                    numberOfThreads, timeQuantum,
                                    contextSwitchTime)

        self.metrics = Metrics.Metrics()
Example #31
def process_documents():
    '''Read From Document'''
    documents = Utilities.read_from_time_all()
    #documents = read_lines()
    '''Tokens and Stem Documents'''
    documents = Utilities.tokenize_stem_docs(documents)
    '''calculate doc lengths'''
    doc_len = Utilities.calculate_doc_len(documents)
    ''' term frequency'''
    tf = TFIDF.term_frequency(documents)
    '''calculates tf-idf'''
    tfidf = TFIDF.TFIDF(len(documents), tf)
    '''Read From Document'''
    queries = Utilities.read_from_time_que()
    #queries = ['pop love song', 'chinese american', 'city']
    '''Tokens and Stem Documents'''
    queries = Utilities.tokenize_stem_docs(queries)
    
    
    #print Search.search_by_cosine(tfidf,len(documents),['CARTOONISTS'.lower()])
    
    
    cosine_result = []
    rsv_result = []
    BM25_1_5 = []  #b=1 k= 0.5
    BM25_1_1 = [] #b=1 k= 1
    BM25_2_5 = [] #b=2 k= 0.5
    BM25_2_1 = [] #b=2 k= 1 
    
    
    for query in queries:
        cosine_result.append(Search.search_by_cosine(tfidf,len(documents),query))
        rsv_result.append(Search.search_by_rsv(tf,len(documents),query))
        BM25_1_5.append(Search.search_by_BM25(tf,doc_len,query,1.0,0.5))
        BM25_1_1.append(Search.search_by_BM25(tf,doc_len,query,1.0,1.0))
        BM25_2_5.append(Search.search_by_BM25(tf,doc_len,query,2.0,0.5))
        BM25_2_1.append(Search.search_by_BM25(tf,doc_len,query,2.0,1.0))
    
    #print cosine_result[1]
    '''
    read from time.rel
    '''    
    rel_dict = Utilities.read_from_time_rel()
    '''
    print result
    '''
    result = []

    result.append(('System','Precision','Recall','F1','MAP')) 
    result.append( ('cosine  ',) + Metrics.getMetrics(cosine_result,rel_dict,20)) #limit to top 20 search
    result.append( ('RSV  ',) + Metrics.getMetrics(rsv_result,rel_dict,20))
    result.append(('BM25 (1, .5) ',)+ Metrics.getMetrics(BM25_1_5,rel_dict,20))
    result.append(('BM25 (1, 1) ',)+Metrics.getMetrics(BM25_1_1,rel_dict,20))
    result.append(('BM25 (2, .5) ',)+Metrics.getMetrics(BM25_2_5,rel_dict,20)) 
    result.append(('BM25 (2, 1) ',)+Metrics.getMetrics(BM25_2_1,rel_dict,20))
    
    Utilities.tabulate(result)
    Utilities.plot_graph(result)
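
For orientation, the textbook Okapi BM25 score of one document for one query; this is a reference sketch, not the project's Search.search_by_BM25, whose exact parameter order the comments above leave ambiguous:

import math

def bm25_score(query_terms, doc_tf, doc_len, avg_doc_len, n_docs, df, k1=1.0, b=0.5):
    # doc_tf: term -> frequency in this document; df: term -> document frequency
    score = 0.0
    for term in query_terms:
        if term not in doc_tf:
            continue
        idf = math.log((n_docs - df[term] + 0.5) / (df[term] + 0.5) + 1.0)
        tf = doc_tf[term]
        score += idf * tf * (k1 + 1) / (tf + k1 * (1 - b + b * doc_len / avg_doc_len))
    return score
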
Example #32
	def computeMetricsAverageOverFold(self):
		RMSE = []
		AAE = []
		PEARSON = []
		
		for f in range(0,self.completedFolds):
			metric = Metrics(self.aT[f],self.aP[f])	
			metric.computeRMSE()
			RMSE.append(metric.RMSE)			
			metric.computeAAE()
			AAE.append(metric.AAE)
			if len(self.aT[f]) > 1:
				metric.computePEARSON()
				PEARSON.append(metric.PEARSON)
				print("Fold" + str(f) + "\nRMSE:" + str(RMSE[f]) + " AAE:" + str(AAE[f]) + " PEARSON:" + str(PEARSON[f]))
			else:
				print("Fold" + str(f) + "\nRMSE:" + str(RMSE[f]) + " AAE:" + str(AAE[f]))

		averageRMSE = 0.0
		averagePEARSON = 0.0
		averageAAE = 0.0
		for f in range(0,self.completedFolds):
			averageRMSE += RMSE[f]
			averageAAE += AAE[f]
			averagePEARSON += PEARSON[f]
		averageRMSE = averageRMSE/float(self.completedFolds)
		averageAAE = averageAAE/float(self.completedFolds)
		averagePEARSON = averagePEARSON/float(self.completedFolds)
		
		stdRMSE = 0.0
		stdPEARSON = 0.0
		stdAAE = 0.0
		for f in range(0,self.completedFolds):
			stdRMSE += math.pow((RMSE[f]-averageRMSE),2)
			stdAAE += math.pow((AAE[f]-averageAAE),2)
			stdPEARSON += math.pow((PEARSON[f]-averagePEARSON),2)
		stdRMSE = math.sqrt(stdRMSE/float(self.completedFolds))
		stdAAE = math.sqrt(stdAAE/float(self.completedFolds))
		stdPEARSON = math.sqrt(stdPEARSON/float(self.completedFolds))	
		
		print("FOLD AVERAGE...\nRMSE:" + str(averageRMSE) + " " + str(stdRMSE) + " AAE:" + str(averageAAE) + " " + str(stdAAE) + " PEARSON:" + str(averagePEARSON) + " " + str(stdPEARSON))
Example #33
def cross_validation_score(x, y, classifier, folds = 10, class_value = 1.0):
    """ Creates #folds in the dataset, and then runs the 
        <classifier> on them, computing the average recall,
        precision and f1 score
    """
    if len(x) != len(y):
        raise Exception("Lists are not the same size")
    
    x = __ensure_np_array__(x)
    y = __ensure_np_array__(y)
    
    edges = cross_validation_edges(len(x), folds)
    recall, precision, f1_score = 0.0, 0.0, 0.0
    
    
    for i in range(folds):
        l,r = edges[i]
        
        #Note these are numpy obj's and cannot be treated as lists
        td_x = np.concatenate((x[:l], x[r:]))                            
        td_y = np.concatenate((y[:l], y[r:]))
        
        vd_x = x[l:r]
        vd_y = y[l:r]
        
        classifier.fit(td_x, td_y)
        pred_y = classifier.predict(vd_x)
        
        r, p, f1 = Metrics.rpf1(vd_y, pred_y, class_value)
        recall    += r
        precision += p
        f1_score  += f1
    
    recall      = recall    / folds
    precision   = precision / folds
    f1_score    = f1_score  / folds
    
    return (recall, precision, f1_score)
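
A hypothetical call with a scikit-learn classifier and toy data (illustrative only; assumes Metrics.rpf1 and the cross_validation_edges helper are importable as in the function body):

import numpy as np
from sklearn.linear_model import LogisticRegression

x = np.random.rand(100, 5)
y = (x[:, 0] > 0.5).astype(float)  # toy binary labels
recall, precision, f1 = cross_validation_score(x, y, LogisticRegression(),
                                               folds=10, class_value=1.0)
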
Example #34
    def RunStacked(self, results_file, cv_folds = 10, min_word_count = 5,
                   stem = True, lemmatize = False, remove_stop_words = True, layers = 2):

        #SETTINGS
        logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

        print("Results filename: " + results_file)
        settings = Settings.Settings()

        results_dir = settings.results_directory + self.sub_dir() + "\\"

        fName = results_dir + results_file

        #TOKENIZE
        data = self.get_data(settings)
        tokenized_docs = WordTokenizer.tokenize(data.documents, min_word_count=min_word_count, stem=stem,
                                                lemmatize=lemmatize, remove_stop_words=remove_stop_words,
                                                spelling_correct=True, number_fn=NumberStrategy.collapse_num)

        empty_ixs = set([i for i, doc in enumerate(tokenized_docs) if len(doc) < StackedExperimentRunner.__MIN_DOC_LENGTH__])
        tokenized_docs = [t for i, t in enumerate(tokenized_docs) if i not in empty_ixs]

        #TRAINING DATA
        #TODO Make this one call from docs -> td
        (distance_matrix, id2word) = self.get_vector_space(tokenized_docs)
        xs = self.get_training_data(distance_matrix, id2word)

        matrix_mapper = self.matrix_value_mapper()
        if matrix_mapper:
            xs = MatrixHelper.map_matrix(matrix_mapper, xs)

        all_results = self.get_params() + "\n"
        print(all_results, end='')

        MIN_CODE_COUNT = 3

        codes = set(self.get_codes(data.sm_codes))
        label_mapper = self.label_mapper()

        # Stop logging now
        logging.disable(logging.INFO)

        xs = ensure_np_array(xs)
        edges = cross_validation_edges(len(xs), cv_folds)

        ys_by_code = {}
        positive_count_by_code = {}
        for code in codes.copy():
            ys = self.get_ys(code, data, empty_ixs, label_mapper, xs)
            ys_by_code[code] = ys

            positive_count = len([item for item in ys if item == 1])
            positive_count_by_code[code] = positive_count

            if positive_count < MIN_CODE_COUNT:
                codes.remove(code)

        dct_td_predictions_by_fold = {}
        dct_vd_predictions_by_fold = {}
        dct_actual_by_fold = {}

        for layer in range(layers):

            print("Layer: {0}".format(layer))
            vd_metrics_for_layer, td_metrics_for_layer = [], []

            vd_metrics_by_code = defaultdict(lambda: [])
            td_metrics_by_code = defaultdict(lambda: [])

            for fold in range(cv_folds):

                l, r = edges[fold]

                #Note these are numpy obj's and cannot be treated as lists
                td_x = np.concatenate((xs[:l], xs[r:]))
                vd_x = xs[l:r]

                predictions_from_previous_layer = None
                if layer > 0:
                    # Seed with an empty lists
                    lst_td_preds = self.__extract_predictions__(codes, dct_td_predictions_by_fold[fold], td_x)
                    td_x = np.concatenate((td_x, np.array(lst_td_preds)), 1)

                    lst_vd_preds = self.__extract_predictions__(codes, dct_vd_predictions_by_fold[fold], vd_x)
                    vd_x = np.concatenate((vd_x, np.array(lst_vd_preds)), 1)

                dct_td_predictions_per_code = {}
                dct_vd_predictions_per_code = {}
                dct_actual_per_code = {}

                dct_td_predictions_by_fold[fold] = dct_td_predictions_per_code
                dct_vd_predictions_by_fold[fold] = dct_vd_predictions_per_code
                dct_actual_by_fold[fold] = dct_actual_per_code

                class_value = self.get_class_value()

                for code in codes:

                    total_codes = positive_count_by_code[code]

                    ys = ys_by_code[code]
                    td_y = np.concatenate((ys[:l], ys[r:]))
                    vd_y = ys[l:r]

                    if min(td_y) == max(td_y):
                        val = td_y[0]
                        td_predictions = np.array([val for y in td_y])
                        vd_predictions = np.array([val for y in vd_y])
                    else:
                        create_classifier_func = self.create_classifier(code)
                        classify_func = self.classify()

                        classifier = create_classifier_func(td_x, td_y)
                        td_predictions = classify_func(classifier, td_x)
                        vd_predictions = classify_func(classifier, vd_x)

                    dct_td_predictions_per_code[code]  = td_predictions
                    dct_vd_predictions_per_code[code]  = vd_predictions
                    dct_actual_per_code[code]       = td_y

                    td_r, td_p, td_f1, td_a = Metrics.rpf1a(td_y, td_predictions, class_value=class_value)
                    vd_r, vd_p, vd_f1, vd_a = Metrics.rpf1a(vd_y, vd_predictions, class_value=class_value)

                    vd_metric, td_metric = self.rpfa(vd_r, vd_p, vd_f1, vd_a, total_codes), \
                                           self.rpfa(td_r, td_p, td_f1, td_a, total_codes)

                    vd_metrics_for_layer.append(vd_metric)
                    td_metrics_for_layer.append(td_metric)

                    vd_metrics_by_code[code].append(vd_metric)
                    td_metrics_by_code[code].append(td_metric)

                pass # End for code in codes

            pass #END for fold in folds

            for code in sorted(codes):
                positive_count = positive_count_by_code[code]
                vd_metric, td_metric = self.mean_rpfa(vd_metrics_by_code[code]), self.mean_rpfa(td_metrics_by_code[code])

                results = "Code: {0} Count: {1} VD[ {2} ]\tTD[ {3} ]\n".format(code.ljust(7), str(positive_count).rjust(4),
                                                                               vd_metric.to_str(), td_metric.to_str())
                print(results, end='')

            mean_vd_metrics, mean_td_metrics = self.mean_rpfa(vd_metrics_for_layer), self.mean_rpfa(td_metrics_for_layer)
            wt_mean_vd_metrics, wt_mean_td_metrics = self.weighted_mean_rpfa(vd_metrics_for_layer), self.weighted_mean_rpfa(
                td_metrics_for_layer)

            aggregate_results = "\n"
            aggregate_results += "VALIDATION DATA -\n"
            aggregate_results += "\tMEAN\n\t\t {0}\n".format(mean_vd_metrics.to_str(True))
            aggregate_results += "\tWEIGHTED MEAN\n\t\t {0}\n".format(wt_mean_vd_metrics.to_str(True))

            aggregate_results += "\n"
            aggregate_results += "TRAINING DATA -\n"
            aggregate_results += "\tMEAN\n\t\t {0}\n".format(mean_td_metrics.to_str(True))
            aggregate_results += "\tWEIGHTED MEAN\n\t\t {0}\n".format(wt_mean_td_metrics.to_str(True))

            print(aggregate_results)
            pass #End for layer in layers

        pass #End fold

        """ Dump results to file in case of crash """

        #DUMP TO FILE
        """
        print "Writing results to: " + fName
        handle = open(fName, mode="w+")
        handle.write(all_results)
        handle.close()
        """
        #return (mean_vd_metrics, wt_mean_vd_metrics)
Example #35
__author__ = 'bharathipriyaa'
import Metrics

total_df, coke_df, pepsi_df = Metrics.readFiles()
Metrics.calculate_viewability(total_df, coke_df, pepsi_df)
total_df, coke_df, pepsi_df=Metrics.calculateAdStickiness(total_df, coke_df, pepsi_df)
Metrics.calculateCPM(total_df, coke_df, pepsi_df)
Example #36
	def computeMetricsTestFixed(self):
		foldLength = len(self.tT) // self.folds  # integer fold size
		tTFixed = []
		pTFixed = []
		pTFixedAv = []
		tTFixedAv = []	
		for i in range(0,foldLength):
			pTFixedAv.append(0.0)
			tTFixedAv.append(0.0)	
		averageRMSE = 0.0
		averageAAE = 0.0
		averagePEARSON = 0.0
		averageStdAAE = 0.0
		for f in range(0,self.folds):
			first = f*foldLength
			last = first+foldLength
			tTFixed.append(self.tT[first:last]) 	
			pTFixed.append(self.pT[first:last])
			for i in range(0,foldLength):
				pTFixedAv[i] = pTFixedAv[i]+pTFixed[f][i] 
				tTFixedAv[i] = tTFixedAv[i]+tTFixed[f][i]
			metric = Metrics(tTFixed[f],pTFixed[f])	
			metric.computeRMSE()
			metric.computeAAE()
			if len(tTFixed) > 1:
				metric.computePEARSON()
				metric.computeStdAAE()
				print("Fold" + str(f) + "\nRMSE:" + str(metric.RMSE) + " AAE:" + str(metric.AAE) + " PEARSON:" + str(metric.PEARSON) + " STD_R:" + str(metric.stdAAE))
				averageRMSE += metric.RMSE
				averageAAE += metric.AAE
				averagePEARSON += metric.PEARSON
				averageStdAAE += metric.stdAAE
				print("FOLD AVERAGE...\nRMSE:" + str(averageRMSE/float(self.folds)) + " AAE:" + str(averageAAE/float(self.folds)) + " PEARSON:" + str(averagePEARSON/float(self.folds)) + " STD_R:" + str(averageStdAAE/float(self.folds)))
			else:
				print("Fold" + str(f) + "\nRMSE:" + str(metric.RMSE) + " AAE:" + str(metric.AAE))
			
		pTFixedAv = [x/float(self.folds) for x in pTFixedAv]
		tTFixedAv = [x/float(self.folds) for x in tTFixedAv]
		if len(pTFixedAv) > 1:		
			metric = Metrics(tTFixedAv,pTFixedAv)	
			metric.computeRMSE()
			metric.computeAAE()
			metric.computePEARSON()
			metric.computeStdAAE()
			print("AVERAGE...\nRMSE:" + str(metric.RMSE) + " AAE:" + str(metric.AAE) + " PEARSON:" + str(metric.PEARSON) + " STD_R:" + str(metric.stdAAE))
Example #37
 def train(self, tokenized_docs, ys, epochs, batch_size = 500):
     
     if self.activation_fn == "tanh":
         def to_tanh_val(y):
             if y > 0:
                 return 1
             else:
                 return -1
         ys = [list(map(to_tanh_val, y)) for y in ys]
     
     elif self.activation_fn == "sigmoid":
         def to_sigmoid_val(y):
             if y > 0:
                 return 1
             else:
                 return 0
         ys = [list(map(to_sigmoid_val, y)) for y in ys]
     
     if not self.init:
         self.__init_learners__(tokenized_docs, ys)
 
     outputs = np.array(ys)
     
     num_rows = len(tokenized_docs)
     assert num_rows == outputs.shape[0]
     
     num_batches = num_rows // batch_size
     if num_rows % batch_size > 0:
         num_batches += 1
     
     batch_leaf_nodes = {}
    
     for epoch in range(epochs):
         top_level_inputs = []
         recon_errors = None
         cls_errors = None
         
         print()
         print("EPOCH: ", epoch)
         
         for batch in range(num_batches):
             print(batch, end=' ')
             
             start = batch * batch_size
             end = start + batch_size
             mini_batch_in = tokenized_docs[start:end]
             mini_batch_out = outputs[start:end]
             
             """ Leaf level input data will NOT change thru learning """
             if batch not in batch_leaf_nodes:
                 leaf_nodes, word_pairs, indices = self.__construct_leaf_nodes__(mini_batch_in)
                 batch_leaf_nodes[batch] = (leaf_nodes, word_pairs, indices)
             else:
                 leaf_nodes, word_pairs, indices = batch_leaf_nodes[batch]
                 
             reconstruction_errors, classification_errors, top_nts = self.__train_mini_batch__(leaf_nodes, word_pairs, indices, mini_batch_out)
             top_level_inputs.extend(top_nts)
             
             if recon_errors is None:
                 recon_errors = reconstruction_errors
                 cls_errors = classification_errors
             else:
                 recon_errors = np.append(recon_errors, reconstruction_errors, 0)
                 cls_errors   = np.append(cls_errors, classification_errors, 0)
             
         recon_mse = np.mean(np.square(recon_errors))
         cls_mse = np.mean(np.square(cls_errors))  # accumulated errors, not just the last batch

         recon_mae = np.mean(np.abs(recon_errors))
         cls_mae = np.mean(np.abs(cls_errors))
         
         print()
         print("[AE]   MSE for EPOCH: " + str(recon_mse))
         print("[AE]   MAE for EPOCH: " + str(recon_mae))
         print()
         print("[NNet] MSE for EPOCH: " + str(cls_mse))
         print("[NNet] MAE for EPOCH: " + str(cls_mae))
         print()
         
          a3, a2, err = self.nnet.prop_up(top_level_inputs, outputs)
          # argsort ranks each row's activations; with two classes, column 1
          # holds the index of the larger activation, i.e. a 0/1 prediction
          a3sorted = np.argsort(a3, 1)
          if self.activation_fn == "tanh":
              """ If tanh, rescale predictions from {0,1} to {-1,1} """
              a3sorted = (2 * a3sorted) - 1
          
          expected = outputs[:, 1]
          actual = a3sorted[:, 1].flatten().tolist()[0]
         
         r,p,f1 = Metrics.rpf1(expected, actual, class_value = 1)
         mse = np.mean(np.square(err))
         mae = np.mean(np.abs(err))
         print "Top-Level Classification Results:"
         print "\tMSE for EPOCH: " + str(mse)
         print "\tMAE for EPOCH: " + str(mae)
         print ""
         print "\tRecall:        " + str(r)
         print "\tPrecision:     " + str(p)
         print "\tF1:            " + str(f1)
         
         if epoch > 0 and epoch % 5 == 0:
             self.__run_classifier__(top_level_inputs, expected)
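 # A minimal usage sketch for train(); the class name `RAEClassifier` and the
 # toy data below are illustrative assumptions, not part of the original code.
 # model = RAEClassifier(activation_fn="tanh")
 # docs = [["good", "movie"], ["dull", "plot"]]
 # labels = [[0, 1], [1, 0]]   # remapped internally to match the activation's range
 # model.train(docs, labels, epochs=10, batch_size=500)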
	for col in range(dim):
		p_single = int(p[col])
		a_single = int(a[col])
		str_p += str(p_single) + " "
		str_a += str(a_single) + " "
		predict_single[col].append(p_single)
		actual_single[col].append(a_single)
	str_p = str_p[0:-1]
	str_a = str_a[0:-1]
	predict_complete.append(str_p)
	actual_complete.append(str_a)

import Metrics
matrix_complete = Metrics.ConfusionMatrix(predict_complete, actual_complete)
matrix_single = []

for col in range(dim):
	matrix_single.append(Metrics.ConfusionMatrix(predict_single[col], actual_single[col]))
	
# Hardcoded for better formatting
#Metrics.printConfusionMatrix("COMPLETE", matrix_complete)
#Metrics.printConfusionMatrix("EXPLORATION ORDER", matrix_single[0])
#Metrics.printConfusionMatrix("SATURATION STRATEGY", matrix_single[1])
#Metrics.printConfusionMatrix("SATURATION GRANULARITY", matrix_single[2])

# Simple printing
Metrics.printConfusionMatrixCompact(matrix_complete)
for col in range(dim):
	Metrics.printConfusionMatrixCompact(matrix_single[col])
print ""
Example #39
	def computeMetrics(self):
		if self.testFixed and self.completedFolds == self.folds:
			self.computeMetricsTestFixed()
		elif self.averageOverFold:
			self.computeMetricsAverageOverFold()
		else:
			for m in range(0, self.models):
				metric = Metrics(self.tTM[m], self.pTM[m])
				metric.computeRMSE()
				metric.computeAAE()
				if len(self.tTM[m]) > 1:
					metric.computePEARSON()
					print "Model%s\nRMSE:%s AAE:%s PEARSON:%s" % (m, metric.RMSE, metric.AAE, metric.PEARSON)
				else:
					print "Model%s\nRMSE:%s AAE:%s" % (m, metric.RMSE, metric.AAE)
			metric = Metrics(self.tT, self.pT)
			metric.computeRMSE()
			metric.computeAAE()
			if len(self.tT) > 1:
				metric.computePEARSON()
				metric.computeStdAAE()
				metric.computeStd()
				print "AVERAGE...\nRMSE:%s AAE:%s PEARSON:%s STD_R:%s" % (metric.RMSE, metric.AAE, metric.PEARSON, metric.stdAAE)
Example #40
import KernelKMeans 
import numpy as np
import Metrics
import scipy.io as sio

# parameters
fileData = 'G:/Dropbox/Universidad/Machine Learning/Robustes/Abalone/abalone.npz'
epocs = int(sio.loadmat('G:/Dropbox/Universidad/Machine Learning/Robustes/Abalone/parameters.mat')['epocs'])
n_clusters = 3

gamma_logscale = [1, 2, 3, 4]

clustering_accuracy = np.zeros(epocs * len(gamma_logscale))
calculate_purity = np.zeros(epocs * len(gamma_logscale))
calculate_nmi = np.zeros(epocs * len(gamma_logscale))

cont = 0
for gamma in gamma_logscale:
    for epoc in xrange(epocs):  # keep the loop variable distinct from 'epocs', or later gamma iterations shrink the loop
        print epoc
        labels_true,labels_pred,features= KernelKMeans.get_kernelKMeans(fileData,n_clusters,normalized_axis = 0,norm='l1',gamma=2**-gamma)
        #print str(np.where(labels_true==0)[0].shape) + ' ' + str(np.where(labels_pred==0)[0].shape)
        #print str(np.where(labels_true==1)[0].shape) + ' ' + str(np.where(labels_pred==1)[0].shape)
        #print str(np.where(labels_true==2)[0].shape) + ' ' + str(np.where(labels_pred==2)[0].shape)
    
        clustering_accuracy[cont] = Metrics.calculate_clusteringAccuracy(labels_true,labels_pred)
        calculate_purity[cont],vector = Metrics.calculate_purity(labels_true,labels_pred)
        calculate_nmi[cont] = Metrics.calculate_nmi(labels_true,labels_pred)
        cont +=1

sio.savemat('results',{'clusteringAccuracy' : clustering_accuracy,'purityvec' : calculate_purity,'nmivec' : calculate_nmi})
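# The saved arrays can be reloaded later for analysis, e.g.:
# res = sio.loadmat('results.mat')   # savemat appends '.mat' by default
# print res['clusteringAccuracy'].mean()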
Example #41
for i in range(0, len(vector)):
    vector[i] = list(map(int, vector[i]))

for i in range(0, len(vector_words)):
    vector_words[i] = list(map(int, vector_words[i]))

for x in range(0, len(worker)):
    if worker[x] in workerdict:
        current_worker_annotations = workerdict[worker[x]]
    else:
        current_worker_annotations = {}
    current_worker_annotations[tweet[x]] = vector[x]
    workerdict[worker[x]] = current_worker_annotations

for x in range(0, len(worker_words)):
    if worker_words[x] in workerdict_words:
        current_worker_annotations = workerdict_words[worker_words[x]]
    else:
        current_worker_annotations = {}
    current_worker_annotations[tweet_words[x]] = vector_words[x]
    workerdict_words[worker_words[x]] = current_worker_annotations

# workerdict[worker[x]] = {tweet[x]:vector[x]}
worker_agreement = Metrics.get_worker_agreement(workerdict)
cosine = Metrics.get_cosine_similarity(workerdict)

worker_unique = list(set(worker))

write_counts_to_csv('Novelty_Cosine.csv',cosine)
write_counts_to_csv('Novelty_Worker_Disagreement.csv',worker_agreement)
# Test classifier
testSet = DataUtils.read_dataset(directory +"Set-"+ setNo +"-validate.csv")
predictionsRaw = classifier.predict(testSet.features)

# Outbox labels
predictions = []
predictionsExpl = []
for row in range(len(predictionsRaw)):
	# Convert float to int
	predictions.append(int(predictionsRaw[row]))
	# Splice the 3-digit label into its components (floor division keeps ints in Python 2 and 3)
	order = predictions[row] // 100
	sat = (predictions[row] // 10) % 10
	gran = predictions[row] % 10
	predictionsExpl.append([order, sat, gran])
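# e.g. a raw prediction of 213.0 becomes 213, which splices into
# order=2, sat=1, gran=3, so predictionsExpl gets [2, 1, 3]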

# Write results to csv file
results = DataUtils.ResultSet(testSet.id, predictionsExpl, testSet.labels)
DataUtils.write_resultset(results, directory +"Result-"+ setNo +".csv")

# Metrics of classifier
import Metrics
matrix_complete = Metrics.ConfusionMatrix(predictions, inboxLabels(testSet.labels))
	
# Readable printing
#Metrics.printConfusionMatrix("COMPLETE", matrix_complete)
# Simple printing
Metrics.printConfusionMatrixCompact(matrix_complete)
print ""
def cross_validation_score_generic(x, y, fn_create_classifier, fn_classify, folds = 10, class_value = 1.0, one_fold = False):
    """ Creates #folds in the dataset, and then runs the 
        <classifier> on them, computing the average recall,
        precision and f1 score
        fn_create_classifier : a function that takes a list of training data and returns a classifier
        fn_classifier        : a function that takes a classifier and a list of inputs and returns a list of classifications
        folds                : Number of folds
        class_value          : positive class value
        one_fold             : run for one fold (for quick testing)
    """
    if len(x) != len(y):
        raise Exception("Lists are not the same size")

    npx = __ensure_np_array__(x)
    npy = __ensure_np_array__(y)
    
    edges = cross_validation_edges(len(x), folds)

    td_recall, td_precision, td_f1_score, td_accuracy = 0.0, 0.0, 0.0, 0.0
    vd_recall, vd_precision, vd_f1_score, vd_accuracy = 0.0, 0.0, 0.0, 0.0

    td_tp_ix, td_fp_ix, td_fn_ix, td_tn_ix = [], [], [], []
    vd_tp_ix, vd_fp_ix, vd_fn_ix, vd_tn_ix = [], [], [], []

    for i in range(folds):
        l,r = edges[i]
        
        #Note these are numpy obj's and cannot be treated as lists
        td_x = np.concatenate((npx[:l], npx[r:]))                            
        td_y = np.concatenate((npy[:l], npy[r:]))
        
        vd_x = np.array(npx[l:r])
        vd_y = np.array(npy[l:r])
        
        classifier = fn_create_classifier(td_x, td_y)

        pred_td_y = fn_classify(classifier, td_x)

        td_r, td_p, td_f1, td_a, tp_ix, fp_ix, fn_ix, tn_ix = Metrics.rpf1a_with_indices(td_y, pred_td_y, class_value)
        td_recall    += td_r
        td_precision += td_p
        td_f1_score  += td_f1
        td_accuracy  += td_a

        td_tp_ix.extend(tp_ix)
        td_fp_ix.extend(fp_ix)
        td_fn_ix.extend(fn_ix)
        td_tn_ix.extend(tn_ix)

        pred_vd_y = fn_classify(classifier, vd_x)

        vd_r, vd_p, vd_f1, vd_a, tp_ix, fp_ix, fn_ix, tn_ix = Metrics.rpf1a_with_indices(vd_y, pred_vd_y, class_value)
        vd_recall    += vd_r
        vd_precision += vd_p
        vd_f1_score  += vd_f1
        vd_accuracy  += vd_a

        vd_tp_ix.extend(tp_ix)
        vd_fp_ix.extend(fp_ix)
        vd_fn_ix.extend(fn_ix)
        vd_tn_ix.extend(tn_ix)

        if one_fold:
            folds = 1
            break

    #Compute mean scores across all folds
    
    mean_td_recall      = td_recall    / folds
    mean_td_precision   = td_precision / folds
    mean_td_f1_score    = td_f1_score  / folds
    mean_td_accuracy    = td_accuracy  / folds

    mean_vd_recall      = vd_recall    / folds
    mean_vd_precision   = vd_precision / folds
    mean_vd_f1_score    = vd_f1_score  / folds
    mean_vd_accuracy    = vd_accuracy  / folds
    
    return \
        (   mean_vd_recall, mean_vd_precision, mean_vd_f1_score, mean_vd_accuracy,
            mean_td_recall, mean_td_precision, mean_td_f1_score, mean_td_accuracy,

            # indices for different groupings
            vd_tp_ix, vd_fp_ix, vd_fn_ix, vd_tn_ix,
            td_tp_ix, td_fp_ix, td_fn_ix, td_tn_ix
        )
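# A minimal usage sketch, assuming a scikit-learn style estimator is available;
# LogisticRegression and the data names (xs, ys) are illustrative assumptions.
from sklearn.linear_model import LogisticRegression

def example_create_classifier(td_x, td_y):
    clf = LogisticRegression()
    clf.fit(td_x, td_y)
    return clf

def example_classify(classifier, inputs):
    return classifier.predict(inputs)

# scores = cross_validation_score_generic(xs, ys, example_create_classifier,
#                                         example_classify, folds=10, class_value=1.0)
# mean_vd_recall, mean_vd_precision, mean_vd_f1, mean_vd_accuracy = scores[:4]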
for line in worker_label_vectors_file.readlines():
    info = line.strip().split('|')
    worker_id = info[0]
    label = info[1]
    vector = info[2:]
    vector = [int(i) for i in vector]
    if worker_id in worker_dict:
        cur_label_vectors = worker_dict[worker_id]
    else:
        cur_label_vectors = {}
    cur_label_vectors[label] = vector
    worker_dict[worker_id] = cur_label_vectors
worker_label_vectors_file.close()

#worker_agreement  
worker_agreement_dict = Metrics.get_worker_agreement(worker_dict)

#avg_worker_sentence_agreement
avg_worker_sentence_agreement_dict = Metrics.get_avg_worker_sentence_agreement(worker_dict)

#Avg Amount of annotations per label
avg_worker_annotations = {}
for worker in worker_dict:
    total_annotations = 0
    labels = worker_dict[worker]
    for label in labels:
        vector = labels[label]
        total_annotations += sum(vector)
    avg_annotations = float(total_annotations)/len(labels)
    avg_worker_annotations[worker] = avg_annotations
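# A hedged sketch of persisting the per-worker averages with the standard csv
# module; the output filename is an assumption, not part of the original snippet.
import csv

with open('Avg_Worker_Annotations.csv', 'wb') as out_file:  # 'wb' is the Python 2 csv convention
    writer = csv.writer(out_file)
    writer.writerow(['worker_id', 'avg_annotations_per_label'])
    for worker_id, avg in avg_worker_annotations.items():
        writer.writerow([worker_id, avg])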