Example #1
def train_test_base(X_train, X_test, y_train, y_test, task, name, inputs,
                    seed):
    mtl = 1 if y_test.shape[1] > 1 else 0  # multi-label
    if name == 'lr':
        # print('Start training Logistic Regression:')
        model = LogisticRegression()
    else:
        # print('Start training Random Forest:')
        model = RandomForestClassifier()
    if mtl:
        model = OneVsRestClassifier(model)
    else:
        y_train, y_test = y_train[:, 0], y_test[:, 0]
    t0 = time.time()
    model.fit(X_train, y_train)
    t1 = time.time()
    # print('Running time:', t1 - t0)
    probs = model.predict_proba(X_test)
    metrics = []
    if mtl:
        for idx in range(y_test.shape[1]):
            metric = cal_metric(y_test[:, idx], probs[:, idx])
            # print(idx + 1, metric)
            metrics.append(metric)
        # print('Avg', np.mean(metrics, axis=0).tolist())
    else:
        metric = cal_metric(y_test, probs[:, 1])
        f1, auc, aupr = metric
        # print(metric)
        print(f'{task},{name},{inputs},{seed},{f1},{auc},{aupr}')
Example #2
    def convert(self, kind="user"):
        """Convert underlying metric objects.

        Conversion to user format returns a dictionary mapping each metric name
        to its value. Conversion to db format returns a list of dictionaries,
        each with keys "name", "scoring", and "value" mapping to their
        respective values. Both formats convert np.floating values to Python
        floats.

        Parameters
        ----------
        kind : str
            One of "user" or "db"
        """
        if kind=="user":
            metrics = {}
            for m in self._list:
                metrics.update(m.convert(kind="user"))
        elif kind=="db":
            metrics = []
            for m in self._list:
                metrics.append(m.convert(kind="db"))
        else:
            ValueError("Bad kind: {}".format(kind))

        return metrics
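A minimal, hypothetical usage sketch of the two conversion formats described above; `FakeMetric` is an invented stand-in for the `Metric` objects held in `self._list`, not the project's real class.

import numpy as np


class FakeMetric:
    """Hypothetical stand-in for the Metric objects held in self._list."""

    def __init__(self, name, scoring, value):
        self.name, self.scoring, self.value = name, scoring, value

    def convert(self, kind="user"):
        value = float(self.value) if isinstance(self.value, np.floating) else self.value
        if kind == "user":
            return {self.name: value}
        return {"name": self.name, "scoring": self.scoring, "value": value}


metric_list = [FakeMetric("accuracy", "classification", np.float64(0.93)),
               FakeMetric("f1", "classification", np.float64(0.88))]

# "user" format: one flat dict of metric name -> value
user = {}
for m in metric_list:
    user.update(m.convert(kind="user"))
print(user)  # {'accuracy': 0.93, 'f1': 0.88}

# "db" format: a list of per-metric dicts
db = [m.convert(kind="db") for m in metric_list]
print(db)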
Example #3
def train(projectname, models, x_train, y_train):
    for k in range(MODEL_NUMBER):
        mn0, mn1, metrics0, metrics1 = [], [], [], []
        print("Training ", k, "th model...")
        for i in range(y_train.shape[0]):
            if y_train[i] == 0:
                mn0.append(x_train[0][i])
                metrics0.append(x_train[1][i])
            else:
                mn1.append(x_train[0][i])
                metrics1.append(x_train[1][i])

        size = int(len(mn1) * SUBSETSIZE)
        mn0 = np.array(mn0)
        mn1 = np.array(mn1)
        metrics0 = np.array(metrics0)
        metrics1 = np.array(metrics1)

        indices0 = np.arange(mn0.shape[0])
        indices1 = np.arange(mn1.shape[0])

        #np.random.shuffle(indices0)
        #np.random.shuffle(indices1)
        indices0 = shuffle(indices0)
        indices1 = shuffle(indices1)

        mn0 = mn0[indices0[:size]]
        mn1 = mn1[indices1[:size]]
        metrics0 = metrics0[indices0[:size]]
        metrics1 = metrics1[indices1[:size]]
        temp_set = []
        for i in range(size):
            temp_set.append([mn0[i], metrics0[i], 0])
            temp_set.append([mn1[i], metrics1[i], 1])

        np.random.shuffle(temp_set)

        y = []
        mn = []
        metrics = []
        for i in range(len(temp_set)):
            mn.append(temp_set[i][0])
            metrics.append(temp_set[i][1])
            y.append(temp_set[i][2])

        mn = np.array(mn)
        metrics = np.array(metrics)

        x = [mn, metrics]
        y = np.array(y)

        models[k].fit(x, y, epochs=10, batch_size=5, verbose=0)
        json_string = models[k].to_json()
        with open('D:/TSE/python/largeclass/model/' + projectname + '-' +
                  str(k) + '.json', 'w') as f:
            f.write(json_string)
        models[k].save_weights('D:/TSE/python/largeclass/model/' +
                               projectname + '-' + str(k) + '.h5')
    return models
Example #4
def train_test_base(X_train, X_test, y_train, y_test, name):
    mtl = 1 if y_test.shape[1] > 1 else 0  # multi-label
    if name == 'lr':
        print('Start training Logistic Regression:')
        model = LogisticRegression()
        param_grid = {'penalty': ['l1', 'l2']}
    else:
        print('Start training Random Forest:')
        model = RandomForestClassifier()
        param_grid = {
            'n_estimators': [x for x in range(20, 40, 5)],
            'max_depth': [None, 20, 40, 60, 80, 100]
        }
    if mtl:
        model = OneVsRestClassifier(model)
    else:
        y_train, y_test = y_train[:, 0], y_test[:, 0]
    t0 = time.time()
    gridsearch = GridSearchCV(model, param_grid, scoring='roc_auc', cv=5)
    gridsearch.fit(X_train, y_train)
    model = gridsearch.best_estimator_
    t1 = time.time()
    print('Running time:', t1 - t0)
    probs = model.predict_proba(X_test)
    metrics = []
    if mtl:
        for idx in range(y_test.shape[1]):
            metric = cal_metric(y_test[:, idx], probs[:, idx])
            print(idx + 1, metric)
            metrics.append(metric)
        print('Avg', np.mean(metrics, axis=0).tolist())
    else:
        metric = cal_metric(y_test, probs[:, 1])
        print(metric)
Example #5
def average_metric(true: list, predicted: list, metric) -> float:
    """
    Computes an average metric from a list of true and predicted values.

    This function iterates from 1 to `len(predicted)`, computing the metric over
    `true` and `predicted[:k]` at each step, and finally returns the average of
    those per-prefix values.

    Parameters
    ----------
    true : list of str
        List of true elements.
    predicted : list of str
        List of predicted elements.
    metric : callable
        A metric function.
    
    Returns
    -------
    float
        Average metric.

    """
    metrics = []
    for k in range(1, len(predicted) + 1):
        predicted_k = predicted[:k]
        p = metric(true, predicted_k)
        metrics.append(p)
    try:
        return statistics.mean(metrics)
    except statistics.StatisticsError:
        return 0.0
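A quick usage sketch, assuming `average_metric` defined above is in scope; `prefix_precision` is a made-up metric used only for illustration.

def prefix_precision(true, predicted_k):
    # Fraction of the predicted prefix that appears in the true list (illustration only).
    return sum(1 for p in predicted_k if p in true) / len(predicted_k)


true = ["a", "b", "c"]
predicted = ["a", "x", "b"]

# Evaluates the metric on predicted[:1], predicted[:2], predicted[:3]:
# 1/1, 1/2, 2/3 -> mean of roughly 0.722
print(average_metric(true, predicted, prefix_precision))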
Example #6
def test(models, x_test, y_test):
    mn0, mn1, metrics0, metrics1 = [], [], [], []
    for i in range(y_test.shape[0]):
        if y_test[i] == 0:
            mn0.append(x_test[0][i])
            metrics0.append(x_test[1][i])
        else:
            mn1.append(x_test[0][i])
            metrics1.append(x_test[1][i])

    size = int(len(mn0) / 0.9329 * 0.0671 + 0.5)

    temp_set = []
    mn0 = np.array(mn0)
    mn1 = np.array(mn1)
    metrics0 = np.array(metrics0)
    metrics1 = np.array(metrics1)

    indices0 = np.arange(mn0.shape[0])
    indices1 = np.arange(mn1.shape[0])

    np.random.shuffle(indices0)
    np.random.shuffle(indices1)

    mn1 = mn1[indices1[:size]]
    metrics1 = metrics1[indices1[:size]]

    temp_set = []
    for i in range(mn0.shape[0]):
        temp_set.append([mn0[i], metrics0[i], 0])
    for i in range(size):
        temp_set.append([mn1[i], metrics1[i], 1])

    np.random.shuffle(temp_set)
    print('---', len(temp_set), '---')
    y = []
    mn = []
    metrics = []
    for i in range(len(temp_set)):
        mn.append(temp_set[i][0])
        metrics.append(temp_set[i][1])
        y.append(temp_set[i][2])

    mn = np.array(mn)
    metrics = np.array(metrics)

    x = [mn, metrics]
    y = np.array(y)
    predict = []
    for i in range(MODEL_NUMBER):
        predict.append(models[i].predict(x))
    y_pre = []
    for i in range(y.shape[0]):
        t = 0.0
        for j in range(MODEL_NUMBER):
            t += predict[j][i]
        y_pre.append(t / MODEL_NUMBER)
    return eval(y_pre, y)
Example #7
def predict_original_for_loader(loaded_models, dataloader, config, is_pseudo):
    
    predicts = defaultdict(lambda: {'original': defaultdict(dict), 'target': defaultdict(dict), 'processed': defaultdict(dict)})
    predicts_each = defaultdict()
    metrics = []

    for i, key in enumerate(loaded_models.keys()):
        print('【%d/%d】' % (i+1, len(loaded_models.keys())))
        model = loaded_models[key]['pseudo_model' if is_pseudo else 'model']
        for images, targets, image_ids in tqdm.tqdm(dataloader):
            image_id = image_ids[0]
            preds, _ = model(images, targets)
            metrics.append(calculate_score_for_each(preds, targets))

            if i == 0:
                predicts[image_id]['target']['boxes'] = targets[0]['boxes']
                predicts_each[image_id] = defaultdict(dict)
               
            predicts_each[image_id][key]['boxes'] = preds[0]['boxes']
            predicts_each[image_id][key]['scores'] = preds[0]['scores']
    
    for image_id, d in predicts_each.items():
        idx = 0
        all_empty = False
        while not all_empty:
            top_scores = []
            top_boxes = []
            for key in predicts_each[image_id].keys():
                if d[key]['scores'].shape[0] <= idx:
                    continue            
                top_scores.append(d[key]['scores'][idx])
                top_boxes.append(d[key]['boxes'][idx, :])  

            if len(top_scores) == 0:
                all_empty = True
                continue      

            top_scores = np.array(top_scores)
            top_boxes = np.array(top_boxes)
            top_sorted_idx = np.argsort(top_scores)[::-1]
            top_boxes = top_boxes[top_sorted_idx, :]
            top_scores = top_scores[top_sorted_idx]
            
            if 'boxes' not in predicts[image_id]['original']:
                predicts[image_id]['original']['boxes'] = top_boxes
                predicts[image_id]['original']['scores'] = top_scores
            else:
                if config['apply']:
                    keeped_top_score = predicts[image_id]['original']['scores'][-1]
                    top_scores -= np.max([0.0, (top_scores[0] - keeped_top_score + config['subtraction'])])
                predicts[image_id]['original']['boxes'] = np.concatenate([predicts[image_id]['original']['boxes'], top_boxes], axis=0)
                predicts[image_id]['original']['scores'] = np.concatenate([predicts[image_id]['original']['scores'], top_scores], axis=0)
                sorted_idx = np.argsort(predicts[image_id]['original']['scores'])[::-1]
                predicts[image_id]['original']['boxes'] = predicts[image_id]['original']['boxes'][sorted_idx, :]
                predicts[image_id]['original']['scores'] = predicts[image_id]['original']['scores'][sorted_idx]                        
            idx += 1
        
    return predicts, np.array(metrics)
Example #8
    def fit(self, X, Y):
        """
        Self explanatory
        @param: X (np.ndarray)
        @param: Y (np.ndarray)
        @returns: results (pd.DataFrame) with all cv results
        """
        assert(X.shape[0] == Y.shape[0])
        self.splits = list(self.kfold.split(X))

        # set up results dictionary
        results = {"params": [], "mean_test_score": []}
        for param_title in self.parameters.keys():
            results[f"param_{param_title}"] = []
        for i in range(self.kfold.get_n_splits()):
            results[f"test{i}_score"] = []

        self.best_estimator_, self.best_score_, self.best_params_ = None, np.inf, None
        for params in itertools.product(*self.parameters.values()):
            # set param 
            param_dict = {}
            for param_idx, param_title in enumerate(self.parameters.keys()):
                results[f"param_{param_title}"].append(params[param_idx])
                param_dict[param_title] = params[param_idx]

            results["params"].append(param_dict)
            # perform split
            models, metrics = [], []
            for split_idx, elem in enumerate(self.splits):
                train_idx, valid_idx = elem
                # model = self.model_func(param_dict, self.feat_params[split_idx])
                model = base.clone(self.model)
                model = model.set_params(**{**param_dict, **self.feat_params[split_idx]})

                Xtrain, Xtest = X[train_idx], X[valid_idx]
                Ytrain, Ytest = Y[train_idx], Y[valid_idx]

                if self.input_valid:
                    model.fit(Xtrain, Ytrain.flatten(), Xtest, Ytest.flatten())
                else:
                    model.fit(Xtrain, Ytrain.flatten())

                pred = model.predict(Xtest)
                metric = self.metric_func(Ytest, pred) 

                # bookkeeping
                models.append(model)
                metrics.append(metric)
                results[f"test{split_idx}_score"].append(metric)

            avg_score = np.average(metrics)
            results["mean_test_score"].append(avg_score)
            if avg_score < self.best_score_:
                self.best_score_ = avg_score
                self.best_estimator_ = models[np.argmin(metrics)]
                self.best_params_ = param_dict
        return pd.DataFrame(results)
Example #9
def evaluate_metrics_by_forecast_horizon(ds,
                                         column=None,
                                         model=sklearn.linear_model.LinearRegression(),
                                         metric=sklearn.metrics.r2_score,
                                         fit_set='training',
                                         predict_set='test'):
    logger = logging.getLogger()
    
    if column is None: column = ds.input_all.columns
    if not checks.is_iterable_not_string(column): column = [column]
    
    assert fit_set in ('training', 'validation', 'test')
    assert predict_set in ('training', 'validation', 'test')
    
    if fit_set == 'training':
        fit_sets = ds.training_set
        all_fit_sets = ds.all_training_sets
    elif fit_set == 'validation':
        fit_sets = ds.validation_set
        all_fit_sets = ds.all_validation_sets
    else:
        fit_sets = ds.test_set
        all_fit_sets = ds.all_test_sets
        
    if predict_set == 'training':
        predict_sets = ds.training_set
    elif predict_set == 'validation':
        predict_sets = ds.validation_set
    else:
        predict_sets = ds.test_set
        
    logger.info('Evaluating the metric for column(s) %s' % ', '.join(['"%s"' % c for c in column]))
    metrics = []
    if len(fit_sets) == len(predict_sets):
        for fs, ps in zip(fit_sets, predict_sets):
            x_train = fs.input[column].values
            y_train = fs.output.values
            model.fit(x_train, y_train)
            x_predict = ps.input[column].values
            y_predict = ps.output.values
            y_predict_pred = model.predict(x_predict)
            predict_set_metrics = []
            for i in range(len(ds.forecast_horizon)):
                predict_set_metrics.append(metric(y_predict[:,i], y_predict_pred[:,i]))
            metrics.append(predict_set_metrics)
    else:
        x_train = all_fit_sets.input[column].values
        y_train = all_fit_sets.output.values
        model.fit(x_train, y_train)
        for ps in predict_sets:
            x_predict = ps.input[column].values
            y_predict = ps.output.values
            y_predict_pred = model.predict(x_predict)
            predict_set_metrics = []
            for i in range(len(ds.forecast_horizon)):
                predict_set_metrics.append(metric(y_predict[:,i], y_predict_pred[:,i]))
            metrics.append(predict_set_metrics)
    # The mean is taken over the predict sets
    return np.mean(metrics, axis=0)
Example #10
    def calculate_metrics(self, y_predicted: np.array,
                          y_true: np.array) -> List[ClassMetrics]:
        classes_num = self.get_classes_num(y_true)

        metrics = []
        for class_idx in range(classes_num):
            class_stats = self.calculate_class_metrics(y_predicted, y_true,
                                                       class_idx)
            metrics.append(class_stats)

        return metrics
Example #11
def evaluate_significance(null_predictions,
                          *,
                          metric=sklearn.metrics.balanced_accuracy_score):
    """
    Prints several summary metrics of classification performance.
    
    Parameters
    ----------
    null_predictions : list
        Length [n_perms+1,]. Each item is a [labels, prediction] pair, with
        `predictions[0]` being the unshuffled result (typically the output of
        `compute_null_predictions()`).
    metric : optional
        Function conforming to the `sklearn.metrics` interface.
    
    Returns
    -------
    metrics : np.array
        `metric` calculated for every item in `null_predictions`.
    """

    metrics = []
    for labels, predictions in null_predictions:
        # Reorder for sklearn
        # Easier to convert to string and let sklearn sort a common coding
        predictions = utils.to_categorical(predictions, to_string=True)
        # Not strictly necessary, but throws error on missing entries
        labels = labels[predictions.index.levels[-1]]
        labels = utils.to_categorical(labels, to_string=True)
        labels = labels.reindex(predictions.index, level=-1)

        metrics.append(metric(labels, predictions))

    metrics = np.asarray(metrics)

    # Summarise
    print("True accuracy:            {: .2f}".format(metrics[0]))
    print("Null accuracy [+/- s.d.]: {: .2f} [+/- {:.2f}]".format(
        np.mean(metrics[1:]), np.std(metrics[1:])))
    print("Approx (2.5%, 97.5%) CI:  {: .2f}, {:.2f}".format(
        np.percentile(metrics[1:], 2.5), np.percentile(metrics[1:], 97.5)))
    # Include true in permutation distribution
    # Phipson & Smyth, 2010: https://doi.org/10.2202/1544-6115.1585
    # https://stats.stackexchange.com/a/112352
    k = np.sum(metrics >= metrics[0])
    n = len(metrics)
    print("p(True > Null) [95% CI]:  {: .3f} [{:.2e}, {:.2e}]".format(
        k / n, *scipy.stats.beta.interval(0.95, k, n - k)))
    print()

    return metrics
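The tail of this function follows Phipson & Smyth (2010): the observed statistic is counted as part of the null distribution when estimating the p-value, and a beta interval bounds it. Below is a standalone sketch of just that step on synthetic numbers (not real model outputs), kept separate from the function's own API.

import numpy as np
import scipy.stats

# Synthetic numbers standing in for metrics[0] (the unshuffled score) and the null scores.
rng = np.random.default_rng(0)
metrics = np.concatenate([[0.71], rng.normal(0.50, 0.05, size=999)])

# Count the observed value as part of the null distribution (Phipson & Smyth, 2010).
k = np.sum(metrics >= metrics[0])
n = len(metrics)
p_value = k / n
ci_lo, ci_hi = scipy.stats.beta.interval(0.95, k, n - k)
print("p = {:.4f}, 95% CI = [{:.2e}, {:.2e}]".format(p_value, ci_lo, ci_hi))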
Example #12
def get_metric_options() -> typing.List[typing.Dict[str, str]]:
    """
    Returns:
        A list of all metrics that may be used, in a format that other apps can read and interpret
    """
    metrics: typing.List[typing.Dict[str, str]] = list()

    for metric in get_all_metrics():
        metrics.append({
            "name": metric.get_name(),
            "description": metric.get_descriptions(),
            "identifier": metric.get_identifier()
        })

    return metrics
Example #13
def _bootstrap(metric: Metric, y_true: np.ndarray, y_pred: np.ndarray,
               n_bootstrap: int, conf_interval: float, seed: int,
               **kwargs) -> BootstrapResults:
    """Performs bootstrapping on a given `Metric`.

  Args:
    metric: An instance of a `Metric`.
    y_true: Ground truth (correct) target values.
    y_pred: Estimated targets as returned by a classifier.
    n_bootstrap: An integer denoting the number of bootstrap iterations.
    conf_interval: A float denoting the width of confidence interval.
    seed: An int denoting the seed for the PRNG.
    **kwargs: Additional keyword arguments passed to each Metric's `func`.

  Returns:
    A BootstrapResults namedtuple of the mean, standard deviation, and lower
    and upper bounds for `conf_interval` of the `Metric` over `n_bootstrap`
    bootstrapping iterations. If n_bootstrap=0, i.e., no bootstrapping is used,
    the returned standard deviation and lower and upper bounds are numpy.nan.
  """
    if n_bootstrap == 0:
        return BootstrapResults(metric(y_true, y_pred, **kwargs), np.nan,
                                np.nan, np.nan, np.nan)

    prng = np.random.RandomState(seed)
    lo_perc = (100 - conf_interval) / 2
    hi_perc = 100 - lo_perc

    metrics = []
    num_observations = len(y_pred)
    while len(metrics) < n_bootstrap:
        idx = prng.randint(0, high=num_observations, size=num_observations)
        sample_true = y_true[idx]
        sample_preds = y_pred[idx]
        if metric.binary_only and len(np.unique(sample_true)) < 2:
            continue
        metrics.append(metric(sample_true, sample_preds, **kwargs))

    metric_mean = np.mean(metrics, axis=0)
    metric_std = np.std(metrics, axis=0)

    metric_lo, metric_hi = np.percentile(metrics, [lo_perc, hi_perc], axis=0)

    return BootstrapResults(metric_mean, metric_std, conf_interval, metric_lo,
                            metric_hi)
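`_bootstrap` above depends on the project's `Metric` and `BootstrapResults` types, which are not shown here. The sketch below reproduces the same resampling idea with a plain callable and invented names (`bootstrap_ci`); it is illustrative, not the module's API.

import numpy as np
from sklearn.metrics import roc_auc_score


def bootstrap_ci(metric_fn, y_true, y_pred, n_bootstrap=1000, conf_interval=95.0, seed=0):
    """Simplified stand-in for _bootstrap: a plain callable instead of a Metric."""
    prng = np.random.RandomState(seed)
    lo_perc = (100 - conf_interval) / 2
    hi_perc = 100 - lo_perc
    scores = []
    n = len(y_pred)
    while len(scores) < n_bootstrap:
        idx = prng.randint(0, high=n, size=n)
        if len(np.unique(y_true[idx])) < 2:  # resample if a class is missing
            continue
        scores.append(metric_fn(y_true[idx], y_pred[idx]))
    return np.mean(scores), np.std(scores), np.percentile(scores, [lo_perc, hi_perc])


y_true = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1, 0])
y_score = np.array([0.1, 0.8, 0.3, 0.7, 0.9, 0.2, 0.6, 0.4, 0.7, 0.3])
print(bootstrap_ci(roc_auc_score, y_true, y_score))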
Example #14
def run_check(check_file: str):
    gt_df = model.widerface.WIDERFACEDataset(
            root='data/WIDER_val/images',
            meta='data/wider_face_split/wider_face_val_bbx_gt.txt')

    check_df = model.widerface.WIDERFACEDataset(
            root='./',
            meta=check_file)


    metrics = []
    pred = []
    labels = []

    for idx, image in enumerate(check_df):
        bboxes = [[b['x'], b['y'], b['w'], b['h']] for b in image.bboxes]
        scores = [b['blur'] for b in image.bboxes]

        gt_image = gt_df[image.filename]
        gt_bboxes = [[b['x'], b['y'], b['w'], b['h']] for b in gt_image.bboxes]

        if len(gt_bboxes) > len(bboxes):
            for _ in range(0, len(gt_bboxes) - len(bboxes)):
                metrics.append(0)
                pred.append(0)
                labels.append(1)

        if len(bboxes) == 0:
            continue

        iou = iou_scores(bboxes, gt_bboxes)
        for iou_idx, iou_score in enumerate(iou):
            metrics.append(scores[iou_idx])
            if iou_score >= 0.5:
                pred.append(1)
                labels.append(1)
            else:
                pred.append(1)
                labels.append(0)

    metrics = np.array(metrics)
    labels = np.array(labels)
    pred = np.array(pred)

    p, r, _ = sklearn.metrics.precision_recall_curve(labels, metrics)
    print('AP:', sklearn.metrics.auc(r, p))
Example #15
    def objective(cls, args):
        metrics = []
        for seed in range(1, cls.cv_fold + 1):
            model = cls.interpreter(args[0])
            data_path = args[1]
            targets_path = args[2]
            X_train, Y_train, X_test, Y_test = cls.load_data(
                data_path=data_path,
                targets_path=targets_path,
                train_set=cls.train_set,
                random_state=seed
            )
            model.fit(X_train, Y_train)
            metric = cls.evaluate(model, X_test, Y_test)
            metrics.append(metric)
        metrics_mean = np.array(metrics).mean()
        print(metrics_mean)
        return 1 - metrics_mean
Example #16
def evaluate_metrics_by_forecast_horizon(
        ds,
        column=None,
        model=sklearn.linear_model.LinearRegression(),
        metric=sklearn.metrics.r2_score):
    logger = logging.getLogger()
    if column is None: column = ds.input_all.columns
    if not checks.is_iterable_not_string(column): column = [column]
    logger.info('Evaluating the metric for column(s) %s' %
                ', '.join(['"%s"' % c for c in column]))
    metrics = []
    if len(ds.training_set) == len(ds.validation_set):
        for ts, vs in zip(ds.training_set, ds.validation_set):
            x_train = ts.input[column].values
            y_train = ts.output.values
            model.fit(x_train, y_train)
            x_validation = vs.input[column].values
            y_validation = vs.output.values
            y_validation_pred = model.predict(x_validation)
            validation_set_metrics = []
            for i in range(len(ds.forecast_horizon)):
                validation_set_metrics.append(
                    metric(y_validation[:, i], y_validation_pred[:, i]))
            metrics.append(validation_set_metrics)
    else:
        x_train = ds.all_training_sets.input[column].values
        y_train = ds.all_training_sets.output.values
        model.fit(x_train, y_train)
        for vs in ds.validation_set:
            x_validation = vs.input[column].values
            y_validation = vs.output.values
            y_validation_pred = model.predict(x_validation)
            validation_set_metrics = []
            for i in range(len(ds.forecast_horizon)):
                validation_set_metrics.append(
                    metric(y_validation[:, i], y_validation_pred[:, i]))
            metrics.append(validation_set_metrics)
    # The mean is taken over the validation sets
    return np.mean(metrics, axis=0)
Example #17
def _build_default_metrics(binary: bool) -> List[Metric]:
    """Builds and returns the default set of `Metric`s."""

    metrics = [
        Metric('num', lambda y_true, y_pred: len(y_true), binary_only=binary)
    ]

    if binary:
        metrics.extend([
            Metric('auc', sklearn.metrics.roc_auc_score, binary_only=True),
            Metric('auprc',
                   sklearn.metrics.average_precision_score,
                   binary_only=True),
            TopPercentileMetric('freq',
                                compute_frequency,
                                binary_only=True,
                                top_percentile=100),
        ])
        for top_percentile in [10, 5, 1]:
            metrics.append(
                TopPercentileMetric('freq @{:04.1f}'.format(top_percentile),
                                    compute_frequency,
                                    binary_only=True,
                                    top_percentile=top_percentile))
    else:
        metrics.extend([
            Metric('pearson', sp.stats.pearsonr, binary_only=False),
            Metric('spearman', sp.stats.spearmanr, binary_only=False),
            Metric('mse',
                   sklearn.metrics.mean_squared_error,
                   binary_only=False),
            Metric('mae',
                   sklearn.metrics.mean_absolute_error,
                   binary_only=False),
        ])
    return metrics
Example #18
    def _create_metrics(self, folder):
        columns = ["F1 score", "Matthews"]
        indices = [
            self._get_classifier_name(index)
            for index in self.classifier_indices
        ]
        metrics = pd.DataFrame(columns=columns)

        LOG.info("Start creating metrics")
        for index in self.classifier_indices:
            tmp = np.empty((0, 2))
            classifier = self.classifiers[index]
            Xtrain, ytrain, Xtest, ytest = self.dataset.get_dataset()
            for i in range(self.experiments):
                LOG.info("Experiment {}/{}".format(
                    index * i + i,
                    self.experiments * len(self.classifier_indices)))
                classifier.fit(Xtrain, ytrain)
                prediction = classifier.predict(Xtest)

                f1_score, matthews = get_metrics(prediction, ytest)

                entry = np.array([[f1_score, matthews]])
                tmp = np.concatenate((tmp, entry))

            mean = np.mean(tmp, axis=0)
            metrics = metrics.append(
                pd.DataFrame(np.reshape(mean, (1, 2)),
                             index=[self._get_classifier_name(index)],
                             columns=columns))

        basename = self._get_basename()
        name = "metrics_" + basename + "_" + ".csv"
        path = os.path.join(folder, name)
        LOG.info("Saving metrics as: {}".format(path))
        metrics.to_csv(path, sep=',')
Example #19
def train(model, dataset, cfg):
    print("Our config:")
    pprint.pprint(cfg)

    dataset_name = cfg.dataset + "-" + cfg.model + "-" + cfg.name

    device = 'cuda' if cfg.cuda else 'cpu'
    if not torch.cuda.is_available() and cfg.cuda:
        device = 'cpu'
        print(
            "WARNING: cuda was requested but is not available, using cpu instead."
        )

    print(f'Using device: {device}')

    print(cfg.output_dir)

    if not exists(cfg.output_dir):
        os.makedirs(cfg.output_dir)

    # Setting the seed
    np.random.seed(cfg.seed)
    random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)

    if cfg.cuda:
        torch.cuda.manual_seed_all(cfg.seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Dataset
    train_size = int(0.8 * len(dataset))
    valid_size = len(dataset) - train_size
    torch.manual_seed(cfg.seed)
    train_dataset, valid_dataset = torch.utils.data.random_split(
        dataset, [train_size, valid_size])

    #disable data aug
    valid_dataset.data_aug = None

    # fix labels
    train_dataset.labels = dataset.labels[train_dataset.indices]
    valid_dataset.labels = dataset.labels[valid_dataset.indices]

    # Dataloader
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=cfg.batch_size,
                                               shuffle=cfg.shuffle,
                                               num_workers=cfg.threads,
                                               pin_memory=cfg.cuda)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=cfg.batch_size,
                                               shuffle=cfg.shuffle,
                                               num_workers=cfg.threads,
                                               pin_memory=cfg.cuda)
    #print(model)

    # Optimizer
    optim = torch.optim.Adam(model.parameters(),
                             lr=cfg.lr,
                             weight_decay=1e-5,
                             amsgrad=True)
    print(optim)

    criterion = torch.nn.BCEWithLogitsLoss()

    # Checkpointing
    start_epoch = 0
    best_metric = 0.
    weights_for_best_validauc = None
    auc_test = None
    metrics = []
    weights_files = glob(join(
        cfg.output_dir, f'{dataset_name}-e*.pt'))  # Find all weights files
    if len(weights_files):
        # Find most recent epoch
        epochs = np.array([
            int(w[len(join(cfg.output_dir, f'{dataset_name}-e')):-len('.pt')].
                split('-')[0]) for w in weights_files
        ])
        start_epoch = epochs.max()
        weights_file = [
            weights_files[i]
            for i in np.argwhere(epochs == np.amax(epochs)).flatten()
        ][0]
        model.load_state_dict(torch.load(weights_file).state_dict())

        with open(join(cfg.output_dir, f'{dataset_name}-metrics.pkl'),
                  'rb') as f:
            metrics = pickle.load(f)

        best_metric = metrics[-1]['best_metric']
        weights_for_best_validauc = model.state_dict()

        print("Resuming training at epoch {0}.".format(start_epoch))
        print("Weights loaded: {0}".format(weights_file))

    model.to(device)

    for epoch in range(start_epoch, cfg.num_epochs):

        avg_loss = train_epoch(cfg=cfg,
                               epoch=epoch,
                               model=model,
                               device=device,
                               optimizer=optim,
                               train_loader=train_loader,
                               criterion=criterion)

        auc_valid = valid_test_epoch(name='Valid',
                                     epoch=epoch,
                                     model=model,
                                     device=device,
                                     data_loader=valid_loader,
                                     criterion=criterion)[0]

        if np.mean(auc_valid) > best_metric:
            best_metric = np.mean(auc_valid)
            weights_for_best_validauc = model.state_dict()
            torch.save(model, join(cfg.output_dir, f'{dataset_name}-best.pt'))
            # only compute when we need to

        stat = {
            "epoch": epoch + 1,
            "trainloss": avg_loss,
            "validauc": auc_valid,
            'best_metric': best_metric
        }

        metrics.append(stat)

        with open(join(cfg.output_dir, f'{dataset_name}-metrics.pkl'),
                  'wb') as f:
            pickle.dump(metrics, f)

        torch.save(model,
                   join(cfg.output_dir, f'{dataset_name}-e{epoch + 1}.pt'))

    return metrics, best_metric, weights_for_best_validauc
Example #20
def predict_sentences_reporting_bias(negative_sample_weighting=1, number_of_models=1, positives_per_pdf=1):
    X, y, X_sents, vec, study_sent_indices = _get_sentence_level_X_y()

    kf = KFold(len(study_sent_indices), n_folds=5, shuffle=True)

    metrics = []

    for fold_i, (train, test) in enumerate(kf):

        print "making test sentences"

        test_indices = [study_sent_indices[i] for i in test]
        train_indices = [study_sent_indices[i] for i in train]

        X_sents_test = sublist(X_sents, test_indices)
        # [X_sents[i] for i in test]

        print "done!"

        # pdb.set_trace()

        # print "generating split"
        X_train = X[np_indices(train_indices)]
        y_train = y[np_indices(train_indices)]
        X_test = X[np_indices(test_indices)]
        y_test = y[np_indices(test_indices)]
        # print "done!"

        all_indices = np.arange(len(y_train))

        train_positives = np.nonzero(y_train)[0]
        train_negatives = np.where(y_train == 0)[0]

        total_positives = len(train_positives)

        if (negative_sample_weighting * total_positives) > len(train_negatives):
            sample_negative_examples = len(train_negatives)
        else:
            sample_negative_examples = negative_sample_weighting * total_positives

        models = []

        print "fitting models..."
        p = progressbar.ProgressBar(number_of_models, timer=True)

        for model_no in range(number_of_models):

            p.tap()

            train_negatives_sample = np.random.choice(train_negatives, sample_negative_examples, replace=False)

            train_sample = np.concatenate([train_positives, train_negatives_sample])

            clf = SGDClassifier(loss="hinge", penalty="l2")
            clf.fit(X_train[train_sample], y_train[train_sample])
            models.append(clf)

        TP = 0
        FP = 0
        TN = 0
        FN = 0

        print "testing..."
        p = progressbar.ProgressBar(len(test_indices), timer=True)

        for start, end in test_indices:

            p.tap()
            study_X = X[np_indices((start, end))]
            study_y = y[np_indices((start, end))]

            preds_all = np.mean([clf.predict(study_X) for clf in models], 0)

            max_indices = preds_all.argsort()[-positives_per_pdf:][::-1] + start

            real_index = np.where(study_y == 1)[0][0] + start

            if real_index in max_indices:

                TP += 1
                TN += len(study_y) - positives_per_pdf
                FP += positives_per_pdf - 1
                # FN += 0
            else:
                # TP += 0
                TN += len(study_y) - positives_per_pdf - 1
                FN += 1
                FP += positives_per_pdf

            print len(study_y)

        precision = float(TP) / (float(TP) + float(FP))
        recall = float(TP) / (float(TP) + float(FN))
        f1 = 2 * ((precision * recall) / (precision + recall))
        accuracy = float(TP) / len(test_indices)

        metrics.append({"precision": precision, "recall": recall, "f1": f1, "accuracy": accuracy})

    print
    pprint(metrics)

    metric_types = ["precision", "recall", "f1", "accuracy"]

    for metric_type in metric_types:

        metric_vec = [metric[metric_type] for metric in metrics]

        metric_mean = np.mean(metric_vec)

        print "%s: %.5f" % (metric_type, metric_mean)
Example #21
def true_hybrid_prediction_test(model, test_mode=False):

    print "True Hybrid prediction"
    print "=" * 40
    print


    s = model
    s.generate_data() # some variations use the quote data internally 
                                                # for sentence prediction (for additional features)

    s_cheat = HybridModel(test_mode=False)
    s_cheat.generate_data()



    for test_domain in CORE_DOMAINS:

        

        print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40)
        
        domain_uids = s.domain_uids(test_domain)
        no_studies = len(domain_uids)
        kf = KFold(no_studies, n_folds=5, shuffle=False)

        print "making scorer"
        ftwo_scorer = make_scorer(fbeta_score, beta=2)

        tuned_parameters = [{"alpha": np.logspace(-4, -1, 10)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 10)]}]
        clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring=ftwo_scorer)


        metrics = []

        for fold_i, (train, test) in enumerate(kf):

            print "training doc level model with test data, please wait..."

            d = DocumentLevelModel(test_mode=False)
            d.generate_data(uid_filter=domain_uids[train])
            d.vectorize()
            doc_X, doc_y = d.X_domain_all(domain=test_domain), d.y_domain_all(domain=test_domain)


            doc_tuned_parameters = {"alpha": np.logspace(-4, -1, 10)}
            doc_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), doc_tuned_parameters, scoring='f1')

            doc_clf.fit(doc_X, doc_y)

            s.set_doc_model(doc_clf, d.vectorizer)


            s_cheat.vectorize(test_domain)
            s.vectorize(test_domain, use_vectorizer=s_cheat.vectorizer)


            X_train, y_train = s_cheat.X_y_uid_filtered(domain_uids[train], test_domain)
            # train on the *true* labels

            X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain)

            clf.fit(X_train, y_train)

            y_preds = clf.predict(X_test)

            fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1]

            metrics.append(fold_metric) # get the scores for positive instances

            print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2])
            






        # summary score

        summary_metrics = np.mean(metrics, axis=0)
        print "=" * 40
        print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2])
Example #22
def main(train_function):
  client = AdvisorClient()

  # Get or create the study
  study_configuration = {
      "goal":
      "MINIMIZE",
      "randomInitTrials":
      1,
      "maxTrials":
      5,
      "maxParallelTrials":
      1,
      "params": [
          {
              "parameterName": "gamma",
              "type": "DOUBLE",
              "minValue": 0.001,
              "maxValue": 0.01,
              "feasiblePoints": "",
              "scallingType": "LINEAR"
          },
          {
              "parameterName": "C",
              "type": "DOUBLE",
              "minValue": 0.5,
              "maxValue": 1.0,
              "feasiblePoints": "",
              "scallingType": "LINEAR"
          },
          {
              "parameterName": "kernel",
              "type": "CATEGORICAL",
              "minValue": 0,
              "maxValue": 0,
              "feasiblePoints": "linear, poly, rbf, sigmoid, precomputed",
              "scallingType": "LINEAR"
          },
          {
              "parameterName": "coef0",
              "type": "DOUBLE",
              "minValue": 0.0,
              "maxValue": 0.5,
              "feasiblePoints": "",
              "scallingType": "LINEAR"
          },
      ]
  }
  study = client.create_study("Study", study_configuration,
                              "BayesianOptimization")
  #study = client.get_study_by_id(6)

  # Get suggested trials
  trials = client.get_suggestions(study.id, 3)

  # Generate parameters
  parameter_value_dicts = []
  for trial in trials:
    parameter_value_dict = json.loads(trial.parameter_values)
    print("The suggested parameters: {}".format(parameter_value_dict))
    parameter_value_dicts.append(parameter_value_dict)

  # Run training
  metrics = []
  for i in range(len(trials)):
    metric = train_function(**parameter_value_dicts[i])
    #metric = train_function(parameter_value_dicts[i])
    metrics.append(metric)

  # Complete the trial
  for i in range(len(trials)):
    trial = trials[i]
    client.complete_trial_with_one_metric(trial, metrics[i])
  is_done = client.is_study_done(study.id)
  best_trial = client.get_best_trial(study.id)
  print("The study: {}, best trial: {}".format(study, best_trial))
Example #23
    def from_list_db(cls, l):
        metrics = cls()
        for item in l:
            metrics.append(Metric.from_dict(item, kind="db"))

        return metrics
Example #24
def document_prediction_test(model=DocumentLevelModel(test_mode=False)):

    print "Document level prediction"
    print "=" * 40
    print


    d = model
    d.generate_data() # some variations use the quote data internally 
                                                # for sentence prediction (for additional features)

    d.vectorize()

    for test_domain in CORE_DOMAINS:
        print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40)
        
        



        # f1_prefer_nos = make_scorer(f1_score, pos_label="NO")

        tuned_parameters = {"alpha": np.logspace(-4, -1, 10)}
        clf = GridSearchCV(SGDClassifier(loss="log", penalty="L2"), tuned_parameters, scoring='f1')

       
        # clf = SGDClassifier(loss="hinge", penalty="L2")

        domain_uids = d.domain_uids(test_domain)
        no_studies = len(domain_uids)

        kf = KFold(no_studies, n_folds=5, shuffle=False)

        metrics = []


        for fold_i, (train, test) in enumerate(kf):


            X_train, y_train = d.X_y_uid_filtered(domain_uids[train], test_domain)
            X_test, y_test = d.X_y_uid_filtered(domain_uids[test], test_domain)

            

            clf.fit(X_train, y_train)

            y_preds = clf.predict(X_test)
            


            fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds, labels=RoB_CLASSES))[:3]

            print ('fold %d\t' % (fold_i)) + '\t'.join(RoB_CLASSES)

            # for metric_type, scores in zip(["prec.", "recall", "f1"], fold_metric):
            #     print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2])

            # print

            # print clf.best_params_

            #### START CONFUSION

            real_no_indices = (y_test=="NO")
            print "The actual NOs were predicted as..."
            print collections.Counter(y_preds[real_no_indices])

            #### END CONFUSION



            metrics.append(fold_metric) # get the scores for positive instances

            # print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2])


        mean_scores = np.mean(metrics, axis=0)

        print "=" * 40
        print 'means \t' + '\t'.join(RoB_CLASSES)

        for metric_type, scores in zip(["prec.", "recall", "f1"], mean_scores):
            print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2])
        print


        # then train all for most informative features
        clf = SGDClassifier(loss="hinge", penalty="L2", alpha=0.01)
        X_all = d.X_domain_all(test_domain)
        y_all = d.y_domain_all(test_domain)

        clf.fit(X_all, y_all)

        print show_most_informative_features_ynu(d.vectorizer, clf)
Example #25
def extract_spearman_for_fold(metrics, fold, i, predictions, truth, y_ground_truth, test, y_pred, learn_options):
    spearman = util.spearmanr_nonan(y_ground_truth[test].flatten(), y_pred.flatten())[0]
    assert not np.isnan(spearman), "found nan spearman"
    metrics.append(spearman)
Example #26
def extract_spearman_for_fold(metrics, fold, i, predictions, truth,
                              y_ground_truth, test, y_pred, learn_options):
    spearman = util.spearmanr_nonan(y_ground_truth[test].flatten(),
                                    y_pred.flatten())[0]
    assert not np.isnan(spearman), "found nan spearman"
    metrics.append(spearman)
Example #27
def extract_NDCG_for_fold(metrics, fold, i, predictions, truth, y_ground_truth,
                          test, y_pred, learn_options):
    NDCG_fold = ranking_metrics.ndcg_at_k_ties(y_ground_truth[test].flatten(),
                                               y_pred.flatten(),
                                               learn_options["NDGC_k"])
    metrics.append(NDCG_fold)
Example #28
def hybrid_doc_prediction_test(model=HybridDocModel(test_mode=False)):

    print "Hybrid doc level prediction"
    print "=" * 40
    print


    d = model
    d.generate_data() # some variations use the quote data internally 
                                                # for sentence prediction (for additional features)


    for test_domain in CORE_DOMAINS:

        

        print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40)
        
        domain_uids = d.domain_uids(test_domain)
        no_studies = len(domain_uids)
        kf = KFold(no_studies, n_folds=5, shuffle=False)
        tuned_parameters = {"alpha": np.logspace(-4, -1, 5)}
        clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='f1')


        metrics = []

        for fold_i, (train, test) in enumerate(kf):

            

            s = SentenceModel(test_mode=False)
            s.generate_data(uid_filter=domain_uids[train])
            s.vectorize()
            sents_X, sents_y = s.X_domain_all(domain=test_domain), s.y_domain_all(domain=test_domain)



            sent_tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 2, 10)]}]
            sent_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall')
            sent_clf.fit(sents_X, sents_y)
            d.set_sent_model(sent_clf, s.vectorizer)
            d.vectorize(test_domain)

            X_train, y_train = d.X_y_uid_filtered(domain_uids[train], test_domain)
            X_test, y_test = d.X_y_uid_filtered(domain_uids[test], test_domain)

            clf.fit(X_train, y_train)

            y_preds = clf.predict(X_test)

            fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds, labels=RoB_CLASSES))[:3]

            print ('fold %d\t' % (fold_i)) + '\t'.join(RoB_CLASSES)

            for metric_type, scores in zip(["prec.", "recall", "f1"], fold_metric):
                print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2])

            print





            metrics.append(fold_metric) # get the scores for positive instances

            # print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2])


        mean_scores = np.mean(metrics, axis=0)

        print "=" * 40
        print 'means \t' + '\t'.join(RoB_CLASSES)

        for metric_type, scores in zip(["prec.", "recall", "f1"], mean_scores):
            print "%s\t%.2f\t%.2f\t%.2f" % (metric_type, scores[0], scores[1], scores[2])
        print
Example #29
def main():
    """Model training routine."""

    ws = Workspace.from_config()

    # Get the dataset from the workspace
    data = Dataset.get_by_name(ws, "house_prices")
    data = data.to_pandas_dataframe()

    # Split features and labels
    X = data.drop(columns="price")
    y = data["price"]

    # Get the run to start logging
    run = Run.get_context()

    # Log training data and CV params
    run.log("Training size", X.shape[0])
    run.log("CV splits", cv_splits)
    run.log("CV test proportion", test_prop)

    # Run cross-validation
    metrics = []
    results = []
    model = LinearRegression()
    cv = ShuffleSplit(n_splits=cv_splits, test_size=test_prop)

    for train, test in cv.split(X):
        model.fit(X.loc[train, :], y[train])

        y_true = y[test]
        y_pred = model.predict(X.loc[test, :])

        results.append(
            pd.DataFrame({
                "actual": y_true,
                "predicted": y_pred,
                "residual": y_pred - y_true,
            }))

        metrics.append(compute_metrics(y_true, y_pred, log_metrics))

    results = pd.concat(results)
    metrics = pd.DataFrame(metrics)

    # Log accuracy metrics in AzureML (mean from splits)
    for metric in metrics.columns:
        run.log(metric.replace("_", " ").title(), metrics[metric].mean())

    # Log predictions and residuals histograms
    run.log_predictions("Predictions",
                        histogram_predictions(results["predicted"]))
    run.log_residuals("Residuals", histogram_residuals(results["residual"]))

    # Register if percentage error below threshold
    performance = sklearn.metrics.mean_absolute_percentage_error(
        results["actual"], results["predicted"])
    if performance <= performance_threshold:

        # Retrain on all data
        model = model.fit(X, y)

        # Complete the run so files get uploaded
        joblib.dump(model, model_file)
        run.upload_file(model_file, model_file)

        # Register the model
        run.register_model(
            model_name=model_id,
            model_path=model_file,
            model_framework=Model.Framework.SCIKITLEARN,
            model_framework_version=sklearn.__version__,
            description=f"Mean Absolute Percentage Error: {performance}",
        )
Example #30
def binary_hybrid_doc_prediction_test(model=HybridDocModel, test_mode=False):

    print "Binary hybrid doc level prediction version 2 (maybe quicker!!)"
    print "=" * 40
    print


    d = model(test_mode=test_mode)
    d.generate_data(binarize=True) # some variations use the quote data internally 
                                                # for sentence prediction (for additional features)


    for test_domain in CORE_DOMAINS:

        

        print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40)
        
        domain_uids = d.domain_uids(test_domain)
        no_studies = len(domain_uids)
        kf = KFold(no_studies, n_folds=5, shuffle=False)
        tuned_parameters = {"alpha": np.logspace(-4, -1, 10), "class_weight": [{1: i, -1: 1} for i in np.logspace(-1, 1, 10)]}
        clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='precision')

        metrics = []

        s = SentenceModel(test_mode=test_mode)
        s.generate_data(uid_filter=domain_uids)
        s.vectorize()


        for fold_i, (train, test) in enumerate(kf):


            sents_X, sents_y = s.X_y_uid_filtered(domain_uids[test], test_domain)
            sent_tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 2, 10)]}]
            sent_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall')
            sent_clf.fit(sents_X, sents_y)
            d.set_sent_model(sent_clf, s.vectorizer)
            d.vectorize(test_domain)


            X_train, y_train = d.X_y_uid_filtered(domain_uids[train], test_domain)
            X_test, y_test = d.X_y_uid_filtered(domain_uids[test], test_domain)

            clf.fit(X_train, y_train)

            y_preds = clf.predict(X_test)

            fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1]

            metrics.append(fold_metric) # get the scores for positive instances

            print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2])
            


            if fold_i == 0:
                # make a plot of the first curve
                probas_ = clf.best_estimator_.predict_proba(X_test)

                # Compute ROC curve and area the curve
                fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1])
                roc_auc = auc(fpr, tpr)
                print("Area under the ROC curve : %f" % roc_auc)

                # Plot ROC curve
                pl.clf()
                pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
                pl.plot([0, 1], [0, 1], 'k--')
                pl.xlim([0.0, 1.0])
                pl.ylim([0.0, 1.0])
                pl.xlabel('False Positive Rate')
                pl.ylabel('True Positive Rate')
                pl.title(test_domain)
                pl.legend(loc="lower right")
                pl.show()




        summary_metrics = np.mean(metrics, axis=0)
        print "=" * 40
        print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2])


        # then train all for most informative features

        sents_X, sents_y = s.X_domain_all(domain=test_domain), s.y_domain_all(domain=test_domain)

        sent_tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 2, 10)]}]
        sent_clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall')
        sent_clf.fit(sents_X, sents_y)


        d.set_sent_model(sent_clf, s.vectorizer)
        d.vectorize(test_domain)

        
        X_all, y_all = d.X_y_uid_filtered(domain_uids, test_domain)
        clf.fit(X_all, y_all)

        print show_most_informative_features(d.vectorizer, clf.best_estimator_)
Example #31
def sentence_prediction_test(class_weight={1: 5, -1:1}, model=SentenceModel(test_mode=True)):
    print
    print
    print

    print "Sentence level prediction"
    print "=" * 40
    print

    s = model


    print "Model name:\t" + s.__class__.__name__
    print s.__doc__

    print "class_weight=%s" % (str(class_weight),)
    
    
    s.generate_data()
    s.vectorize()
    
    for test_domain in CORE_DOMAINS:
        print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40)
        


        domain_uids = s.domain_uids(test_domain)
        no_studies = len(domain_uids)




        kf = KFold(no_studies, n_folds=5, shuffle=False, indices=True)

        # # tuned_parameters = {"alpha": np.logspace(-4, -1, 10)}
        # tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 5)]}]
        # clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall')
        

        print "making scorer"
        ftwo_scorer = make_scorer(fbeta_score, beta=2)

        tuned_parameters = [{"alpha": np.logspace(-4, -1, 10)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 10)]}]
        clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring=ftwo_scorer)


        metrics = []

        for fold_i, (train, test) in enumerate(kf):



            X_train, y_train = s.X_y_uid_filtered(domain_uids[train], test_domain)
            X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain)

            clf.fit(X_train, y_train)

            y_preds = clf.predict(X_test)

            fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1]

            metrics.append(fold_metric) # get the scores for positive instances

            print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2])
            

            # if not sample and list_features:
            #     # not an obvious way to get best features for ensemble
            #     print show_most_informative_features(s.vectorizer, clf)
            

        # summary score

        summary_metrics = np.mean(metrics, axis=0)
        print "=" * 40
        print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2])


        # then train all for most informative features
        clf = SGDClassifier(loss="hinge", penalty="L2", alpha=0.01, class_weight={1: 5, -1: 1})
        X_all = s.X_domain_all(test_domain)
        y_all = s.y_domain_all(test_domain)

        clf.fit(X_all, y_all)

        print show_most_informative_features(s.vectorizer, clf)
Example #32
def simple_hybrid_prediction_test(model=HybridModel(test_mode=True)):

    print "Hybrid prediction"
    print "=" * 40
    print


    s = model
    s.generate_data() # some variations use the quote data internally 
                                                # for sentence prediction (for additional features)



    for test_domain in CORE_DOMAINS:

        s.vectorize(test_domain)

        print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40)
        
        domain_uids = s.domain_uids(test_domain)
        no_studies = len(domain_uids)

        kf = KFold(no_studies, n_folds=5, shuffle=False)


        # tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight":  [{1: i, -1: 1} for i in np.logspace(0, 1, 5)]}]
        # clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='f1')

        print "making scorer"
        ftwo_scorer = make_scorer(fbeta_score, beta=2)

        tuned_parameters = [{"alpha": np.logspace(-4, -1, 10)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 10)]}]
        clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring=ftwo_scorer)

        metrics = []

        for fold_i, (train, test) in enumerate(kf):

            X_train, y_train = s.X_y_uid_filtered(domain_uids[train], test_domain)
            X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain)

            clf.fit(X_train, y_train)

            y_preds = clf.predict(X_test)

            fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1]

            metrics.append(fold_metric) # get the scores for positive instances

            print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2])
            
        # summary score

        summary_metrics = np.mean(metrics, axis=0)
        print "=" * 40
        print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2])
Example No. 33
0
	def run_predict(self, modelfile, weightfile):
		""" Run model prediction """

		#===========================
		#==   SET DATA
		#===========================	
		logger.info("Setting input data from data loader ...")
		status= self.__set_data()
		if status<0:
			logger.error("Input data set failed!")
			return -1

		#===========================
		#==   LOAD MODEL
		#===========================
		#- Create the network architecture and weights from file
		logger.info("Loading model architecture and weights from files %s %s ..." % (modelfile, weightfile))
		if self.__load_model(modelfile, weightfile)<0:
			logger.warning("Failed to load model from files!")
			return -1

		if self.model is None:
			logger.error("Loaded model is None!")
			return -1

		#===========================
		#==   PREDICT
		#===========================
		# - Get predicted output data
		logger.info("Predicting model output data ...")
		predout= self.model.predict(
			x=self.test_data_generator,
			steps=1,
			verbose=2,
			workers=self.nworkers,
			use_multiprocessing=self.use_multiprocessing
		)

		print("predout")
		print(type(predout))
		print(predout.shape)

		# - Convert one-hot encoding to target ids
		logger.info("Retrieving target ids from predicted output ...")
		self.targets_pred= np.argmax(predout, axis=1)

		print("targets_pred")
		print(self.targets_pred)
		print(type(self.targets_pred))
		print(self.targets_pred.shape)

		# - Get predicted output class id
		logger.info("Computing predicted class ids from targets ...")
		self.classids_pred= [self.classid_remap_inv[item] for item in self.targets_pred]
		
		print("classids_pred")
		print(self.classids_pred)
		print(type(self.classids_pred))
		
		# - Get predicted output class prob
		logger.info("Predicting output classid ...")
		self.probs_pred= [predout[i,self.targets_pred[i]] for i in range(predout.shape[0])]

		print("probs_pred")
		print(self.probs_pred)
		print(type(self.probs_pred))
		
		# - Save predicted data to file
		logger.info("Saving prediction data to file %s ..." % (self.outfile))
		N= predout.shape[0]
		snames= np.array(self.source_names).reshape(N,1)
		objids= np.array(self.source_ids).reshape(N,1)
		objids_pred= np.array(self.classids_pred).reshape(N,1)
		probs_pred= np.array(self.probs_pred).reshape(N,1)

		outdata= np.concatenate(
			(snames, objids, objids_pred, probs_pred),
			axis=1
		)

		head= "# sname id id_pred prob"
		Utils.write_ascii(outdata, self.outfile, head)


		#================================
		#==   COMPUTE AND SAVE METRICS
		#================================
		# - Retrieve metrics
		logger.info("Computing classification metrics on predicted data ...")
		report= classification_report(self.target_ids, self.targets_pred, target_names=self.target_names, output_dict=True)
		self.accuracy= report['accuracy']
		self.precision= report['weighted avg']['precision']
		self.recall= report['weighted avg']['recall']    
		self.f1score= report['weighted avg']['f1-score']

		self.class_precisions= []
		self.class_recalls= []  
		self.class_f1scores= []
		for class_name in self.target_names:
			class_precision= report[class_name]['precision']
			class_recall= report[class_name]['recall']    
			class_f1score= report[class_name]['f1-score']
			self.class_precisions.append(class_precision)
			self.class_recalls.append(class_recall)
			self.class_f1scores.append(class_f1score)
			
		logger.info("accuracy=%f" % (self.accuracy))
		logger.info("precision=%f" % (self.precision))
		logger.info("recall=%f" % (self.recall))
		logger.info("f1score=%f" % (self.f1score))
		logger.info("--> Metrics per class")
		print("classnames")
		print(self.target_names)
		print("precisions")
		print(self.class_precisions)
		print("recall")
		print(self.class_recalls)
		print("f1score")
		print(self.class_f1scores)

		# - Retrieving confusion matrix
		logger.info("Retrieving confusion matrix ...")
		cm= confusion_matrix(self.target_ids, self.targets_pred)

		print("confusion matrix")
		print(cm)

		# - Saving metrics to file
		logger.info("Saving metrics to file %s ..." % (self.outfile_metrics))
		metrics= [self.accuracy, self.precision, self.recall, self.f1score]
		metric_names= ["accuracy","precision","recall","f1score"]
		
		for i in range(len(self.target_names)):
			classname= self.target_names[i]
			precision= self.class_precisions[i]
			recall= self.class_recalls[i]
			f1score= self.class_f1scores[i]
			metrics.append(precision)
			metrics.append(recall)
			metrics.append(f1score)
			metric_names.append("precision_" + classname)
			metric_names.append("recall_" + classname)
			metric_names.append("f1score_" + classname)
			
		Nmetrics= len(metrics)
		metric_data= np.array(metrics).reshape(1,Nmetrics)

		metric_names_str= ' '.join(str(item) for item in metric_names)
		head= "# " + metric_names_str

		print("metric_data")
		print(metrics)
		print(len(metrics))
		print(metric_data.shape)
		
		Utils.write_ascii(metric_data, self.outfile_metrics, head)


		return 0
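Utils.write_ascii is a project-specific helper that is not shown in this snippet. A minimal sketch of what it might look like, assuming it simply writes the header line followed by space-separated rows:

import numpy as np

def write_ascii(data, filename, header=""):
    # Write an optional header line, then the rows of 'data' as space-separated
    # text; fmt="%s" copes with the mixed string/float columns built above.
    with open(filename, "w") as f:
        if header:
            f.write(header + "\n")
        np.savetxt(f, data, fmt="%s")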
Example No. 34
0
def main(train_function):
    client = AdvisorClient()

    # Get or create the study
    study_configuration = {
        "goal": "MINIMIZE",
        "randomInitTrials": 1,
        "maxTrials": 5,
        "maxParallelTrials": 1,
        "params": [
            {
                "parameterName": "gamma",
                "type": "DOUBLE",
                "minValue": 0.001,
                "maxValue": 0.01,
                "feasiblePoints": "",
                "scalingType": "LINEAR"
            },
            {
                "parameterName": "C",
                "type": "DOUBLE",
                "minValue": 0.5,
                "maxValue": 1.0,
                "feasiblePoints": "",
                "scalingType": "LINEAR"
            },
            {
                "parameterName": "kernel",
                "type": "CATEGORICAL",
                "minValue": 0,
                "maxValue": 0,
                "feasiblePoints": "linear, poly, rbf, sigmoid, precomputed",
                "scalingType": "LINEAR"
            },
            {
                "parameterName": "coef0",
                "type": "DOUBLE",
                "minValue": 0.0,
                "maxValue": 0.5,
                "feasiblePoints": "",
                "scalingType": "LINEAR"
            },
        ]
    }
    study = client.create_study("Study", study_configuration,
                                "BayesianOptimization")
    #study = client.get_study_by_id(6)

    num_trials = 20
    for i in range(num_trials):
        # Get suggested trials
        trials = client.get_suggestions(study.name, 3)

        # Generate parameters
        parameter_value_dicts = []
        for trial in trials:
            parameter_value_dict = json.loads(trial.parameter_values)
            print("The suggested parameters: {}".format(parameter_value_dict))
            parameter_value_dicts.append(parameter_value_dict)

        # Run training
        metrics = []
        for j in range(len(trials)):
            metric = train_function(**parameter_value_dicts[j])
            #metric = train_function(parameter_value_dicts[j])
            metrics.append(metric)

        # Complete the trial
        for j in range(len(trials)):
            trial = trials[j]
            client.complete_trial_with_one_metric(trial, metrics[j])
    is_done = client.is_study_done(study.name)
    best_trial = client.get_best_trial(study.name)
    print("The study: {}, best trial: {}".format(study, best_trial))
    print(best_trial.parameter_values)
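main() expects a train_function that accepts the suggested parameters as keyword arguments and returns a single number to minimize. A hypothetical example matching the study configuration above (the dataset and the 1 - accuracy objective are assumptions for illustration):

from sklearn import datasets, svm
from sklearn.model_selection import cross_val_score

def train_svm(gamma, C, kernel, coef0):
    # Objective to MINIMIZE: 1 - mean cross-validated accuracy on a toy dataset.
    # Note: kernel="precomputed" (listed in feasiblePoints) would need a
    # precomputed kernel matrix and is not handled in this sketch.
    X, y = datasets.load_iris(return_X_y=True)
    model = svm.SVC(gamma=gamma, C=C, kernel=kernel, coef0=coef0)
    return 1.0 - cross_val_score(model, X, y, cv=3).mean()

# main(train_svm)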
Example No. 35
0
        with tqdm.tqdm(test_dataloader) as tq:
            for step, (input_nodes, pos_graph, neg_graph,
                       mfgs) in enumerate(tq):
                # feature copy from CPU to GPU takes place here
                inputs = mfgs[0].srcdata['feat']

                outputs = model(mfgs, inputs).float()
                pos_score = pred(pos_graph, outputs)
                neg_score = pred(neg_graph, outputs)

                # print("Positive Score: ", pos_score[:100])
                # print("Negative Scor: ", neg_score[:100])

                batches += 1
                metrics = []
                metrics.append(compute_auc(pos_score, neg_score))
                # metrics.append(compute_f1(pos_score, neg_score))
                # metrics.append(compute_prec(pos_score, neg_score))
                # metrics.append(compute_recall(pos_score, neg_score))
                print('Step: ', step)
                print('ROC-AUC Score: ', metrics[0])
                # print('F1-Score: ', metrics[1])
                # print('Precision Score: ', metrics[2])
                # print('Recall Score: ', metrics[3])

                score_array[0] += metrics[0]
                # score_array[1] += metrics[1]
                # score_array[2] += metrics[2]
                # score_array[3] += metrics[3]

    roc_auc.append(score_array[0] / batches)
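compute_auc (and the commented-out metric helpers) are defined elsewhere. A minimal sketch of a typical ROC-AUC helper for this kind of link-prediction scoring, assuming pos_score and neg_score are edge-score tensors:

import torch
from sklearn.metrics import roc_auc_score

def compute_auc(pos_score, neg_score):
    # Positive edges get label 1, negative edges label 0; ROC-AUC is computed
    # over the concatenated scores.
    scores = torch.cat([pos_score.view(-1), neg_score.view(-1)]).detach().cpu().numpy()
    labels = torch.cat([torch.ones(pos_score.shape[0]),
                        torch.zeros(neg_score.shape[0])]).numpy()
    return roc_auc_score(labels, scores)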
Example No. 36
0
def extract_NDCG_for_fold(metrics, fold, i, predictions, truth, y_ground_truth, test, y_pred, learn_options):
    NDCG_fold = ranking_metrics.ndcg_at_k_ties(
        y_ground_truth[test].flatten(), y_pred.flatten(), learn_options["NDGC_k"]
    )
    metrics.append(NDCG_fold)
Example No. 37
0
        100 * true_positives / all_real_signals, 100 * purity,
        100 * efficiency, 100 * accuracy
    ]
    # TODO: keep only two decimal places


import glob

validation_paths = glob.glob(
    '/storage/users/Muciaccia/data/validation/**/*.netCDF4',
    recursive=True)  # TODO: 10 sorts after 1 rather than after 9

metrics = []
for path in validation_paths:
    validation_dataset = xarray.open_dataset(path)
    metrics.append(compute_metrics(validation_dataset))

# TODO: save the results to a file
# TODO: make various plots of the results

# TODO: account for the fact that the images are truncated from 148 to 128

# TODO: maybe also run the validation with white noise without gaps (and with the true relative amplitude of the signal)

import pandas

# TODO: 'false alarms' is ugly and unclear; use something like 'misclassified noise'
metrics = pandas.DataFrame(metrics,
                           columns=[
                               'signal_intensity', 'all_validation_samples',
                               'rejected noise (%)', 'false alarms (%)',
Example No. 38
0
    def from_dict_user(cls, d):
        metrics = cls()
        for key in d:
            metrics.append(Metric.from_dict({key: d[key]}, kind="user"))

        return metrics
Example No. 39
0
 def select(self, ds):
     assert len(ds.training_set) > 0, 'Must have at least one training set in the dataset'
     assert len(ds.validation_set) > 0, 'Must have at least one validation set in the dataset'
     logger = logging.getLogger()
     included_columns = []
     for c in ds.input_working.columns:
         if self.__exclude_column_re is not None and self.__exclude_column_re.match(
                 c):
             logger.info('- Excluding column due to exclude_column_re: %s' %
                         c)
             continue
         if self.__include_column_re is not None and not self.__include_column_re.match(c):
             logger.info('- Excluding column because it does not match include_column_re: %s' % c)
             continue
         included_columns.append(c)
     selected_columns = []
     prev_best_metric = None
     best_metric = None
     best_metric_column = None
     while len(selected_columns) < len(included_columns):
         for i, next_column in enumerate(included_columns):
             logger.info('- Trying column %d of %d, "%s"' %
                         (i + 1, len(included_columns), next_column))
             if next_column in selected_columns:
                 logger.info('  Column "%s" already selected, continuing' %
                             next_column)
                 continue
             current_columns = []
             current_columns.extend(selected_columns)
             current_columns.append(next_column)
             metrics = []
             if len(ds.training_set) == len(ds.validation_set):
                 for ts, vs in zip(ds.training_set, ds.validation_set):
                     x_train = ts.input[current_columns].values
                     y_train = ts.output.values
                     self.__model.fit(x_train, y_train)
                     x_validation = vs.input[current_columns].values
                     y_validation = vs.output.values
                     y_validation_pred = self.__model.predict(x_validation)
                     if self.__weight_metric_by_forecast_horizon:
                         metric = 0.
                         for k, fh in enumerate(ds.forecast_horizon):
                             metric += (fh / np.max(
                                 ds.forecast_horizon)) * self.__metric(
                                     y_validation[:, k],
                                     y_validation_pred[:, k])
                     else:
                         metric = self.__metric(y_validation,
                                                y_validation_pred)
                     metrics.append(metric)
             else:
                 x_train = ds.all_training_sets.input[
                     current_columns].values
                 y_train = ds.all_training_sets.output.values
                 self.__model.fit(x_train, y_train)
                 for vs in ds.validation_set:
                     x_validation = vs.input[current_columns].values
                     y_validation = vs.output.values
                     y_validation_pred = self.__model.predict(x_validation)
                     if self.__weight_metric_by_forecast_horizon:
                         metric = 0.
                         for k, fh in enumerate(ds.forecast_horizon):
                             metric += (fh / np.max(
                                 ds.forecast_horizon)) * self.__metric(
                                     y_validation[:, k],
                                     y_validation_pred[:, k])
                     else:
                         metric = self.__metric(y_validation,
                                                y_validation_pred)
                     metrics.append(metric)
             mean_metric = np.mean(metrics)
             log_message = '  Achieved metric %05f' % mean_metric
             if best_metric is not None:
                 log_message += ', the maximum being %05f' % best_metric
             logger.info(log_message)
             if best_metric is None or best_metric < mean_metric:
                 best_metric = mean_metric
                 best_metric_column = next_column
         if prev_best_metric is not None and best_metric - prev_best_metric < self.__metric_improvement_threshold:
             break
         selected_columns.append(best_metric_column)
         logger.info(
             '*** Selected column "%s", which improved the metric to %05f' %
             (best_metric_column, best_metric))
         logger.info('*** So far, selected %d columns: %s' %
                     (len(selected_columns), ', '.join(
                         ['"%s"' % sc for sc in selected_columns])))
         prev_best_metric = best_metric
     logger.info('*** Final metric: %05f' % best_metric)
     return selected_columns
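The forecast-horizon weighting used in select() can also be written as a standalone helper for clarity; a sketch under the assumption that y_true and y_pred have one column per forecast horizon (the metric shown is illustrative):

import numpy as np
from sklearn.metrics import r2_score

def horizon_weighted_metric(y_true, y_pred, horizons, metric=r2_score):
    # Each horizon's score is scaled by horizon / max(horizon), so longer
    # horizons contribute more to the total, as in the selector above.
    weights = np.asarray(horizons, dtype=float) / np.max(horizons)
    return sum(w * metric(y_true[:, k], y_pred[:, k])
               for k, w in enumerate(weights))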
Example No. 40
0
def xgboost_baseline(X,
                     y,
                     regression=False,
                     n_splits=10,
                     test_size=0.25,
                     eval_metric="auc",
                     optimized_metric=metrics.average_precision_score,
                     max_evals=10,
                     weight_imbalanced=False,
                     verbose=False,
                     random_state=777):
    """
    Quickly run a benchmark multiple times on a dataset to evaluate an XGBoost model on it.
    At each iteration, we split the dataset into train / valid / test sets and train a tuned XGBoost model.
    We return several classification performance metrics.

    If there are categorical features in the dataset, each categorical feature is transformed with LabelEncoder (from scikit-learn),
    and the data are then one-hot encoded.
    :param X: whole feature set (pandas DataFrame matrix)
    :param y: target vector (pandas Series vector)
    :param n_splits: number of iterations to repeat.
    :param test_size: size of the test set (this value is reused to split the (1 - test_size) train set into train and valid sets).
    :param max_evals: number of hyperparameter combinations to try before returning the best combination.
    :param random_state: seed used by the random number generator, for reproducibility.
    :return: classification performance metrics for each iteration:
     f1_score, f2_score, precision_score, recall_score, metrics.average_precision_score, roc_auc_score.
    """

    metrics = []
    rs = ShuffleSplit(
        n_splits=n_splits, test_size=test_size, random_state=random_state)

    for train_index, test_index in tqdm(rs.split(X), total=n_splits):

        # get train and test set
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]

        # Add valid set
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=test_size, random_state=random_state)

        # XGboost model
        if regression:
            xgboost = XGBRegressorTuning(
                X_train,
                y_train,
                X_val,
                y_val,
                eval_metric=eval_metric,
                optimized_metric=optimized_metric,
                max_evals=max_evals,
                verbose=verbose,
                random_state=random_state)
            metrics.append(
                regression_score_metrics(xgboost.model, X_test, y_test))
        else:
            xgboost = XGBClassifierTuning(
                X_train,
                y_train,
                X_val,
                y_val,
                eval_metric=eval_metric,
                optimized_metric=optimized_metric,
                max_evals=max_evals,
                weight_imbalanced=weight_imbalanced,
                verbose=verbose,
                random_state=random_state)
            metrics.append(
                classification_score_metrics(xgboost.model, X_test, y_test))

    return metrics
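The categorical preprocessing described in the docstring (LabelEncoder per column, then one-hot encoding) is handled outside this snippet. A minimal sketch of that step (the helper name and the dtype-based column detection are assumptions):

import pandas as pd
from sklearn.preprocessing import LabelEncoder

def encode_categoricals(X):
    # Label-encode each categorical column, then one-hot encode those columns.
    X = X.copy()
    cat_cols = list(X.select_dtypes(include=["object", "category"]).columns)
    for col in cat_cols:
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))
    return pd.get_dummies(X, columns=cat_cols)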
Example No. 41
0
def binary_doc_prediction_test(model=DocumentLevelModel, test_mode=False):
    print
    print
    print

    print "Binary doc prediction"
    print "=" * 40
    print

    s = model(test_mode=test_mode)

    s.generate_data(binarize=True)
    s.vectorize()
    
    for test_domain in CORE_DOMAINS:
        print ("*"*40) + "\n\n" + test_domain + "\n\n" + ("*" * 40)

        domain_uids = s.domain_uids(test_domain)
        no_studies = len(domain_uids)

        kf = KFold(no_studies, n_folds=5, shuffle=False, indices=True)

        # # tuned_parameters = {"alpha": np.logspace(-4, -1, 10)}
        # tuned_parameters = [{"alpha": np.logspace(-4, -1, 5)}, {"class_weight": [{1: i, -1: 1} for i in np.logspace(0, 1, 5)]}]
        # clf = GridSearchCV(SGDClassifier(loss="hinge", penalty="L2"), tuned_parameters, scoring='recall')
        

        # print "making scorer"
        # ftwo_scorer = make_scorer(fbeta_score, beta=2)
        tuned_parameters = {"alpha": np.logspace(-4, -1, 10), "class_weight": [{1: i, -1: 1} for i in np.logspace(-1, 1, 10)]}
        clf = GridSearchCV(SGDClassifier(loss="log", penalty="L2"), tuned_parameters, scoring="precision")


        metrics = []

        for fold_i, (train, test) in enumerate(kf):

            X_train, y_train = s.X_y_uid_filtered(domain_uids[train], test_domain)
            X_test, y_test = s.X_y_uid_filtered(domain_uids[test], test_domain)

            clf.fit(X_train, y_train)

            y_preds = clf.predict(X_test)

            fold_metric = np.array(sklearn.metrics.precision_recall_fscore_support(y_test, y_preds))[:,1]

            metrics.append(fold_metric) # get the scores for positive instances

            print "fold %d:\tprecision %.2f, recall %.2f, f-score %.2f" % (fold_i, fold_metric[0], fold_metric[1], fold_metric[2])
            
            if fold_i == 0:
                # make a plot of the first curve
                probas_ = clf.best_estimator_.predict_proba(X_test)

                # Compute ROC curve and area the curve
                fpr, tpr, thresholds = roc_curve(y_test, probas_[:, 1])
                roc_auc = auc(fpr, tpr)
                print("Area under the ROC curve : %f" % roc_auc)

                # Plot ROC curve
                pl.clf()
                pl.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
                pl.plot([0, 1], [0, 1], 'k--')
                pl.xlim([0.0, 1.0])
                pl.ylim([0.0, 1.0])
                pl.xlabel('False Positive Rate')
                pl.ylabel('True Positive Rate')
                pl.title(test_domain)
                pl.legend(loc="lower right")
                pl.show()

        # summary score

        summary_metrics = np.mean(metrics, axis=0)
        print "=" * 40
        print "mean score:\tprecision %.2f, recall %.2f, f-score %.2f" % (summary_metrics[0], summary_metrics[1], summary_metrics[2])

        X_all, y_all = s.X_y_uid_filtered(domain_uids, test_domain)
        clf.fit(X_all, y_all)

        print show_most_informative_features(s.vectorizer, clf.best_estimator_)