Example #1
    def __init__(self, filename):
        """ Reads metrics csv into an array which can be
        indexed using the header_to_col and disease_to_row 
        dictionaries.
        """
        # Read metrics into dictionaries
        with open(filename, 'r') as metrics_file:
            metrics_reader = csv.DictReader(metrics_file)
            disease_to_metrics = {
                row["Disease ID"]: row
                for row in metrics_reader
            }
            self.header_to_col = {
                name: i
                for i, name in enumerate(metrics_reader.fieldnames[2:])
            }
            self.disease_to_row = {
                id: i
                for i, id in enumerate(disease_to_metrics.keys())
            }

        # Build metrics array
        self.metrics = np.zeros(
            (len(disease_to_metrics), len(self.header_to_col)))
        for disease, metrics in disease_to_metrics.items():
            for header, metric in metrics.items():
                if header in self.header_to_col:
                    self.metrics[self.disease_to_row[disease],
                                 self.header_to_col[header]] = float(metric)
Example #2
def log_run(split: str, epoch: int, writer: tensorboard.SummaryWriter,
            label_names: Sequence[str], metrics: MutableMapping[str, float],
            heaps: Optional[Mapping[str, Mapping[int, list[HeapItem]]]],
            cm: np.ndarray) -> None:
    """Logs the outputs (metrics, confusion matrix, tp/fp/fn images) from a
    single epoch run to Tensorboard.

    Args:
        metrics: dict, keys already prefixed with {split}/
    """
    per_label_recall = recall_from_confusion_matrix(cm, label_names)
    metrics.update(prefix_all_keys(per_label_recall, f'{split}/label_recall/'))

    # log metrics
    for metric, value in metrics.items():
        writer.add_scalar(metric, value, epoch)

    # log confusion matrix
    cm_fig = plot_utils.plot_confusion_matrix(cm, classes=label_names,
                                              normalize=True)
    cm_fig_img = fig_to_img(cm_fig)
    writer.add_image(tag=f'confusion_matrix/{split}', img_tensor=cm_fig_img,
                     global_step=epoch, dataformats='HWC')

    # log tp/fp/fn images
    if heaps is not None:
        for heap_type, heap_dict in heaps.items():
            log_images_with_confidence(writer, heap_dict, label_names,
                                       epoch=epoch, tag=f'{split}/{heap_type}')
    writer.flush()
def log_run(split: str, epoch: int, writer: tf.summary.SummaryWriter,
            label_names: Sequence[str], metrics: MutableMapping[str, float],
            heaps: Mapping[str,
                           Mapping[int,
                                   List[HeapItem]]], cm: np.ndarray) -> None:
    """Logs the outputs (metrics, confusion matrix, tp/fp/fn images) from a
    single epoch run to Tensorboard.

    Args:
        metrics: dict, keys already prefixed with {split}/

    Note: assumes `writer` has been set as the default summary writer (e.g.
    via `writer.as_default()`); the tf.summary.* calls below write to the
    active default writer, and `writer` itself is only flushed at the end.
    """
    per_class_recall = recall_from_confusion_matrix(cm, label_names)
    metrics.update(prefix_all_keys(per_class_recall, f'{split}/label_recall/'))

    # log metrics
    for metric, value in metrics.items():
        tf.summary.scalar(metric, value, epoch)

    # log confusion matrix
    cm_fig = plot_utils.plot_confusion_matrix(cm,
                                              classes=label_names,
                                              normalize=True)
    cm_fig_img = tf.convert_to_tensor(fig_to_img(cm_fig)[np.newaxis, ...])
    tf.summary.image(f'confusion_matrix/{split}', cm_fig_img, step=epoch)

    # log tp/fp/fn images
    for heap_type, heap_dict in heaps.items():
        log_images_with_confidence(heap_dict,
                                   label_names,
                                   epoch=epoch,
                                   tag=f'{split}/{heap_type}')
    writer.flush()
def test(args, model, loader, prefix='', verbose=True):
    
    print("train: Beginning test")

    loss_fn = loss_fns.DetectorLossFn().to(args.device)
    metrics = defaultdict(list)
    with torch.no_grad():
        for (img, det) in tqdm(loader):
            batch_size = img.shape[0]
            img = img.to(args.device).expand(batch_size, 3, *(img.shape[2:]))
            det = det.to(args.device)
            det_hat = model(img)
            loss, loss_dict = loss_fn(det_hat[0], det_hat[1], det)
            metrics['loss'].append(loss.item())
            for k, v in loss_dict.items():
                metrics[k].append(np.mean([v]))
            for k, v in get_metrics(det, det_hat).items():
                metrics[k].append(v)

        for k in replace_metric_by_mean:
            metrics[k] = np.mean(metrics[k])

        # Print!
        if verbose:
            start_string = '#### {} evaluation ####'.format(prefix)
            print(start_string)
            for k, v in metrics.items():
                print('#### {} = {}'.format(k, v))
            print(''.join(['#' for _ in range(len(start_string))]))
    return metrics
Example #5
def calc_ref_metrics(truer, quants, est_col, tru_col='cnt'):
    truth = pd.read_table(truer)
    truth.columns = ['id', 'cnt']
    estimated = pd.read_table(quants)
    estimated['id'] = estimated['Name'].str.split('|').str[0]
    est = estimated.groupby('id')[est_col].sum().reset_index()
    metrics = { "R^2" : sklearn.metrics.r2_score, \
                "Explained Var." : sklearn.metrics.explained_variance_score, \
                "Mean Abs Error" : sklearn.metrics.mean_absolute_error, \
                "Mean Sq. Error" : sklearn.metrics.mean_squared_error, \
                "Mean Sq. Log. Error" : sklearn.metrics.mean_squared_log_error, \
                "Med Abs Error" : sklearn.metrics.median_absolute_error, \
                "Bray Curtis" : scipy.spatial.distance.braycurtis, \
                "Kendall Tau" : scipy.stats.kendalltau, \
                "Cosine Similarity" : scipy.spatial.distance.cosine, \
#                "Minkowski Distance" : scipy.spatial.distance.minkowski, \
                "Canberra Distance": scipy.spatial.distance.canberra}

    metric_res = []
    merged = pd.merge(truth, est, on='id', how='outer').fillna(0)
    MARD = mard(merged, tru_col, est_col)
    pcc = merged[[tru_col, est_col]].corr(method='pearson')[tru_col][est_col]
    sp = merged[[tru_col, est_col]].corr(method='spearman')[tru_col][est_col]
    metric_dict = {'mard': MARD, 'pcc': pcc, 'sp': sp}
    for k, v in metrics.items():
        val = v(merged[tru_col], merged[est_col])
        if k == "Kendall Tau":
            metric_dict[k] = val.correlation
        else:
            metric_dict[k] = val
    df = pd.DataFrame(metric_dict, index=[0])
    merged.columns = ['refid', 'truth', 'pred', 'ard']
    merged['abs_err'] = abs(merged['truth'] - merged['pred'])
    return merged, df.transpose()
Example #6
def main():
    """This method invokes the training functions for development purposes"""

    # Read data from a file
    print("Running train.py")

    # Hard code the parameters for training the model
    parameters = {
        'learning_rate': 0.02,
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'auc',
        'sub_feature': 0.7,
        'num_leaves': 60,
        'min_data': 100,
        'min_hessian': 1,
        'verbose': 2
    }

    # Load the training data as dataframe
    data_dir = "data"
    data_file = os.path.join(data_dir,
                             'porto_seguro_safe_driver_prediction_input.csv')
    train_df = pd.read_csv(data_file)

    data = split_data(train_df)

    # Train the model
    model = train_model(data, parameters)

    # Log the metrics for the model
    metrics = get_model_metrics(model, data)
    for (k, v) in metrics.items():
        print(f"{k}: {v}")
    def get_outcome(
        self,
        options: Dict[str, Union[None, List[str]]] = {}
    ) -> Dict[str, float]:
        opt = deepcopy(self.default_options)
        opt.update(options)

        y_true = self.input_images[0]  # gt
        y_pred = self.input_images[1]  # seg

        if str(y_true.dtype) != 'bool':
            y_true = 0 < y_true
        if str(y_pred.dtype) != 'bool':
            y_pred = 0 < y_pred

        metrics_ = self.new_ordered_dict()

        own_metrics = self._compute_metrics_own_implementation(
            y_true, y_pred, opt)
        metrics_.update({str(k): met for k, met in own_metrics.items()})

        sklearn_metrics = self._compute_metrics_sklearn(y_true, y_pred, opt)
        metrics_.update({
            str(k): -1 if (met is None or np.isnan(met)) else met
            for k, met in sklearn_metrics.items()
        })

        medpy_metrics = self._compute_metrics_medpy(y_true, y_pred, opt)
        metrics_.update({
            str(k): -1 if (met is None or np.isnan(met)) else met
            for k, met in medpy_metrics.items()
        })

        return metrics_
    def mean(self, phase, epoch, item=None):
        mean_metrics = {}
        metrics = self.get_metrics(phase=phase, epoch=epoch, item=item)
        metrics = metrics[phase][epoch]
        for key, value in metrics.items():
            mean_metrics[key] = np.mean(np.array(value))
        return mean_metrics
Example #9
def add_metrics_to_spreadsheet(spreadsheet, model_metrics):
    df_metrics = None
    if not (isinstance(model_metrics, dict)):
        model_metrics = {'': model_metrics}
    for model, metrics in model_metrics.items():
        model = '_%s' % model
        standard_metrics = {'metric': [], model: []}
        for metric, value in metrics.items():
            if 'curve' in metric:
                df = pd.DataFrame.from_dict(value, orient='columns')
                df.to_excel(spreadsheet,
                            '%s_curve%s' % (metric.split('_')[0], model),
                            index=False)
            elif 'score' in metric:
                standard_metrics['metric'].append(metric)
                standard_metrics[model].append(value)
            else:
                if not isinstance(value, pd.DataFrame):
                    value = pd.DataFrame(value)
                value.to_excel(
                    spreadsheet,
                    '%s%s' %
                    (metric.replace('classification', 'class').replace(
                        'confusion', 'conf'), model),
                    index=False)

        if df_metrics is None:
            df_metrics = pd.DataFrame.from_dict(standard_metrics)
        else:
            df_metrics = pd.merge(df_metrics,
                                  pd.DataFrame.from_dict(standard_metrics),
                                  on='metric')
    df_metrics.to_excel(spreadsheet, 'metric_comparison', index=False)
Example #10
def find_best_model_for_time(results):
    best_overall = 1000
    best_models = {}
    print("This is the result for time")
    for model, iters in results.items():
        best_rate_so_far = 1000
        best_models[model] = {}
        print(model)
        for parameters, metrics in iters.items():
            for metric, rate in metrics.items():
                if metric == "time":
                    #                 print(metric)
                    #                 print(rate)
                    if rate < best_rate_so_far:
                        best_rate_so_far = rate
                        best_model = model
                        best_parameter = parameters
                        best_models[model]['parameters'] = parameters
                        best_models[model]['metrics'] = metrics
                        # print("{} is the best rate so far for model {}, parameter {}".format(best_rate_so_far, best_model, best_parameter))
        to_append = [value for value in best_models[model]['metrics'].values()]
        to_append.append(best_models[model]['parameters'])
        if best_rate_so_far < best_overall:
            best_overall = best_rate_so_far
            overall_model = model
            overall_parameter = best_parameter
        print(best_parameter)
        print(best_rate_so_far)
    print(
        "And the award for overall winner goes to ... {} under {} at {}".format(
            overall_model, overall_parameter, best_overall))
    print("\n")
Example #11
    def new_run(self, description: str = None, copy_folder: bool = True, metrics: dict = None) -> Run:
        '''
        Begins a new interactive run on the existing AzureML Experiment. If a previous run is still active, it is completed first.
        Args:
            description (str): An optional description that will be added to the run metadata
            copy_folder (bool): Indicates if the output folder should be snapshotted and persisted
            metrics (dict): Metrics to log on the new run right away
        Returns:
            Run: the AzureML Run object that can be used for further access and custom logic
        '''
        if self.__current_run is not None:
            self.__current_run.complete()
        if copy_folder:
            self.__current_run = self.__experiment.start_logging()
        else:
            self.__current_run = self.__experiment.start_logging(snapshot_directory=None)

        if metrics is not None:
            for k, v in metrics.items():
                self.__current_run.log(k, v)

        if description is not None:
            self.__current_run.log('Description', description)

        return self.__current_run
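A hedged usage sketch of new_run; the wrapper class name (ExperimentWrapper) and its construction are assumptions for illustration, only the method call pattern comes from the snippet above.

# Hypothetical usage; ExperimentWrapper is an assumed name for the class that
# owns new_run() -- only the new_run() call itself is taken from the code above.
trainer = ExperimentWrapper(experiment_name='driver-safety')
run = trainer.new_run(
    description='baseline lightgbm',
    copy_folder=False,                      # skip the folder snapshot
    metrics={'learning_rate': 0.02})        # logged on the run right away
# ... training happens here ...
run.complete()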
Example #12
def _my_create_evaluator(model,
                         metrics={},
                         add_index=False,
                         device=None,
                         non_blocking=False,
                         prepare_batch=ignite.engine._prepare_batch):
    if device:
        model.to(device)

    def _inference(engine, batch):
        model.eval()
        with torch.no_grad():
            x, y = prepare_batch(batch[:2],
                                 device=device,
                                 non_blocking=non_blocking)
            y_pred = model(x)

            if add_index:
                index = batch[2]
                return {'prediction': y_pred, 'target': y, 'idx': index}
            else:
                return {'prediction': y_pred, 'target': y}

    engine = Engine(_inference)

    for name, metric in metrics.items():
        metric.attach(engine, name)

    return engine
Example #13
def compute_rejection_curves(probas, labels, groups, metrics):
    order = np.argsort(probas)
    labels = labels[order]
    groups = {g_name: group[order] for g_name, group in groups.items()}

    pred_labels = np.ones(len(labels))
    metrics_res = {metric: [metric_computation(labels, pred_labels, groups)]
                   for metric, metric_computation in metrics.items()}
    for i in range(len(probas)):
        pred_labels[i] = 0

        for metric, metric_computation in metrics.items():
            metrics_res[metric].append(
                metric_computation(labels, pred_labels, groups))

    #assert math.isclose(fairnesses[0], fairnesses[-1])

    return metrics_res
Example #14
def write_to_tensorboard(
    metrics,
    global_step,
    logdir,
):
    """Writes metrics to tensorbaord."""
    with tf.summary.FileWriter(logdir) as writer:
        for label, value in metrics.items():
            summary = tf.Summary(
                value=[tf.Summary.Value(tag=label, simple_value=value)])
            writer.add_summary(summary, global_step)
Example #15
    def log_evaluation_results(engine):
        model.eval()
        for name, imgs in example_images.items():
            imgs = make_example_images(autoencoder, imgs, device=device)
            summary_writer.add_image(name, imgs, engine.state.iteration)

        evaluator.run(loaders.valid)
        metrics = evaluator.state.metrics

        prefix = nvly.engine.get_log_prefix(engine)
        msgs = ', '.join(
            [f'{name}: {value:.3f}' for name, value in metrics.items()])
        print(f'{prefix} {msgs}')

        for name, value in metrics.items():
            summary_writer.add_scalar(f'metrics/{name}', value,
                                      engine.state.iteration)

        nonlocal best_score
        best_score = max(best_score, metrics['roc_auc'])
def evaluation_dict(y_test, y_test_pred, run_time):
    '''Creates a dictionary with 5 prediction evaluation metrics and a measure
    of how long it took the model to run.'''
    results = {}
    metrics = {'Accuracy': accuracy_score, 'F1_Score': f1_score,
                'Precision': precision_score, 'Recall': recall_score,
                'AUC': roc_auc_score}
    for label, fn in metrics.items():
        results[label] = round(fn(y_test, y_test_pred), 4)
    results['Train Time (s)'] = run_time
    return results
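A hedged usage sketch of evaluation_dict above; the labels and timing value are fabricated for illustration.

# Illustrative call; y_test, y_pred, and run_time are made up.
y_test = [0, 1, 1, 0, 1, 0, 1, 1]
y_pred = [0, 1, 0, 0, 1, 0, 1, 1]
print(evaluation_dict(y_test, y_pred, run_time=12.3))
# e.g. {'Accuracy': 0.875, 'F1_Score': ..., 'Precision': 1.0, 'Recall': 0.8,
#       'AUC': ..., 'Train Time (s)': 12.3}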
Example #17
def get_p_values(metrics):
    """Compute paired t-test p-values for every pair of metric score lists."""
    p_values = {}

    # Each pair of (name, scores) items becomes a two-entry dict.
    result_list = map(dict, itertools.combinations(metrics.items(), 2))

    for ch in result_list:
        values = list(ch.values())
        ttest = stats.ttest_rel(a=values[0], b=values[1])
        p_values[str(tuple(ch.keys()))] = ttest.pvalue

    return p_values
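A minimal sketch of the expected input, assuming each metric name maps to a list of paired scores (for example per-fold results); every pair of entries yields one paired t-test p-value. The names and numbers below are made up.

# Illustrative input for get_p_values; values are fabricated.
scores = {
    'model_a': [0.81, 0.79, 0.83, 0.80],
    'model_b': [0.78, 0.77, 0.80, 0.79],
    'model_c': [0.82, 0.81, 0.84, 0.80],
}
print(get_p_values(scores))
# {"('model_a', 'model_b')": ..., "('model_a', 'model_c')": ..., "('model_b', 'model_c')": ...}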
Example #18
    def print_best_MRR_and_hits(self):
        """
        Print the best results on the validation set, and the corresponding
        scores (with the same hyper-parameters) on the test set.
        """
        #        tools.logger.info( "Validation metrics:")
        metrics = self.valid_results.print_MRR_and_hits()
        tools.logger.info("Corresponding Test metrics:")
        for model_s, (best_rank, best_lambda, _, _, _, _,
                      _) in metrics.items():
            self.results.print_MRR_and_hits_given_params(
                model_s, best_rank, best_lambda)
Example #19
def test(args, model, loader, prefix='', verbose=True):
    """
    This function does a single pass through the testing set
    and evaluates the average dice_score, and average loss. 

    params: args are the run parameters, model is the model being tested,
            loader is the pytorch test loader, prefix is a name string,
            verbose flag is for printing metrics 

    return: dictionary of metric values 
    """

    print("train: Beginning test")

    metrics = defaultdict(list)
    t = tqdm(loader)
    with torch.no_grad():
        for (img, seg) in t:
            img = img.to(args.device)
            seg = seg_utils.map_segmentation(seg, args.num_classes).long()          
            seg_hot = seg_utils.one_hot_encode(seg, args.num_classes).to(args.device)    
            seg = seg.to(args.device)

            seg_hat = model(img)
            
            if args.loss_func=='dice':
                dice_loss = loss_fns.DiceLoss()
                loss = dice_loss(seg_hat, seg_hot)
            elif args.loss_func=='crossentropy':
                log_loss = nn.CrossEntropyLoss()
                loss = log_loss(seg_hat, seg)

            t.set_postfix_str(s='loss: %f'% loss.item())

            try:    
                metrics['loss'].append(loss.item())
            except ValueError:
                print(metrics)
            for k, v in get_metrics(seg_hot, seg_hat).items():
                metrics[k].append(v)

        for k in replace_metric_by_mean:
            metrics[k] = np.mean(metrics[k])

        # Print!
        if verbose:
            start_string = '#### {} evaluation ####'.format(prefix)
            print(start_string)
            for k, v in metrics.items():
                print('#### {} = {}'.format(k, v))
            print(''.join(['#' for _ in range(len(start_string))]))
    return metrics
Example #20
def get_metrics(target, pred, errors, name, settings):
    import json
    import numpy as np
    import scipy.stats
    import sklearn.metrics

    metrics = {}

    if settings.get("classification"):
        metrics["roc_auc"] = score = sklearn.metrics.roc_auc_score(
            target.reshape(-1, 1), pred.reshape(-1, 1)
        )
        metrics["ap_score"] = ap_score = sklearn.metrics.average_precision_score(
            target.reshape(-1, 1), pred.reshape(-1, 1), average="micro"
        )
        print(f"ROC-AUC: {score:3.3f}, AP: {ap_score:3.3f}")
    else:
        mae = metrics["mae"] = sklearn.metrics.mean_absolute_error(target, pred)
        try:
            mape = metrics["mape"] = sklearn.metrics.mean_absolute_percentage_error(
                target, pred
            )
        except AttributeError:
            mape = metrics["mape"] = 1e20
        mse = metrics["mse"] = sklearn.metrics.mean_squared_error(target, pred)
        med_ae = metrics["med_ae"] = sklearn.metrics.median_absolute_error(target, pred)
        max_ae = metrics["max_ae"] = sklearn.metrics.max_error(target, pred)
        fit_results = scipy.stats.linregress(
            x=target.reshape(-1),
            y=pred.reshape(-1),
        )
        slope = metrics["slope"] = fit_results.slope
        rvalue = metrics["rvalue"] = fit_results.rvalue

        print(
            f"MAE = {mae:3.3f} {settings.get('units', '')}, MedianAE = {med_ae:3.3f} {settings.get('units', '')}, MAPE = {mape:3.3f}, √MSE = {np.sqrt(mse):3.3f} {settings.get('units', '')}"  # noqa
        )
        print(
            f"MaxAE = {max_ae:3.3f} {settings.get('units', '')}, slope = {slope:3.2f}, R = {rvalue:3.2f}"
        )

    for k,v in metrics.items():
        metrics[k] = float(v)

    with open(f"results/{name}_metrics.json", "w") as f:
        json.dump(metrics, f)

    return metrics
def get_cross_validation_metrics(estimator, X, y, metrics):
    metric_scorers = {}
    for metric, func in metrics.items():
        metric_scorers[metric] = make_scorer(func)
    results = {}
    cv = StratifiedKFold(n_splits=10)
    metric_values = cross_validate(estimator,
                                   X,
                                   y,
                                   scoring=metric_scorers,
                                   cv=cv)
    for metric in metrics:
        results[metric] = metric_values["test_" + metric].mean()
    return results
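A hedged usage sketch of the helper above; the estimator, metric functions, and dataset are illustrative choices only, not part of the original code.

# Illustrative only; any estimator and metric functions with these signatures work.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

X, y = make_classification(n_samples=200, random_state=0)
cv_scores = get_cross_validation_metrics(
    LogisticRegression(max_iter=1000), X, y,
    metrics={'accuracy': accuracy_score, 'f1': f1_score})
print(cv_scores)  # e.g. {'accuracy': 0.9, 'f1': 0.89}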
Example #22
def calc_taxlevel_metrics(truer, est):
    true = pd.read_table(truer)
    estimated = pd.read_table(est)
    metrics = { "R^2" : sklearn.metrics.r2_score, \
                "Explained Var." : sklearn.metrics.explained_variance_score, \
                "Mean Abs Error" : sklearn.metrics.mean_absolute_error, \
                "Mean Sq. Error" : sklearn.metrics.mean_squared_error, \
                "Mean Sq. Log. Error" : sklearn.metrics.mean_squared_log_error, \
                "Med Abs Error" : sklearn.metrics.median_absolute_error, \
                "Bray Curtis" : scipy.spatial.distance.braycurtis, \
                "Kendall Tau" : scipy.stats.kendalltau, \
                "Cosine Similarity" : scipy.spatial.distance.cosine, \
#                "Minkowski Distance" : scipy.spatial.distance.minkowski, \
                "Canberra Distance": scipy.spatial.distance.canberra}

    metric_res = {}
    for r in ["Phylum", "Genus", "Species", "Scietific_Name", "TaxID"]:
        metric_res[r] = {}
        tr_taxid_counts = true.groupby(r)['NumCounts'].sum().reset_index()
        es_taxid_counts = estimated.groupby(r)['NumCounts'].sum().reset_index()
        merged = pd.merge(tr_taxid_counts, es_taxid_counts, on=r,
                          how='outer').fillna(0)
        MARD = mard(merged, 'NumCounts_x', 'NumCounts_y')
        pcc = merged[['NumCounts_x', 'NumCounts_y'
                      ]].corr(method='pearson')['NumCounts_x']['NumCounts_y']
        sp = merged[['NumCounts_x', 'NumCounts_y'
                     ]].corr(method='spearman')['NumCounts_x']['NumCounts_y']
        metric_res[r]['mard'] = MARD
        metric_res[r]['pcc'] = pcc
        metric_res[r]['sp'] = sp
        #metric_res[r] += [MARD, pcc, sp]
        for k, v in metrics.items():
            val = v(merged['NumCounts_x'], merged['NumCounts_y'])
            if k == "Kendall Tau":
                metric_res[r][k] = val.correlation
            else:
                metric_res[r][k] = val
        df = pd.DataFrame(metric_res)
        #df.columns=['mard', 'pcc', 'sp', 'r2', 'ex_var', 'mae', 'mse', 'msle', 'medae']
        merged.columns = ['taxid', 'truth', 'pred', 'ard']
        merged['abs_err'] = abs(merged['truth'] - merged['pred'])
    return merged, df[[
        'Phylum', 'Genus', 'Species', 'Scietific_Name', 'TaxID'
    ]]
Example #23
def test(args, model, loader, prefix='', verbose=True):
    
    print("train: Beginning test")

    metrics = defaultdict(list)
    t = tqdm(loader)
    with torch.no_grad():
        for (img, seg) in t:
            img = img.to(args.device)
            seg = map_segmentation(seg, args.num_classes).long()          
            seg_hot = seg_utils.one_hot_encode(seg, args.num_classes).to(args.device)    
            seg = seg.to(args.device)

            seg_hat = model(img)
            
            if args.loss_func=='dice':
                dice_loss = loss_fns.DiceLoss()
                loss = dice_loss(seg_hat, seg_hot)
            elif args.loss_func=='crossentropy':
                log_loss = nn.CrossEntropyLoss()
                loss = log_loss(seg_hat, seg)

            t.set_postfix_str(s='loss: %f'% loss.item())

            try:    
                metrics['loss'].append(loss.item())
            except ValueError:
                print(metrics)
            for k, v in get_metrics(seg_hot, seg_hat).items():
                metrics[k].append(v)

        for k in replace_metric_by_mean:
            metrics[k] = np.mean(metrics[k])

        # Print!
        if verbose:
            start_string = '#### {} evaluation ####'.format(prefix)
            print(start_string)
            for k, v in metrics.items():
                print('#### {} = {}'.format(k, v))
            print(''.join(['#' for _ in range(len(start_string))]))
    return metrics
Example #24
    def run_epoch(self, epoch, sents, labels, dev_sents, dev_labels):
        """
        Performs one complete pass over the train set and evaluate on dev
        Args:
            epoch:
            sents: dataset sentences that yields tuple of sentences, tags
            labels: dataset label by sentenses that yields tuple of sentences, tags
            dev_sents: data for avaluate
            dev_labels:
        Return:
            f1: (python float), score to select model on, higher is better
        """

        # progbar stuff for logging
        batch_size = self.config.batch_size
        nbatches = (len(sents) + batch_size - 1) // batch_size
        prog = Progbar(target=nbatches)
        # iterate over dataset
        train_loss = 0
        # initial LSTM state (cell, hidden) -- currently unused below
        current_state = np.zeros((2, self.config.batch_size, self.config.hidden_size_lstm))

        for i, (words, labels_) in enumerate(minibatches(sents, labels, batch_size)):

            fd, _ = self.get_feed_dict(words, labels_, self.config.lr,
                                       self.config.dropout)

            _, train_loss = self.sess.run([self.train_op, self.loss],
                                          feed_dict=fd)
            # get final state of the previous batch

            if i % 20 == 0 or i == (len(sents) // batch_size - 1):
                prog.update(i + 1, [("train loss", train_loss)])
            # tensorboard
            # if i % 10 == 0:
            #     self.file_writer.add_summary(summary, epoch*nbatches + i)
        print('\tlearning rate: {:.5f}'.format(self.config.lr))
        metrics = self.run_evaluate(dev_sents,dev_labels)
        msg = " - ".join(["{} {:04.2f}".format(k, v)
                for k, v in metrics.items()])
        self.logger.info(msg)

        return  train_loss, metrics["f1"]
def calculateRank(metrics, result):
    """
    Calculate the optimal library for each dataset based on user-set weights.

    - Input
      - metrics: output from callLibrary
        (performance metrics for each library)
      - result: user-defined weight for each metric

    - Output
      - ranks: libraries sorted by their weighted sum
    """
    weighted_score = {}
    for key, value in metrics.items():
        weighted_score[key] = result["precision"] * value["precision"] + \
                              result["accuracy"] * value["accuracy"] + \
                              result["specificity"] * value["specificity"] + \
                              result["f1score"] * value["f1score"] + \
                              result["sensitivity"] * value["sensitivity"]
    ranks = dict(sorted(weighted_score.items(), key=lambda x: x[1]))
    return ranks
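A small worked example of the weighting above; the library names, scores, and weights are made up, and the second argument carries the user-set weights.

# Hypothetical inputs: two libraries scored on the same five metrics.
metrics = {
    'lib_a': {'precision': 0.90, 'accuracy': 0.88, 'specificity': 0.85,
              'f1score': 0.89, 'sensitivity': 0.87},
    'lib_b': {'precision': 0.84, 'accuracy': 0.91, 'specificity': 0.80,
              'f1score': 0.86, 'sensitivity': 0.92},
}
weights = {'precision': 0.4, 'accuracy': 0.2, 'specificity': 0.1,
           'f1score': 0.2, 'sensitivity': 0.1}
print(calculateRank(metrics, weights))
# lib_a: 0.4*0.90 + 0.2*0.88 + 0.1*0.85 + 0.2*0.89 + 0.1*0.87 = 0.886
# lib_b: 0.4*0.84 + 0.2*0.91 + 0.1*0.80 + 0.2*0.86 + 0.1*0.92 = 0.862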
Example #26
def buildModel(df, keyFeatures, target):
    fittingFrame = df.copy()
    keyFeatures = keyFeatures + [target]  # avoid mutating the caller's list
    fittingFrame["Name"] = pd.Categorical(fittingFrame["Name"])
    fittingFrame = fittingFrame[keyFeatures]
    categoryColumns = fittingFrame.select_dtypes(include=['category']).columns
    for col in categoryColumns:
        currentCategorical = pd.get_dummies(fittingFrame[col])
        fittingFrame = pd.concat([fittingFrame, currentCategorical], axis=1)
        fittingFrame = fittingFrame.drop([col], axis=1)

    #print(df.info())
    rf = RandomForestRegressor(n_estimators=1800,
                               max_features='auto',
                               min_samples_split=4,
                               min_samples_leaf=1,
                               max_depth=60)

    metrics = cross_validation_metrics(fittingFrame, target, 10)
    metric_frame = pd.DataFrame(list(metrics.items()),
                                columns=['Metric Name', 'Metric Value'])
    X = fittingFrame.drop([target], axis=1)
    connection = {
        'host': config.host,
        'dbname': config.dbname,
        'username': config.username,
        'password': config.password,
        'port': config.port
    }

    saveMetrics(metric_frame, connection)

    rf.fit(X, fittingFrame[target])
    # returns = {
    #     'model':rf,
    #     'metrics':metrics
    # }
    return rf
    def eva_metrics(self):
        '''
        Evaluate the classifier by various metrics.

        Uses (instance attributes):
            y_true: a Pandas dataframe of actual label values
            y_pred: a Pandas dataframe of predicted label values
            y_pred_probs: a Pandas dataframe of probability estimates

        Output:
            rv: a dictionary where each key is a metric and the value is its score

        '''
        rv = {}
        metrics = {
            'accuracy': accuracy_score,
            'f1_score': f1_score,
            'precision': precision_score,
            'recall': recall_score,
            'auc': roc_auc_score
        }
        for metric, fn in metrics.items():
            rv[metric] = fn(self.y_true, self.y_pred)

        y_pred_probs_sorted, y_true_sorted = zip(
            *sorted(zip(self.y_pred_probs, self.y_true), reverse=True))
        levels = [1, 3, 5]
        for k in levels:
            rv['p_at_' + str(k) + '%'] = precision_at_k(
                y_true_sorted, y_pred_probs_sorted, k)
            rv['r_at_' + str(k) + '%'] = recall_at_k(y_true_sorted,
                                                     y_pred_probs_sorted, k)
        rv['p_at_200'] = precision_at_k(y_true_sorted, y_pred_probs_sorted,
                                        200, False)
        rv['r_at_200'] = recall_at_k(y_true_sorted, y_pred_probs_sorted, 200,
                                     False)

        return rv
Example #28
def create_supervised_evaluator(model,
                                metrics={},
                                device=None,
                                forward_fn=None):
    def _inference(engine, batch):
        # now compute error
        model.eval()
        with torch.no_grad():
            x, y = utils_data.nestedDictToDevice(
                batch, device=device)  # make it work for dict input too
            if forward_fn is None:
                y_pred = model(x)
            else:
                y_pred = forward_fn(x)

        return y_pred, y

    engine = Engine(_inference)

    for name, metric in metrics.items():
        metric.attach(engine, name)

    return engine
Example #29
    def print_best_MRR_and_hits_per_rel(self):
        """
        Print best results on validation set, and corresponding scores (with same hyper params) on test set
        """

        #        tools.logger.info( "Validation metrics:")
        metrics = self.valid_results.print_MRR_and_hits()

        tools.logger.info("Corresponding per relation Test metrics:")
        with open("/home/ksrao/Saikat/complex/relation_test.txt", 'w') as f:
            for rel_name, rel_idx in self.relations_dict.items():
                tools.logger.info(rel_name)
                this_rel_row_idxs = self.test.indexes[:, 1] == rel_idx
                this_rel_test_indexes = self.test.indexes[this_rel_row_idxs, :]
                this_rel_test_values = self.test.values[this_rel_row_idxs]
                this_rel_set = tools.Triplets_set(this_rel_test_indexes,
                                                  this_rel_test_values)
                f.write("%s\n" % rel_name)
                for model_s, (best_rank, best_lambda, _, _, _, _,
                              _) in metrics.items():
                    rel_cv_results = self.results.extract_sub_scores(
                        this_rel_row_idxs)
                    rel_cv_results.print_MRR_and_hits_given_params(
                        model_s, best_rank, best_lambda)
Example #30
def inference(
    args,
    dlrm,
    best_acc_test,
    best_auc_test,
    test_ld,
):
    test_accu = 0
    test_samp = 0

    if args.print_auc:
        scores = []
        targets = []

    total_time = 0
    total_iter = 0
    if args.inference_only:
        dlrm = trace_model(args, dlrm, test_ld)
    if args.share_weight_instance != 0:
        run_throughput_benchmark(args, dlrm, test_ld)
    with torch.cpu.amp.autocast(enabled=args.bf16):
        for i, testBatch in enumerate(test_ld):
            should_print = ((i + 1) % args.print_freq == 0
                            or i + 1 == len(test_ld)) and args.inference_only
            if should_print:
                gT = 1000.0 * total_time / total_iter
                print(
                    "Finished {} it {}/{}, {:.2f} ms/it,".format(
                        "inference", i + 1, len(test_ld), gT),
                    flush=True,
                )
                total_time = 0
                total_iter = 0
            # early exit if nbatches was set by the user and was exceeded
            if args.inference_only and nbatches > 0 and i >= nbatches:
                break

            X_test, lS_o_test, lS_i_test, T_test, W_test, CBPP_test = unpack_batch(
                testBatch)

            # forward pass

            if not args.inference_only and isinstance(
                    dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
                n_tables = lS_i_test.shape[0]
                idx = [lS_i_test[i] for i in range(n_tables)]
                offset = [lS_o_test[i] for i in range(n_tables)]
                include_last = [False for i in range(n_tables)]
                indices, offsets, indices_with_row_offsets = dlrm.emb_l.linearize_indices_and_offsets(
                    idx, offset, include_last)

            start = time_wrap()
            if not args.inference_only and isinstance(
                    dlrm.emb_l, ipex.nn.modules.MergedEmbeddingBagWithSGD):
                Z_test = dlrm(X_test, indices, offsets,
                              indices_with_row_offsets)
            else:
                Z_test = dlrm(X_test, lS_o_test, lS_i_test)

            total_time += (time_wrap() - start)
            total_iter += 1

            if args.print_auc:
                S_test = Z_test.detach().cpu().float().numpy()  # numpy array
                T_test = T_test.detach().cpu().float().numpy()  # numpy array
                scores.append(S_test)
                targets.append(T_test)
            elif not args.inference_only:
                with record_function("DLRM accuracy compute"):
                    # compute loss and accuracy
                    S_test = Z_test.detach().cpu().float().numpy()  # numpy array
                    T_test = T_test.detach().cpu().float().numpy()  # numpy array

                    mbs_test = T_test.shape[0]  # = mini_batch_size except last
                    A_test = np.sum((np.round(S_test,
                                              0) == T_test).astype(np.uint8))

                    test_accu += A_test
                    test_samp += mbs_test
            else:
                # do nothing to save time
                pass

    if args.print_auc:
        with record_function("DLRM mlperf sklearn metrics compute"):
            scores = np.concatenate(scores, axis=0)
            targets = np.concatenate(targets, axis=0)

            metrics = {
                "recall": lambda y_true, y_score: sklearn.metrics.recall_score(
                    y_true=y_true, y_pred=np.round(y_score)),
                "precision": lambda y_true, y_score: sklearn.metrics.precision_score(
                    y_true=y_true, y_pred=np.round(y_score)),
                "f1": lambda y_true, y_score: sklearn.metrics.f1_score(
                    y_true=y_true, y_pred=np.round(y_score)),
                "ap": sklearn.metrics.average_precision_score,
                "roc_auc": sklearn.metrics.roc_auc_score,
                "accuracy": lambda y_true, y_score: sklearn.metrics.accuracy_score(
                    y_true=y_true, y_pred=np.round(y_score)),
            }

        validation_results = {}
        for metric_name, metric_function in metrics.items():
            validation_results[metric_name] = metric_function(targets, scores)
        acc_test = validation_results["accuracy"]
    elif not args.inference_only:
        acc_test = test_accu / test_samp
    else:
        pass

    model_metrics_dict = {
        "nepochs": args.nepochs,
        "nbatches": nbatches,
        "nbatches_test": nbatches_test,
    }
    if not args.inference_only:
        model_metrics_dict["test_acc"] = acc_test

    if args.print_auc:
        is_best = validation_results["roc_auc"] > best_auc_test
        if is_best:
            best_auc_test = validation_results["roc_auc"]
            model_metrics_dict["test_auc"] = best_auc_test
        print(
            "recall {:.4f}, precision {:.4f},".format(
                validation_results["recall"],
                validation_results["precision"],
            ) + " f1 {:.4f}, ap {:.4f},".format(validation_results["f1"],
                                                validation_results["ap"]) +
            " auc {:.4f}, best auc {:.4f},".format(
                validation_results["roc_auc"], best_auc_test) +
            " accuracy {:3.3f} %, best accuracy {:3.3f} %".format(
                validation_results["accuracy"] * 100, best_acc_test * 100),
            flush=True,
        )
        print("Accuracy: {:.34} ".format(validation_results["roc_auc"]))
    elif not args.inference_only:
        is_best = acc_test > best_acc_test
        if is_best:
            best_acc_test = acc_test
        print(
            " accuracy {:3.3f} %, best {:3.3f} %".format(
                acc_test * 100, best_acc_test * 100),
            flush=True,
        )
    else:
        pass
    if not args.inference_only:
        return model_metrics_dict, is_best
    else:
        return
Example #31
                            'ap': sklearn.metrics.average_precision_score,
                            'roc_auc': sklearn.metrics.roc_auc_score,
                            'accuracy': lambda y_true, y_score:
                                sklearn.metrics.accuracy_score(
                                    y_true=y_true, y_pred=np.round(y_score)),
                            # 'pre_curve': sklearn.metrics.precision_recall_curve,
                            # 'roc_curve': sklearn.metrics.roc_curve,
                        }

                        # print("Compute time for validation metric : ", end="")
                        # first_it = True
                        validation_results = {}
                        for metric_name, metric_function in metrics.items():
                            # if first_it:
                            #     first_it = False
                            # else:
                            #     print(", ", end="")
                            # metric_compute_start = time_wrap(False)
                            validation_results[metric_name] = metric_function(
                                targets, scores)
                            # metric_compute_end = time_wrap(False)
                            # met_time = metric_compute_end - metric_compute_start
                            # print("{} {:.4f}".format(metric_name, 1000 * (met_time)),
                            #      end="")
                        # print(" ms")
                        gA_test = validation_results['accuracy']
                        gL_test = validation_results['loss']
                    else: