Example #1
def get_difficulties(answers, data=None, model=None, force=False, name="difficulty"):
    if data and model:
        runner = Runner(data, model)
        file_name = "../cache/difficulties_{}.pd".format(runner._hash)
    else:
        data = d.Data("../data/matmat/2016-11-28/answers.pd")
        model = EloPriorCurrentModel(KC=2, KI=0.5)
        runner = Runner(data, model)
        file_name = "../cache/difficulties_matmat.pd"
    if os.path.exists(file_name) and not force:
        difficulties = pd.read_pickle(file_name)
    else:
        items = answers["item"].unique()
        runner.run(force=True)
        difficulties = pd.Series(data=model.get_difficulties(items), index=items, name=name)
        difficulties.to_pickle(file_name)

    return difficulties
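
A minimal usage sketch for get_difficulties, assuming the d.Data object exposes the full answers DataFrame via get_dataframe_all() (as Example #10 below does); with no data/model given, the function falls back to the MatMat data and Elo model hard-coded above.

data = d.Data("../data/matmat/2016-11-28/answers.pd")
answers = data.get_dataframe_all()          # DataFrame with an "item" column
difficulties = get_difficulties(answers)    # uses the default data/model branch
print(difficulties.sort_values().head())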
Example #2
    def EvtRun(self, event):
        runner = Runner(self.storage.data)

        dlg = wx.ProgressDialog("Running",
                                "Current Song: ",
                                maximum=len(runner.files) + 1,
                                parent=self,
                                style=0
                                | wx.PD_APP_MODAL
                                | wx.PD_CAN_ABORT
                                | wx.PD_ELAPSED_TIME
                                | wx.PD_ESTIMATED_TIME
                                | wx.PD_REMAINING_TIME)

        runner.run(dlg)
        dlg.Destroy()

        self.resultsWindow = ResultsWindow(self, runner.results,
                                           self.storage.data["targetDir"])
Example #3
def compare_model_difficulties(data1, data2, model1, model2, plot=True):
    if str(data1) + str(model1) not in cache:
        runner1 = Runner(data1, model1)
        runner1.run(force=True)
        cache[str(data1) + str(model1)] = runner1, model1
    else:
        runner1, model1 = cache[str(data1) + str(model1)]

    if str(data2) + str(model2) not in cache:
        runner2 = Runner(data2, model2)
        runner2.run(force=True)
        cache[str(data2) + str(model2)] = runner2, model2
    else:
        runner2, model2 = cache[str(data2) + str(model2)]

    items = list(set(data1.get_items()) & set(data2.get_items()))
    difficulties = pd.DataFrame(columns=["model1", "model2"], index=items)
    difficulties["model1"] = model1.get_difficulties(items)
    difficulties["model2"] = model2.get_difficulties(items)
    difficulties_corr = difficulties.corr(method="spearman").loc["model1", "model2"]
    if plot:
        plt.plot(difficulties["model1"], difficulties["model2"], "k.")
        plt.title("Difficulties: {}".format(difficulties_corr))
        plt.xlabel(str(model1))
        plt.ylabel(str(model2))

    return difficulties_corr
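
A hedged usage sketch for compare_model_difficulties, reusing the data path and Elo model construction from Example #1; the second parametrization (KC=1, KI=1) is an assumption for illustration only.

data = d.Data("../data/matmat/2016-11-28/answers.pd")
model_a = EloPriorCurrentModel(KC=2, KI=0.5)
model_b = EloPriorCurrentModel(KC=1, KI=1)   # assumed alternative parametrization
corr = compare_model_difficulties(data, data, model_a, model_b)
print("Spearman correlation of item difficulties:", corr)
plt.show()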
Example #4
def difficulty_stability(datas, models, labels, points, comparable=True, runs=1, eval_data=None):
    df = pd.DataFrame(columns=["data size", "correlation", "models"])
    for i in range(points):
        ratio = (i + 1) / points
        print("Evaluation for {}% of data".format(ratio * 100))

        values = defaultdict(list)
        for data, model, label in zip(datas, models, labels):
            for run in range(runs):
                d = data(None)
                d.set_seed(run)
                d.set_train_size(ratio)
                d.filter_data(100, 0)
                m = model(None)

                Runner(d, m).run(force=True, only_train=True)

                items = d.get_items()
                if eval_data is None:
                    values[label].append(pd.Series(m.get_difficulties(items), items))
                else:
                    r = Runner(eval_data, m)
                    r.run(force=True, skip_pre_process=True)
                    values[label].append(pd.Series(r._log))

        for j, (data1, model1, label1) in enumerate(zip(datas, models, labels)):
            for data2, model2, label2 in list(zip(datas, models, labels))[j:]:
                print("Computing correlations for " + label1 + " -- " + label2)
                if comparable and label1 != label2:
                    for v1, v2 in zip(values[label1], values[label2]):
                        df.loc[len(df)] = (ratio, v1.corr(v2), label1 + " -- " + label2)
                else:
                    for v1 in values[label1]:
                        for v2 in values[label2]:
                            if v1.sum() == v2.sum() and ratio != 1:
                                continue
                            df.loc[len(df)] = (ratio, v1.corr(v2), label1 + " -- " + label2)

    print(df)
    sns.factorplot(x="data size", y="correlation", hue="models", data=df)
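
difficulty_stability calls each entry of datas and models with None, so they are expected to be factory callables. A minimal sketch under that assumption, again reusing the constructors from Example #1; the second Elo parametrization and the labels are illustrative.

datas = [lambda _: d.Data("../data/matmat/2016-11-28/answers.pd")] * 2
models = [lambda _: EloPriorCurrentModel(KC=2, KI=0.5),
          lambda _: EloPriorCurrentModel(KC=1, KI=1)]   # second parametrization is assumed
difficulty_stability(datas, models, labels=["elo 2/0.5", "elo 1/1"], points=4, runs=2)
plt.show()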
Example #5
def train(cfg_path):
    """Training demo."""
    # Load the configuration
    cfg = get_config(cfg_path)

    # Create the logger
    logger = get_logger(cfg.log_level)
    logger.info("start training:")

    # Create the model
    model = OneStageDetector(cfg)
    model.to(cfg.device)

    # Create the dataset and dataloader
    dataset = get_dataset(cfg.data.train)
    dataloader = DataLoader(
        dataset,
        batch_size=cfg.batch_size,
        sampler=cfg.sampler,
        num_workers=cfg.num_workers,
        collate_fn=partial(collate, samples_per_gpu=cfg.data.imgs_per_gpu),
        pin_memory=False)

    # Create the runner and start training
    runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir,
                    cfg.log_level)
    runner.register_hooks()
    runner.run(dataloader)
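
A hypothetical invocation of the training entry point above; the config path is an assumption and depends on the project layout.

if __name__ == '__main__':
    train('./configs/one_stage_detector.py')   # assumed config path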
Example #6
def compare_models(data1, data2, model1, model2, plot=True):
    if str(data1) + str(model1) not in cache:
        runner1 = Runner(data1, model1)
        runner1.run(force=True)
        cache[str(data1) + str(model1)] = runner1, model1
    else:
        runner1, model1 = cache[str(data1) + str(model1)]

    if str(data2) + str(model2) not in cache:
        runner2 = Runner(data2, model2)
        runner2.run(force=True)
        cache[str(data2) + str(model2)] = runner2, model2
    else:
        runner2, model2 = cache[str(data2) + str(model2)]

    difficulties_corr, skills_corr, predictions_corr = 0, 0, 0

    # difficulties
    items = list(set(data1.get_items()) & set(data2.get_items()))
    difficulties = pd.DataFrame(columns=["model1", "model2"], index=items)
    difficulties["model1"] = model1.get_difficulties(items)
    difficulties["model2"] = model2.get_difficulties(items)
    difficulties_corr = difficulties.corr(method="spearman").loc["model1", "model2"]
    if plot:
        plt.subplot(221)
        plt.plot(difficulties["model1"], difficulties["model2"], "k.")
        plt.title("Difficulties: {}".format(difficulties_corr))
        plt.xlabel(str(model1))
        plt.ylabel(str(model2))

    # skills (models without per-student skills raise AttributeError and are skipped)
    students = list(set(data1.get_students()) & set(data2.get_students()))
    try:
        skills = pd.DataFrame(index=students, columns=["model1", "model2"])
        skills["model1"] = model1.get_skills(students)
        skills["model2"] = model2.get_skills(students)
        skills_corr = skills.corr(method="spearman").loc["model1", "model2"]
        if plot:
            plt.subplot(222)
            plt.plot(skills["model1"], skills["model2"], "k.")
            plt.title("Skills: {}".format(skills_corr))
            plt.xlabel(str(model1))
            plt.ylabel(str(model2))
    except AttributeError:
        pass

    # predictions
    predictions = pd.DataFrame(index=students, columns=["model1", "model2"])
    predictions["model1"] = pd.Series(runner1._log)
    predictions["model2"] = pd.Series(runner2._log)
    predictions_corr = predictions.corr(method="spearman").loc["model1", "model2"]
    if plot:
        plt.subplot(223)
        plt.plot(predictions["model1"], predictions["model2"], "k.")
        plt.title("Predictions: {}".format(predictions_corr))
        plt.xlabel(str(model1))
        plt.ylabel(str(model2))

    return difficulties_corr, skills_corr, predictions_corr
Example #7
def main(_):
    """
    Starting point of the application
    """

    flags = PARSER.parse_args()

    params = _cmd_params(flags)

    tf.logging.set_verbosity(tf.logging.ERROR)

    # Optimization flags
    os.environ['CUDA_CACHE_DISABLE'] = '0'
    os.environ['HOROVOD_GPU_ALLREDUCE'] = 'NCCL'
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    os.environ['TF_GPU_THREAD_MODE'] = 'gpu_private'
    os.environ['TF_USE_CUDNN_BATCHNORM_SPATIAL_PERSISTENT'] = '1'
    os.environ['TF_ADJUST_HUE_FUSED'] = '1'
    os.environ['TF_ADJUST_SATURATION_FUSED'] = '1'
    os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
    os.environ['TF_SYNC_ON_FINISH'] = '0'
    os.environ['TF_AUTOTUNE_THRESHOLD'] = '2'
    os.environ['TF_DISABLE_NVTX_RANGES'] = '1'

    if params['use_amp']:
        assert params['dtype'] == tf.float32, "TF-AMP requires FP32 precision"

        LOGGER.log("TF AMP is activated - Experimental Feature")
        os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'

    runner = Runner(params)

    if 'train' in params['exec_mode'] \
            or 'train_and_predict' in params['exec_mode']:
        runner.train()
    if 'train_and_predict' in params['exec_mode'] \
            or 'predict' in params['exec_mode']:
        runner.predict()
    if 'benchmark' in params['exec_mode']:
        runner.benchmark()
Example #8
def main():
    args = parse_args()

    create_save_dir(args.save_directory)
    mechanism = get_mechanism(args.mechanism)

    # Create DataLoaders.
    trainloader = create_dataloader(args.datafile,
                                    "train",
                                    args.batch_size,
                                    mechanism,
                                    shuffle=True)
    validloader = create_dataloader(args.datafile,
                                    "valid",
                                    args.batch_size,
                                    mechanism,
                                    shuffle=False)
    testloader = create_dataloader(args.datafile,
                                   "test",
                                   args.batch_size,
                                   mechanism,
                                   shuffle=False)

    runner = Runner(args, mechanism)

    print("\n------------- Evaluating final model -------------")
    print("\nTrain performance:")
    print_stats(runner.test(trainloader))

    print("\nValidation performance:")
    print_stats(runner.test(testloader))

    print("\nTest performance:")
    print_stats(runner.test(validloader))

    print("\n------------- Evaluating best model -------------")
    print("\nTrain performance:")
    print_stats(runner.test(trainloader, use_best=True))

    print("\nValidation performance:")
    print_stats(runner.test(testloader, use_best=True))

    print("\nTest performance:")
    print_stats(runner.test(validloader, use_best=True))
Example #9
def main():
    from network.listener import ConnectionsListener
    from network.session_controller import SessionsController
    from ui.text import UiController

    UI = UiController

    args = params()

    if 'ui' in args:
        if args['ui'] == 'text':
            from ui.text import UiController
            UI = UiController

        elif args['ui'] == 'gui':
            from ui.gui import UiController
            UI = UiController

        else:
            print('Unknown UI mode: {} (must be "gui" or "text"); falling back to the text UI'
                  .format(args['ui']))

    ui_controller = Runner(UI())
    ui_controller.run()
    print('UI thread started!')

    sessions = Runner(SessionsController(ui_controller.task))
    sessions.run()
    print('Session controller thread started!')

    # Connections listener thread start
    listener = Runner(ConnectionsListener(sessions.task))
    listener.run()
    print('Connection listener thread started!')

    print('End of init thread')
Example #10
def difficulty_stability2(datas, models, labels, points, runs=1, eval_data=None):
    filename = "../../data/matmat/2016-01-04/tmp2.data.pd"
    df = pd.DataFrame(columns=["answers", "correlation", "models"])
    answer_count = len(datas[0](None).get_dataframe_all())  # total number of answers
    for i in range(points):
        ratio = (i + 1) / points
        print("Evaluation for {}% of data".format(ratio * 100))

        for data, model, label in zip(datas, models, labels):
            for run in range(runs):
                d = data(None)
                d.set_seed(run)
                d.set_train_size(ratio)
                d.filter_data(100, 0)
                d.get_dataframe_train().to_pickle(filename)
                m1 = model(None)
                m2 = model(None)
                d1 = Data(filename, train_size=0.5, train_seed=run + 42)
                d2 = Data(filename, train_size=0.5, train_seed=-run - 42)

                Runner(d1, m1).run(force=True, only_train=True)
                Runner(d2, m2).run(force=True, only_train=True)

                items = d.get_items()
                if eval_data is None:
                    v1 = pd.Series(m1.get_difficulties(items), items)
                    v2 = pd.Series(m2.get_difficulties(items), items)
                else:
                    r1 = Runner(eval_data(None), m1)
                    r2 = Runner(eval_data(None), m2)
                    r1.run(force=True, skip_pre_process=True)
                    r2.run(force=True, skip_pre_process=True)
                    v1 = pd.Series(r1._log)
                    v2 = pd.Series(r2._log)
                df.loc[len(df)] = (ratio * answer_count, v1.corr(v2), label)

    print(df)
    sns.factorplot(x="answers", y="correlation", hue="models", data=df, markers=["o", "^", "v", "s", "D"])
    return df
Example #11
class Evaluator:
    def __init__(self, data, model):
        self._model = model
        self._data = data

        self._runner = Runner(data, model)
        self._hash = None

    def clean(self):
        self._runner.clean()

    def evaluate(self, force_evaluate=False, answer_filters=None, **kwargs):
        answer_filters = answer_filters if answer_filters is not None else {}
        report = self._load_report()
        self._data.join_predictions(pd.read_pickle(self._runner.get_log_filename()))
        if force_evaluate or "evaluated" not in report:
            print("Evaluating", self._hash, self._data, self._model)
            report.update(self._basic_metrics(self._data.iter_test(), **kwargs))

            report['time'] = self._basic_metrics(
                self._data.iter_test(),
                prediction_column="time_prediction_log",
                observation_column="response_time_log",
                brier_min=self._data.get_dataframe_test()['time_prediction_log'].min(),
                brier_max=self._data.get_dataframe_test()['time_prediction_log'].max(),
                **kwargs)

            report['time-raw'] = self._basic_metrics(
                self._data.iter_test(),
                prediction_column="time_prediction",
                observation_column="response_time",
                brier_min=self._data.get_dataframe_test()['time_prediction'].min(),
                brier_max=self._data.get_dataframe_test()['time_prediction'].max(),
                **kwargs)

        if answer_filters is not None:
            for filter_name, filter_function in answer_filters.items():
                if force_evaluate or filter_name not in report:
                    print("Evaluating", filter_name, self._hash, self._data, self._model)
                    data = filter_function(self._data.get_dataframe_test())
                    report[filter_name] = self._basic_metrics(self._data.iter(data=data), **kwargs)

                    report[filter_name]['time'] = self._basic_metrics(
                        self._data.iter(data=data),
                        prediction_column="time_prediction_log",
                        observation_column="response_time_log",
                        brier_min=self._data.get_dataframe_test()['time_prediction_log'].min(),
                        brier_max=self._data.get_dataframe_test()['time_prediction_log'].max(),
                        **kwargs)

                    report[filter_name]['time-raw'] = self._basic_metrics(
                        self._data.iter(data=data),
                        prediction_column="time_prediction",
                        observation_column="response_time",
                        brier_min=self._data.get_dataframe_test()['time_prediction'].min(),
                        brier_max=self._data.get_dataframe_test()['time_prediction'].max(),
                        **kwargs)

        self._save_report(report)
        return report

    def _basic_metrics(self, data, brier_bins=20, prediction_column="prediction", observation_column="correct", brier_min=0, brier_max=1):
        report = {}

        n = 0           # log count
        sse = 0         # sum of square error
        llsum = 0       # log-likelihood sum
        brier_counts = np.zeros(brier_bins)          # count of answers in bins
        brier_correct = np.zeros(brier_bins)        # sum of correct answers in bins
        brier_prediction = np.zeros(brier_bins)     # sum of predictions in bins

        for log in data:
            n += 1
            sse += (log[prediction_column] - log[observation_column]) ** 2
            llsum += math.log(max(0.0001, log[prediction_column] if log[observation_column] else (1 - log[prediction_column])))

            # brier
            bin_idx = min(int((log[prediction_column] - brier_min) / (brier_max - brier_min) * brier_bins), brier_bins - 1)
            brier_counts[bin_idx] += 1
            brier_correct[bin_idx] += log[observation_column]
            brier_prediction[bin_idx] += log[prediction_column]

        answer_mean = sum(brier_correct) / n

        report["extra"] = {"answer_mean": answer_mean}
        report["rmse"] = math.sqrt(sse / n)
        report["log-likely-hood"] = llsum
        if observation_column == "correct":
            try:
                report["AUC"] = metrics.roc_auc_score(self._data.get_dataframe_test()[observation_column],
                                                      self._data.get_dataframe_test()[prediction_column])
            except ValueError:
                print("AUC - converting responses to 0, 1")
                report["AUC"] = metrics.roc_auc_score(self._data.get_dataframe_test()[observation_column] > 0,
                                                      self._data.get_dataframe_test()[prediction_column])

        # brier
        brier_prediction_means = brier_prediction / brier_counts
        brier_prediction_means[np.isnan(brier_prediction_means)] = \
            ((np.arange(brier_bins) + 0.5) / brier_bins)[np.isnan(brier_prediction_means)]
        brier_correct_means = brier_correct / brier_counts
        brier_correct_means[np.isnan(brier_correct_means)] = 0
        brier = {
            "reliability":  sum(brier_counts * (brier_correct_means - brier_prediction_means) ** 2) / n,
            "resolution":  sum(brier_counts * (brier_correct_means - answer_mean) ** 2) / n,
            "uncertainty": answer_mean * (1 - answer_mean),

        }
        report["brier"] = brier

        report["extra"]["brier"] = {
            "max": brier_max,
            "min": brier_min,
            "bin_count": brier_bins,
            "bin_counts": list(brier_counts),
            "bin_prediction_means": list(brier_prediction_means),
            "bin_correct_means": list(brier_correct_means),
        }
        report["evaluated"] = True

        return report

    def get_report(self, force_evaluate=False, force_run=False, **kwargs):
        self._hash = self._runner.run(force=force_run)
        return self.evaluate(force_evaluate=force_evaluate or force_run, **kwargs)

    def roc_curve(self):
        self.get_report()
        self._data.join_predictions(pd.read_pickle(self._runner.get_log_filename()))
        fpr, tpr, thresholds = metrics.roc_curve(self._data.get_dataframe_test()["correct"] > 0, self._data.get_dataframe_test()["prediction"])
        print(fpr, tpr, thresholds)
        plt.plot(fpr, tpr, label=str(self._data))

    def _save_report(self, report):
        with open(self._runner.get_report_filename(), "w") as f:
            json.dump(report, f, indent=4)

    def _load_report(self):
        with open(self._runner.get_report_filename()) as f:
            return json.load(f)

    def __str__(self):
        return json.dumps(self.get_report(), sort_keys=True, indent=4)

    def brier_graphs(self, time=False, time_raw=False):
        report = self.get_report()
        if time and not time_raw:
            report = report['time']
        if time_raw:
            report = report['time-raw']

        plt.figure()
        plt.plot(report["extra"]["brier"]["bin_prediction_means"], report["extra"]["brier"]["bin_correct_means"])
        limits = report["extra"]["brier"]['min'], report["extra"]["brier"]['max']
        plt.plot(limits, limits)

        bin_count = report["extra"]["brier"]["bin_count"]
        counts = np.array(report["extra"]["brier"]["bin_counts"])
        bins = (np.arange(bin_count) + 0.5) * (limits[1] - limits[0]) / bin_count + limits[0]
        plt.bar(bins, counts / max(counts) * limits[1], width=(0.5 / bin_count * (limits[1] - limits[0])), alpha=0.5)
        plt.title(self._model)
        plt.xlabel('prediction')
        plt.ylabel('observation mean')
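
A hedged usage sketch for the Evaluator class above, reusing the data and model construction from Example #1; the report keys read below are the ones produced by _basic_metrics.

data = d.Data("../data/matmat/2016-11-28/answers.pd")
model = EloPriorCurrentModel(KC=2, KI=0.5)
evaluator = Evaluator(data, model)
report = evaluator.get_report()   # runs the model if needed, then evaluates
print(report["rmse"], report["AUC"])
evaluator.brier_graphs()
plt.show()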
Example #12
    def __init__(self, data, model):
        self._model = model
        self._data = data

        self._runner = Runner(data, model)
        self._hash = None
Example #13
def main():
    args = parse_args()

    save_path = create_save_dir(args.save_directory)
    mechanism = get_mechanism(args.mechanism)

    # Create DataLoaders
    trainloader = create_dataloader(args.datafile,
                                    "train",
                                    args.batch_size,
                                    mechanism,
                                    shuffle=True)
    validloader = create_dataloader(args.datafile,
                                    "valid",
                                    args.batch_size,
                                    mechanism,
                                    shuffle=False)
    testloader = create_dataloader(args.datafile,
                                   "test",
                                   args.batch_size,
                                   mechanism,
                                   shuffle=False)

    runner = Runner(args, mechanism)

    # Print header
    col_width = 5
    print("\n      |            Train              |            Valid              |")  # pylint: disable=line-too-long
    print_row(col_width, [
        "Epoch", "CE", "Err", "%Opt", "%Suc", "CE", "Err", "%Opt", "%Suc", "W",
        "dW", "Time", "Best"
    ])

    tr_total_loss, tr_total_error, tr_total_optimal, tr_total_success = [], [], [], []
    v_total_loss,   v_total_error,  v_total_optimal,  v_total_success = [], [], [], []
    for epoch in range(args.epochs):
        start_time = time.time()

        # Train the model
        tr_info = runner.train(trainloader, args.batch_size)

        # Compute validation stats and save the best model
        v_info = runner.validate(validloader)
        time_duration = time.time() - start_time

        # Print epoch logs
        print_row(col_width, [
            epoch + 1, tr_info["avg_loss"], tr_info["avg_error"],
            tr_info["avg_optimal"], tr_info["avg_success"], v_info["avg_loss"],
            v_info["avg_error"], v_info["avg_optimal"], v_info["avg_success"],
            tr_info["weight_norm"], tr_info["grad_norm"], time_duration,
            "!" if v_info["is_best"] else " "
        ])

        # Keep track of metrics:
        tr_total_loss.append(tr_info["avg_loss"])
        tr_total_error.append(tr_info["avg_error"])
        tr_total_optimal.append(tr_info["avg_optimal"])
        tr_total_success.append(tr_info["avg_success"])
        v_total_loss.append(v_info["avg_loss"])
        v_total_error.append(v_info["avg_error"])
        v_total_optimal.append(v_info["avg_optimal"])
        v_total_success.append(v_info["avg_success"])

        # Plot learning curves.
        def _plot(train, valid, name):
            plt.clf()
            x = np.arange(len(train))
            plt.plot(x, np.array(train), label="train")
            plt.plot(x, np.array(valid), label="valid")
            plt.legend()
            plt.savefig(name)

        _plot(tr_total_loss, v_total_loss, save_path + "_total_loss.pdf")
        _plot(tr_total_error, v_total_error, save_path + "_total_error.pdf")
        _plot(tr_total_optimal, v_total_optimal,
              save_path + "_total_optimal.pdf")
        _plot(tr_total_success, v_total_success,
              save_path + "_total_success.pdf")

        # Save intermediate model.
        if args.save_intermediate:
            torch.save(
                {
                    "model": runner.model.state_dict(),
                    "best_model": runner.best_model.state_dict(),
                    "tr_total_loss": tr_total_loss,
                    "tr_total_error": tr_total_error,
                    "tr_total_optimal": tr_total_optimal,
                    "tr_total_success": tr_total_success,
                    "v_total_loss": v_total_loss,
                    "v_total_error": v_total_error,
                    "v_total_optimal": v_total_optimal,
                    "v_total_success": v_total_success,
                }, save_path + ".e" + str(epoch) + ".pth")

    # Test accuracy
    print("\nFinal test performance:")
    t_final_info = runner.test(testloader)
    print_stats(t_final_info)

    print("\nBest test performance:")
    t_best_info = runner.test(testloader, use_best=True)
    print_stats(t_best_info)

    # Save the final trained model
    torch.save(
        {
            "model": runner.model.state_dict(),
            "best_model": runner.best_model.state_dict(),
            "tr_total_loss": tr_total_loss,
            "tr_total_error": tr_total_error,
            "tr_total_optimal": tr_total_optimal,
            "tr_total_success": tr_total_success,
            "v_total_loss": v_total_loss,
            "v_total_error": v_total_error,
            "v_total_optimal": v_total_optimal,
            "v_total_success": v_total_success,
            "t_final_loss": t_final_info["avg_loss"],
            "t_final_error": t_final_info["avg_error"],
            "t_final_optimal": t_final_info["avg_optimal"],
            "t_final_success": t_final_info["avg_success"],
            "t_best_loss": t_best_info["avg_loss"],
            "t_best_error": t_best_info["avg_error"],
            "t_best_optimal": t_best_info["avg_optimal"],
            "t_best_success": t_best_info["avg_success"],
        }, save_path + ".final.pth")