Example #1
def main(opt, dataloader_train, dataloader_val, path=None):
    # basic settings
    torch.backends.cudnn.enabled = False
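    # str(opt.gpu_ids)[1:-1] strips the list brackets, e.g. [0, 1] -> "0, 1"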
    os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu_ids)[1:-1]

    if torch.cuda.is_available():
        device = "cuda"
        torch.backends.cudnn.benchmark = False
    else:
        device = "cpu"
    #####################  Create Baseline Model  ####################
    net = ModelWrapper(opt)
    if path is not None:
        load(net, path)
    #net.load_checkpoint()
    #net=torch.load('/root/Desktop/res50_flop73_0.752.pth')
    net = net.to(device)
    net.parallel(opt.gpu_ids)
    net.get_compress_part().train()
    ##################### Fine-tuning #########################
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(net.optimizer,
                                                        50,
                                                        eta_min=5e-6)
    #lr_scheduler=optim.lr_scheduler.StepLR(net.optimizer,10,0.8)
    reporter = Reporter(opt)
    #best_acc = net.get_eval_scores(dataloader_val)["accuracy"]
    best_acc = 0
    net._net.train()
    for epoch in range(1, opt.epoch + 1):
        reporter.log_metric("lr", net.optimizer.param_groups[0]["lr"], epoch)
        train_loss = train_epoch(net, dataloader_train, net.optimizer)
        reporter.log_metric("train_loss", train_loss, epoch)
        lr_scheduler.step()
        scores = net.get_eval_scores(dataloader_val)
        print("==> Evaluation: Epoch={} Acc={}".format(epoch, str(scores)))
        reporter.log_metric("eval_acc", scores["accuracy"], epoch)
        if scores["accuracy"] > best_acc:
            best_acc = scores["accuracy"]
        reporter.log_metric("best_acc", best_acc, epoch)
        save_checkpoints(
            scores["accuracy"],
            net._net,
            reporter,
            opt.exp_name,
            epoch,
        )
        print("==> Training epoch %d" % epoch)
Example #2
def init(models: Iterable[Model], opt_config, additional_debugging_names: Optional[List[str]] = None):
    # debugger and logging
    if additional_debugging_names is None:
        additional_debugging_names = []
    debugger = Reporter([model.name for model in models] + additional_debugging_names)
    logging_fh = logging.FileHandler(debugger.file_path('logs.log'), 'w')
    logging_fh.setLevel(logging.DEBUG)
    logging_fh.setFormatter(logging.Formatter('[%(asctime)s][%(name)s][%(levelname)s] %(message)s'))
    logging.getLogger('').addHandler(logging_fh)
    # initialize plotter
    plotter = Plotter()
    # parse config
    if opt_config is not None:
        if isinstance(opt_config, str) and os.path.isfile(opt_config):
            with open(opt_config) as config_file:
                opt_config = yaml.safe_load(config_file)
                opt_config['sampling_count'] = tuple(opt_config['sampling_count'])
    logging.debug('optimization config parse complete, config:' + repr(opt_config))
    return debugger, opt_config, plotter
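
A hedged usage sketch for `init`: the only hard requirements visible above are that each model exposes a `.name` attribute and that the YAML config carries a `sampling_count` list; the constructor arguments and file name below are assumptions.

# Hypothetical call; Model construction and the YAML contents are assumed.
models = [Model(name="encoder"), Model(name="decoder")]
debugger, opt_config, plotter = init(
    models, "opt_config.yaml", additional_debugging_names=["ablation"])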
Example #3
def gen_rndchk_models(raw_dataset_folder, random_dataset_folder, minimum,
                      maximum, result_dir):
    raw_dset = Dataset.new_from_folders(raw_dataset_folder)
    raw_dset = raw_dset.filter_min_max(minimum, maximum)

    rnd_dset = Dataset.new_from_folders(random_dataset_folder)
    rnd_dset = rnd_dset.filter_min_max(minimum, maximum)

    r = Reporter()
    for cat, tset, vset in datasets_X_random(raw_dset, rnd_dset):
        print(cat)
        model = models.C64_16_2pr_C32_4_2pr_C64_32_2pr_F_D(
            2, 8, 'softmax', 'categorical_crossentropy')
        result = Trainer(model).train(tset, vset)
        h5_path = os.path.join(result_dir, '%s_random.h5' % cat)
        model.save(h5_path)
        r.add(
            result,
            category=cat,
            **report.report_epochs(**result._asdict()),
            **report.report_elapsed(**result._asdict()),
            **report.report_metrics(**result._asdict()),
        )
    r.save_report(os.path.join(result_dir, "experiments.tsv"))
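
The repeated `**result._asdict()` splats indicate that `Trainer(...).train` returns a namedtuple. A self-contained sketch of that pattern, with illustrative names that are not from the original code:

from collections import namedtuple

# Hypothetical result type; the real fields live in the Trainer module.
TrainResult = namedtuple("TrainResult", ["epochs", "elapsed", "metrics"])
result = TrainResult(epochs=30, elapsed=12.5, metrics={"acc": 0.91})

def report_epochs(epochs, **_):
    # picks out the fields it cares about, ignores the rest
    return {"epochs": epochs}

print(report_epochs(**result._asdict()))  # {'epochs': 30}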
Example #4
def main(opt):
    # basic settings
    os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu_ids)[1:-1]

    if torch.cuda.is_available():
        device = "cuda"
        torch.backends.cudnn.benchmark = True
    else:
        device = "cpu"
    ##################### Get Dataloader ####################
    dataloader_train, dataloader_val = custom_get_dataloaders(opt)
    # dummy_input is a sample input drawn from the dataloaders
    if hasattr(dataloader_val, "dataset"):
        dummy_input = dataloader_val.dataset[0][0].unsqueeze(0)
    else:
        # for imagenet dali loader
        dummy_input = torch.rand(1, 3, 224, 224)

    #####################  Create Baseline Model  ####################
    net = ModelWrapper(opt)
    net.load_checkpoint(opt.checkpoint)
    flops_before, params_before = model_summary(net.get_compress_part(),
                                                dummy_input)

    #####################  Load Pruning Strategy ###############
    compression_scheduler = distiller.file_config(net.get_compress_part(),
                                                  net.optimizer,
                                                  opt.compress_schedule_path)

    channel_config = get_channel_config(opt.search_result,
                                        opt.strategy_id)  # pruning strategy

    compression_scheduler = random_compression_scheduler(
        compression_scheduler, channel_config)

    ###### Adaptive-BN-based Candidate Evaluation of Pruning Strategy ###
    thinning(net, compression_scheduler, input_tensor=dummy_input)

    flops_after, params_after = model_summary(net.get_compress_part(),
                                              dummy_input)
    ratio = flops_after / flops_before
    print("FLOPs ratio:", ratio)
    net = net.to(device)
    net.parallel(opt.gpu_ids)
    net.get_compress_part().train()
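    # Adaptive-BN evaluation: a few forward passes in train mode refresh the
    # pruned model's BatchNorm running statistics; torch.no_grad() keeps the
    # weights themselves untouched.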
    with torch.no_grad():
        for index, sample in enumerate(tqdm(dataloader_train, leave=False)):
            _ = net.get_loss(sample)
            if index > 100:
                break

    strategy_score = net.get_eval_scores(dataloader_val)["accuracy"]

    print("Result file:{}, Strategy ID:{}, Evaluation score:{}".format(
        opt.search_result, opt.strategy_id, strategy_score))

    ##################### Fine-tuning #########################
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
        net.optimizer, opt.epoch)
    reporter = Reporter(opt)
    best_acc = 0
    net._net.train()
    for epoch in range(1, opt.epoch + 1):
        reporter.log_metric("lr", net.optimizer.param_groups[0]["lr"], epoch)
        train_loss = train_epoch(
            net,
            dataloader_train,
            net.optimizer,
        )
        reporter.log_metric("train_loss", train_loss, epoch)

        lr_scheduler.step()

        scores = net.get_eval_scores(dataloader_val)
        print("==> Evaluation: Epoch={} Acc={}".format(epoch, str(scores)))

        reporter.log_metric("eval_acc", scores["accuracy"], epoch)

        if scores["accuracy"] > best_acc:
            best_acc = scores["accuracy"]
        reporter.log_metric("best_acc", best_acc, epoch)

        save_checkpoints(
            scores["accuracy"],
            net._net,
            reporter,
            opt.exp_name,
            epoch,
        )

        print("==> Training epoch %d" % epoch)
Example #5
def __init__(self, file_name):
    self._file_name = file_name
    self._recorder = Recorder(self._file_name)
    self._reporter = Reporter(self._recorder)
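
A short usage sketch, assuming the constructor above belongs to a class named, say, `ReportingSession` (hypothetical); `Recorder` and `Reporter` are taken on faith from the snippet.

# Hypothetical enclosing class name; the constructor wires Recorder -> Reporter.
session = ReportingSession("run_01.log")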
Example #6
def train_gcn(seed, epochs, num_splits):
    # imports are deliberately deferred (lazy loading) so that
    # argument-parsing errors surface before the heavy imports run
    from typing import Generator
    import random
    import os
    import sys
    import datetime as dt
    from copy import copy, deepcopy

    import numpy as np
    import pandas as pd
    import networkx as nx
    from matplotlib import pyplot
    import matplotlib
    import seaborn as sns

    import torch
    import torch_geometric as tg

    from sklearn.decomposition import PCA
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    from mpl_proc import MplProc, ProxyObject

    from gf_dataset import GasFlowGraphs
    from locations import Coordinates
    from models import MyNet3, MyNet2, MyNet, cycle_loss, cycle_dst2
    from report import FigRecord, StringRecord, Reporter

    from seed_all import seed_all

    from animator import Animator

    class LineDrawer:
        def __init__(self, *, ax: matplotlib.axes.Axes, kw_reg, kw_min,
                     kw_train, kw_test):
            self.min_diff = float('inf')
            self.ax = ax
            self.kw_reg = kw_reg
            self.kw_min = kw_min
            self.kw_train = kw_train

            class FakeHline:
                def set(self, *args, **kwargs):
                    pass

            self.kw_test = kw_test
            self.min_train_hline = FakeHline()
            self.min_test_hline = FakeHline()

        def append(self, *, train_loss: float, test_loss: float):
            crt_diff = abs(test_loss - train_loss)
            if crt_diff < self.min_diff:
                self.min_diff = crt_diff
                self.min_train_hline.set(**self.kw_reg)
                self.min_test_hline.set(**self.kw_reg)
                self.min_train_hline = self.ax.hlines(**self.kw_train,
                                                      **self.kw_min,
                                                      y=train_loss)
                self.min_test_hline = self.ax.hlines(**self.kw_test,
                                                     **self.kw_min,
                                                     y=test_loss)
            else:
                self.ax.hlines(**self.kw_reg, **self.kw_train, y=train_loss)
                self.ax.hlines(**self.kw_reg, **self.kw_test, y=test_loss)

    print("[ Using Seed : ", seed, " ]")
    seed_all(seed)

    mpl_proc = MplProc()

    animator = Animator(mpl_proc)
    graph_dataset = GasFlowGraphs()
    lines = LineDrawer(ax=mpl_proc.proxy_ax,
                       kw_min=dict(),
                       kw_reg=dict(linewidth=0.3, color='gray'),
                       kw_train=dict(linestyle=':', xmin=300, xmax=400),
                       kw_test=dict(xmin=400, xmax=500))

    for seed in range(num_splits):
        # torch.manual_seed(seed)
        train_graphs, test_graphs = torch.utils.data.random_split(
            graph_dataset, (len(graph_dataset) - 20, 20))

        decision_tree = DecisionTreeClassifier(min_samples_leaf=6,
                                               max_depth=4,
                                               max_leaf_nodes=12)
        X = np.concatenate([g.edge_attr.T for g in train_graphs])
        y = np.concatenate([g.y for g in train_graphs])[:, 1]
        decision_tree.fit(X, y)
        predicted = decision_tree.predict(
            np.concatenate([g.edge_attr.T for g in test_graphs]))
        target = np.array([g.y[0, 1].item() for g in test_graphs])

        test_loss = cycle_loss(target, predicted)
        train_loss = cycle_loss(y, decision_tree.predict(X))

        if abs(test_loss - train_loss) < lines.min_diff:
            train_loader = tg.data.DataLoader(train_graphs,
                                              batch_size=len(train_graphs))
            test_loader = tg.data.DataLoader(test_graphs,
                                             batch_size=len(test_graphs))

        lines.append(test_loss=test_loss, train_loss=train_loss)

    lines = LineDrawer(ax=mpl_proc.proxy_ax,
                       kw_min=dict(),
                       kw_reg=dict(linewidth=0.3, color='gray'),
                       kw_train=dict(linestyle=':', xmin=100, xmax=200),
                       kw_test=dict(xmin=200, xmax=300))

    for seed in range(num_splits):
        train_graphs, test_graphs = torch.utils.data.random_split(
            graph_dataset, (len(graph_dataset) - 20, 20))
        gnb = GaussianNB()
        X = np.concatenate([g.edge_attr.T for g in train_graphs])
        y = np.concatenate([g.y for g in train_graphs])[:, 1]
        gnb.fit(X, y)
        predicted = gnb.predict(
            np.concatenate([g.edge_attr.T for g in test_graphs]))
        target = np.array([g.y[0, 1].item() for g in test_graphs])

        lines.append(test_loss=cycle_loss(target, predicted),
                     train_loss=cycle_loss(y, gnb.predict(X)))

    mynet = MyNet3()

    # seed_all(seed)
    train_graphs, test_graphs = torch.utils.data.random_split(
        graph_dataset, (len(graph_dataset) - 20, 20))
    train_loader = tg.data.DataLoader(train_graphs,
                                      batch_size=len(train_graphs))
    test_loader = tg.data.DataLoader(test_graphs, batch_size=len(test_graphs))

    optimizer = torch.optim.Adam(mynet.parameters(), lr=0.001)

    # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
    # torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

    def train_epochs():
        for epoch in range(epochs):
            train_loss = 0
            for batch in train_loader:
                # criterion = torch.nn.MSELoss()
                predicted = mynet(batch)

                loss = cycle_loss(predicted.flatten(), batch.y[:, 1].float())
                # loss = criterion(predicted, batch.y.float())
                train_loss += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # lr_scheduler.step()
            train_loss /= len(train_loader)
            yield train_loss

    class IntersectionFinder:
        def __init__(self):
            self.old = (None, None)

        def intersects(self, a: float, b: float) -> bool:
            old_a, old_b = self.old
            self.old = a, b
            if old_a is None:
                return False
            if a == b:
                return True
            return (old_a > old_b) != (a > b)

    intersections = IntersectionFinder()

    min_test_loss = float('inf')
    min_test_epoch = -1
    for epoch_no, train_loss in enumerate(train_epochs()):
        with torch.no_grad():
            test_loss = 0.0
            for batch in test_loader:
                predicted = mynet(batch)

                loss = cycle_loss(predicted.flatten(), batch.y[:, 1].float())
                test_loss += loss.item()
            test_loss /= len(test_loader)
            if test_loss < min_test_loss:
                min_test_loss = test_loss
                best = deepcopy(mynet)
                min_test_epoch = epoch_no
            if intersections.intersects(train_loss, test_loss):
                mpl_proc.proxy_ax.scatter(epoch_no,
                                          train_loss,
                                          s=100,
                                          marker='x',
                                          color='#3d89be')

        animator.add(train_loss, test_loss)

    fig: matplotlib.figure.Figure
    ax1: matplotlib.axes.Axes
    ax2: matplotlib.axes.Axes
    ax3: matplotlib.axes.Axes
    ax4: matplotlib.axes.Axes
    fig, ((ax1, ax2), (ax3, ax4)) = pyplot.subplots(ncols=2,
                                                    nrows=2,
                                                    sharey=True)

    def draw_tables(ax: matplotlib.axes.Axes, net: torch.nn.Module,
                    data: tg.data.DataLoader):
        table = np.full((13, 12), np.nan)
        for batch in data:
            predicted = net(batch)
            Y = batch.y[:, 0] - 2008
            M = batch.y[:, 1]
            table[Y, M] = cycle_dst2(M.float(),
                                     predicted.flatten().detach().numpy())**.5

        mshow = ax.matshow(table, vmin=0, vmax=6)
        ax.set(yticks=range(13), yticklabels=range(2008, 2021))
        return mshow

    mshow = draw_tables(ax1, mynet, train_loader)
    draw_tables(ax2, mynet, test_loader)
    draw_tables(ax3, best, train_loader)
    draw_tables(ax4, best, test_loader)

    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
    fig.colorbar(mshow, cax=cbar_ax)

    ax1.title.set_text('last')
    ax3.title.set_text(f'best {min_test_epoch}')

    def nxt_num() -> int:
        return sum(1 for n in os.listdir('experiments')
                   if n.startswith('exp-1')) + 1

    N = nxt_num()

    reporter = Reporter('report4.md')
    reporter.append(StringRecord(f'# {N}'))
    reporter.append(StringRecord(f'''
    ```
    {mynet}
    ```
    '''))
    reporter.append(FigRecord(fig, 'exp-2', f'experiments/exp-2-{N}.png'))
    reporter.append(
        FigRecord(mpl_proc.proxy_fig, 'exp-1', f'experiments/exp-1-{N}.png'))

    reporter.write()

    pyplot.show()
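
A minimal entry-point sketch; the argument names come from the signature above, the values are placeholders.

# Hypothetical driver; values are illustrative only.
if __name__ == '__main__':
    train_gcn(seed=0, epochs=500, num_splits=20)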
Example #7
    # check the integrity of the config files
    check_config_file(src_dir)
    check_config_file(src_dir, 'SubSourceCrawlerConfig.xml')
    check_config_file(src_dir, 'webForumConfiguration.xml')

    main = MainProcess(src_name, src_dir, thread_num)
    main.prepare()
    try:
        if flag in ('y', 'Y', 'yes', 'YES'):
            main.copy_subsource_resource()
            main.run_subsourcecrawler()
            main.read_finished_xml()
        else:
            check_config_file(src_dir, 'finished.xml')
            main.read_finished_xml(True)
        main.run_ingentia()

        print('Generating error report...')
        rpt = Reporter(src_name)
        rpt.gen()
    except OSError as e:
        print(e)
    except KeyboardInterrupt as e:
        print(e)
    finally:
        #main.rm_temp()
        send_gtalk("Testing [%s] finished " % src_name)


Example #8
def main(opt, channel_config, dataloader_train, dataloader_val, path):
    # basic settings
    torch.backends.cudnn.enabled = False
    os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.gpu_ids)[1:-1]

    if torch.cuda.is_available():
        device = "cuda"
        torch.backends.cudnn.benchmark = False
    else:
        device = "cpu"
    ##################### Get Dataloader ####################

    # dummy_input is a sample input drawn from the dataloaders
    if hasattr(dataloader_val, "dataset"):
        dummy_input = dataloader_val.dataset[0][0].unsqueeze(0)
    else:
        # for imagenet dali loader
        dummy_input = torch.rand(1, 3, 224, 224)

    #####################  Create Baseline Model  ####################
    net = ModelWrapper(opt)
    load(net, path)
    #net.load_checkpoint(opt.checkpoint)
    #####################  Load Pruning Strategy ###############
    compression_scheduler = distiller.file_config(net.get_compress_part(),
                                                  net.optimizer,
                                                  opt.compress_schedule_path)
    compression_scheduler = setCompressionScheduler(compression_scheduler,
                                                    channel_config)
    ###### Adaptive-BN-based Candidate Evaluation of Pruning Strategy ###
    thinning(net, compression_scheduler, input_tensor=dummy_input)
    flops_after, params_after = model_summary(net.get_compress_part(),
                                              dummy_input)
    net = net.to(device)
    net.parallel(opt.gpu_ids)
    net.get_compress_part().train()
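    # Adaptive-BN recalibration, as in Example #4: forward passes in train
    # mode refresh BatchNorm running stats; no_grad() leaves weights unchanged.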
    t = tqdm(dataloader_train, leave=False)
    with torch.no_grad():
        for index, sample in enumerate(t):
            _ = net.get_loss(sample)
            if index > 100:
                break
    strategy_score = net.get_eval_scores(dataloader_val)["accuracy"]
    old = strategy_score
    print("Evaluation score:{}".format(strategy_score))
    ##################### Fine-tuning #########################
    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(net.optimizer,
                                                        100,
                                                        eta_min=5e-5)
    #lr_scheduler=optim.lr_scheduler.StepLR(net.optimizer,5,0.9)
    reporter = Reporter(opt)
    best_acc = strategy_score
    best_kappa = 0
    net._net.train()
    for epoch in range(1, opt.epoch + 1):
        net.confusion_matrix.reset()
        reporter.log_metric("lr", net.optimizer.param_groups[0]["lr"], epoch)
        train_loss = train_epoch(
            net,
            dataloader_train,
            net.optimizer,
        )
        reporter.log_metric("train_loss", train_loss, epoch)
        lr_scheduler.step()
        scores = net.get_eval_scores(dataloader_val)
        kappa = CaluKappa(net.confusion_matrix)
        print("==> Evaluation: Epoch={} Acc={}".format(epoch, str(scores)))
        reporter.log_metric("eval_acc", scores["accuracy"], epoch)
        reporter.log_metric("kappa", kappa, epoch)
        if scores["accuracy"] > best_acc:
            best_acc = scores["accuracy"]
            best_kappa = kappa
            save_checkpoints(
                scores["accuracy"],
                net._net,
                reporter,
                opt.exp_name,
                epoch,
            )
        reporter.log_metric("best_acc", best_acc, epoch)
        save_checkpoints(
            scores["accuracy"],
            net._net,
            reporter,
            opt.exp_name,
            epoch,
        )
        print("==> Training epoch %d" % epoch)
    """将模型转换为torch script保存"""
    ckpt_name = "{}_best.pth".format(opt.exp_name)
    load(net, os.path.join(reporter.ckpt_log_dir, ckpt_name))
    net._net.eval()
    traced_script_module = torch.jit.trace(net._net,
                                           torch.rand(1, 3, 256, 256))
    traced_script_module.save(os.path.join(reporter.log_dir, "model.pt"))
    del net
    return old, best_acc, best_kappa, flops_after, params_after
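
A hedged call-site sketch: only the parameter order and the five return values are visible above; the surrounding search loop and variable names are assumptions.

# Hypothetical call site inside an outer pruning-search loop.
old_acc, best_acc, best_kappa, flops, params = main(
    opt, channel_config, dataloader_train, dataloader_val, path)
print("score before fine-tuning {:.4f} -> best {:.4f} (kappa {:.4f}), "
      "FLOPs {} params {}".format(old_acc, best_acc, best_kappa, flops, params))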