Пример #1
0
def rate(title, body, source):
    """Score an article on a 0-10 scale and return its related article.

    Combines a trained-classifier score with an article-searcher score;
    articles from trusted sources get a flat bonus term in the blend.

    Returns:
        (score, related_article) tuple.
    """
    # Get score from trained classifier
    clf = Classifier()
    classifier_score = clf.predict(title, body)

    # Get score from article searcher
    related_article, search_score = searcher_score(title, body)

    # Reduce the source URL to its host: strip the scheme, then cut at
    # the first slash (kept whole when there is no path component).
    if source.startswith("http://"):
        source = source[len("http://"):]
    elif source.startswith("https://"):
        source = source[len("https://"):]
    source = source.split("/", 1)[0]

    print(source)
    is_trusted = is_from_trusted_source(source)
    print("Is trusted ", is_trusted)

    # Blend weights: p for the classifier, q for the searcher.
    p = 0.55
    q = 0.3

    if is_trusted:
        # Trusted sources receive the leftover weight (1 - p - q) as a bonus.
        return 10 * (classifier_score * p +
                     (1 - p - q) + search_score * q), related_article
    else:
        return 10 * (classifier_score * 0.7 +
                     search_score * 0.3), related_article
Пример #2
0
def main():
    """Run one sample sentence through the trained classifier and print it."""
    model = Classifier("data/best_model/model.meta",
                       "data/best_model/model")
    samples = [["0", "I used to like cars."]]
    tokens = tokenize_sentences(samples)
    encoded, _ = encode_sentences(tokens, 32)
    print(model.infer(encoded))
Пример #3
0
def rate(title, body):
    """Blend classifier (weight 0.8) and searcher (weight 0.2) scores."""
    # Score from the trained classifier.
    classifier_part = Classifier().predict(title, body)

    # Score from the article searcher.
    searcher_part = searcher_score(title, body)

    return classifier_part * 0.8 + searcher_part * 0.2
Пример #4
0
def main(fmodel, fvocab, rpath, wpath):
    """Label every *.conll file under rpath and write results to wpath.

    fmodel: path to the pickled classifier model.
    fvocab: path to the gzipped vocabulary file.
    rpath:  input directory containing .conll documents.
    wpath:  output directory for the labelled documents.
    """
    clf = Classifier()
    dr = DocReader()
    clf.loadmodel(fmodel)
    flist = [join(rpath, fname) for fname in listdir(rpath)
             if fname.endswith('conll')]
    vocab = load(gzip.open(fvocab))
    for (fidx, fname) in enumerate(flist):
        # BUG FIX: original used a Python 2 print statement (SyntaxError
        # under Python 3, which the rest of this file targets).
        print("Processing file: {}".format(fname))
        doc = dr.read(fname, withboundary=False)
        sg = SampleGenerator(vocab)
        sg.build(doc)
        M, _ = sg.getmat()
        predlabels = clf.predict(M)
        doc = postprocess(doc, predlabels)
        writedoc(doc, fname, wpath)
Пример #5
0
def run(args):
    """Restore the best checkpoint and run test-set inference on GPU."""
    with open(args.model_path + 'cfg.json') as f:
        cfg = edict(json.load(f))

    # Parse the requested GPU ids and make sure they exist.
    device_ids = [int(d) for d in args.device_ids.split(',')]
    available = torch.cuda.device_count()
    if available < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            available, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    # Build the model, wrap for multi-GPU, and load the best checkpoint.
    model = DataParallel(Classifier(cfg),
                         device_ids=device_ids).to(device).eval()
    ckpt = torch.load(os.path.join(args.model_path, 'best1.ckpt'),
                      map_location=device)
    model.module.load_state_dict(ckpt['state_dict'])

    test_set = ImageDataset(args.in_csv_path, cfg, mode='test')
    dataloader_test = DataLoader(test_set,
                                 batch_size=cfg.dev_batch_size,
                                 num_workers=args.num_workers,
                                 drop_last=False,
                                 shuffle=False)

    test_epoch(cfg, args, model, dataloader_test, args.out_csv_path)

    print('Save best is step :', ckpt['step'], 'AUC :', ckpt['auc_dev_best'])
Пример #6
0
    def _generate_next_sw_config(self, current_vertex_distributions,
                                 current_clustering, level_counter):
        """Generate sw configuration for next run base on current clustering result.

        Raises:
            ValueError: if level_counter is not 1 or 2 (the original code
                fell through with ``config`` undefined and crashed with a
                NameError at config.setup()).
        """
        # BUG FIX: the original tested ``level_counter == 1`` twice; the two
        # branches are merged so each level is handled in a single place.
        if level_counter == 1:
            graph_size = len(self.corpus)
            next_vertex_distributions = self._generate_initial_vertex_distributions(
            )
            config = SWConfig(graph_size,
                              vertex_distributions=next_vertex_distributions,
                              documents=self.corpus.documents,
                              vocabularies=self.corpus.vocabularies,
                              level=level_counter)
        elif level_counter == 2:
            graph_size = len(current_clustering)
            # create new vertex distribution by merging vertices that were
            # clustered together in the previous run
            next_vertex_distributions = _combine_vertex_distributions_given_clustering(
                current_vertex_distributions, current_clustering)
            classifier = None
            if self._classifier_model_file is not None:
                classifier = Classifier(self._classifier_model_file)
            config = SWConfigLevel2(
                graph_size,
                vertex_distributions=next_vertex_distributions,
                documents=self.corpus.documents,
                vocabularies=self.corpus.vocabularies,
                level=level_counter,
                classifier=classifier)
        else:
            raise ValueError(
                "Unsupported level_counter: {}".format(level_counter))
        config.setup()
        return config
Пример #7
0
def sw_Process():
    """Run SW sampling to segment test document '2008080814' and save plots."""
    all_sentences, true_segment = readingfiles.read_testing_file('2008080814')
    transition_prob, length_prior, seg_num_prior = (
        readingfiles.load_model_parameters('preprocessing/model_segmenter.txt'))
    classifier = Classifier('preprocessing/model_segmenter.txt')

    segmentation_model = SegmentationModel(all_sentences, transition_prob,
                                           length_prior, seg_num_prior,
                                           classifier)
    plotter = Plotter(segmentation_model, true_segment)

    # Chain graph: one edge between each pair of consecutive sentences.
    node_number = len(all_sentences)
    edges = [[i, i + 1] for i in range(node_number - 1)]

    print('Start Sampling')
    sw.sample(node_number,
              edges,
              segmentation_model.calculate_Qe,
              segmentation_model.target_evaluation_func,
              plotter.plot_callback,
              initial_clustering=None,
              monitor_statistics=segmentation_model.calculate_energy)
    print('Converged.')
    plotter.save()
def get_json_data():
    """Flask view: classify the posting client as unique and index the event.

    Expects a JSON POST body with fingerprint fields (ip, os, browser,
    resolution, ...); enriches it with geolocation and a timestamp, then
    indexes it into Elasticsearch. Always returns an empty 200 response.
    """
    if request.method == 'POST':
        data = request.get_json()
        # Get city, state and country name from the client IP.
        # SECURITY: the API key is hard-coded in the URL — move it to
        # configuration/environment instead of source control.
        url = "https://api.ipgeolocation.io/ipgeo?apiKey=2b1ee37501e64754b85f704fab4a5b82&ip=" + data[
            "ip"]
        resp = requests.get(url=url)
        info = resp.json()

        # ==== step 1: extract fingerprint features for the classifier.
        platform = data["platformmodel"]
        OS = data["os"].split("|")[0]
        timezone = int(data["timezone"])
        user_agent = data["user_agent"].split("|")[0].lower()
        browser = ""
        browser_version = ""
        try:
            browser, browser_version = [
                i.lower() for i in data["browser"].split()
            ]
        except ValueError:
            # BUG FIX: was a bare ``except:``; only the unpack mismatch
            # (no version component in "browser") should be handled here.
            browser = data["browser"].lower()
        channel, width, height = [
            int(i) for i in data["resolution"].split("|")
        ]
        vendor = data["vendor"]
        language = data["language"].lower()

        # step 2: call the model to decide uniqueness and get the user id.
        obj = Classifier()
        # Renamed from ``id`` to avoid shadowing the builtin.
        boolean, user_id = obj.test(platform, OS, browser, timezone, width,
                                    height, channel, user_agent, vendor,
                                    language)
        print(boolean, user_id)

        # Enrich the event with geolocation and a timestamp.
        data["country"] = info["country_name"]
        data["city"] = info["city"]
        data["state"] = info["state_prov"]
        data["time"] = datetime.datetime.now().isoformat()

        del data["ip"]
        print("********-------------*****")
        print("Data to send is ", data)
        print("*******--------------*****")
        es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
        res = es.index(index='my-index-000001', body=data)
    return '', 200
Пример #9
0
def main(ftrain, fdev=None, fmodel='model/model.pickle.gz'):
    """Train the classifier on ftrain (optionally with dev data) and save it.

    ftrain/fdev: gzipped pickle files holding 'data' and 'labels' keys.
    fmodel: path where the trained model is persisted.
    """
    # Load data.
    # BUG FIX: original used Python 2 print statements (SyntaxError under
    # Python 3, which the rest of this file targets).
    print('Loading training data ...')
    data = load(gzip.open(ftrain))
    M, labels = data['data'], data['labels']
    # Load dev data (optional).
    if fdev is not None:
        print('Loading dev data ...')
        devdata = load(gzip.open(fdev))
        devM, devlabels = devdata['data'], devdata['labels']
    else:
        devM, devlabels = None, None
    # Training with specified parameters.
    print('Training ...')
    clf = Classifier()
    clf.train(M, labels, devM, devlabels)
    clf.savemodel(fmodel)
Пример #10
0
def train(opt):
    """Train a text classifier end-to-end: corpus -> tfidf -> selection -> model.

    opt is expected to carry: corpus_root, encoding, tfidf_top_k,
    which_filter ('mi' selects mutual-information filtering), mi_threshold,
    which_classifier ('svm' or GBDT fallback), and path_to_save_model.

    NOTE(review): imports are deliberately local to each stage; each stage
    logs a begin/end line through options.TrainLogPrefix.
    """
    # Prepare the training corpus
    print(options.TrainLogPrefix + "Prepare the training corpus begin!")
    from datasource.input_corpus import InputCorpus
    input_corpus = InputCorpus(opt.corpus_root, encoding=opt.encoding)
    print(options.TrainLogPrefix + "Prepare the training corpus end!")

    # Get the basic tfidf features
    print(options.TrainLogPrefix + "Get the basic tfidf features begin!")
    from feature.ngram_tfidf import NgramTfidf
    ngram_tfidf = NgramTfidf(input_corpus)
    ngram_tfidf.set_stopwords('./resource/stop_words_zh.utf8.txt')
    import numpy as np
    tfidf_mat, features = ngram_tfidf.get_tfidf_mat(top_k=opt.tfidf_top_k)
    tfidf_mat = np.asarray(tfidf_mat)
    features = np.asarray(features)
    # Per-file labels, aligned with the rows of tfidf_mat.
    targets = np.asarray(input_corpus.get_filenames_and_targets()[1])
    print(options.TrainLogPrefix + "Get the basic tfidf features end!")

    # Do feature selection
    print(options.TrainLogPrefix + "Do feature selection begin!")
    if opt.which_filter == 'mi':
        from feature.feature_selection import MISelection as FeatureSelection
        feature_selector = FeatureSelection(tfidf_mat,
                                            targets,
                                            mi_threshold=opt.mi_threshold)
    else:
        from feature.feature_selection import GBDTSelection as FeatureSelection
        feature_selector = FeatureSelection(tfidf_mat, targets)
    # Boolean column mask over features kept by the selector.
    boolean_selection_index = feature_selector.get_boolean_selection_lst()
    filtered_tfidf_mat = tfidf_mat[:, boolean_selection_index]
    filtered_features = features[boolean_selection_index]
    print(options.TrainLogPrefix + "Do feature selection end!")

    # Training model
    print(options.TrainLogPrefix + "Training model begin!")
    if opt.which_classifier == 'svm':
        from model.classifier import SVMClassifier as Classifier
    else:
        from model.classifier import GBDTClassifier as Classifier
    classifier_model = Classifier()
    from model.classifier import Scorer
    scorer = Scorer(classifier_model.get_model(), filtered_tfidf_mat, targets)
    print(options.TrainLogPrefix + "Training model end!")
    scorer.show_score()

    # Save the model
    model_save_path = opt.path_to_save_model
    from utils import util
    # Create the parent directory of the save path if needed.
    util.mkdirs('/'.join(model_save_path.split('/')[:-1]))
    classifier_model.dump(filtered_tfidf_mat, targets, model_save_path)
    print(options.TrainLogPrefix + 'model save to ' + model_save_path)

    # Save the filtered features
    filtered_features_save_path = opt.path_to_save_model + options.FeaturesSaveSuffix
    # df_vec: document frequency of each kept feature, stored beside the model.
    df_vec = ngram_tfidf.numDocsContainingFeatures(filtered_features)
    save_features_df(df_vec, filtered_features, len(tfidf_mat),
                     filtered_features_save_path)
def main(save_id, gen_p, train_p, eval_p, backbone_id, return_eval=False, use_bottleneck=True, file_id=""):
    """Optionally generate IAD files, train, and/or evaluate a classifier.

    save_id: tag for the saved-model directory and output csv name.
    gen_p / train_p / eval_p: enable the generate / train / evaluate phases.
    backbone_id: key used by define_model to pick the backbone settings.
    return_eval: if True, return the evaluation DataFrame instead of saving.
    use_bottleneck: forwarded to each Classifier instance.
    file_id: extra suffix appended to the output csv name.
    """
    print("save_id: {0}, train_p : {1}, eval_p: {2}, backbone_id: {3}, ".format(save_id, train_p, eval_p, backbone_id))

    from model_def import define_model
    model_dict = define_model(backbone_id)

    num_segments = model_dict["num_segments"]
    bottleneck_size = model_dict["bottleneck_size"]
    dense_sample = model_dict["dense_sample"]
    dense_rate = model_dict["dense_rate"]

    dir_name = os.path.join("saved_models", save_id)  # lfd_params
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # NOTE(review): "../model" points OUTSIDE the directory just created —
    # confirm this is intentional and not meant to be join(dir_name, "model").
    filename = os.path.join(dir_name, "../model")

    lfd_params = default_model_args(save_id=save_id, log_dir=dir_name,
                                    num_segments=num_segments, bottleneck_size=bottleneck_size,
                                    dense_sample=dense_sample, dense_rate=dense_rate)  # parse_model_args()

    if gen_p:
        print("Generating ITR Files")
        # Feature-extractor-only model used to dump IAD files to disk.
        model = Classifier(lfd_params, filename, backbone_id, use_feature_extractor=True, use_spatial_lstm=False,
                           spatial_train=False, use_bottleneck=use_bottleneck)

        generate_iad_files(lfd_params, model, "train", backbone=backbone_id)
        generate_iad_files(lfd_params, model, "evaluation", backbone=backbone_id)

    if train_p:
        # Spatial-LSTM model trained on the generated IADs.
        model = Classifier(lfd_params, filename, backbone_id, use_feature_extractor=False, use_spatial_lstm=True,
                                spatial_train=True, use_bottleneck=use_bottleneck)

        model = train(lfd_params, model, verbose=True, input_dtype="iad")
        model.save_model()

    if eval_p:
        # Same architecture as training but with spatial training disabled.
        model = Classifier(lfd_params, filename, backbone_id, use_feature_extractor=False, use_spatial_lstm=True,
                                spatial_train=False, use_bottleneck=use_bottleneck)

        train_df = evaluate(lfd_params, model, mode="train", input_dtype="iad")
        train_df["mode"] = ["train"]*len(train_df)
        eval_df = evaluate(lfd_params, model, mode="evaluation", verbose=True, input_dtype="iad")
        eval_df["mode"] = ["evaluation"] * len(eval_df)
        df = pd.concat([train_df, eval_df])

        if return_eval:
            return df

        df["repeat"] = ["1"]*len(df)

        out_filename = os.path.join(lfd_params.args.output_dir, "output_" + save_id + file_id+".csv")
        df.to_csv(out_filename)
        print("Output placed in: " + out_filename)
Пример #12
0
def build_model(cfg, paramsfile, device, device_ids=None):
    """Build a DataParallel classifier and restore weights from a checkpoint.

    cfg: model configuration object.
    paramsfile: checkpoint path (a raw state_dict or a dict wrapping one
        under 'state_dict').
    device: primary torch.device to place the model on.
    device_ids: GPU ids for DataParallel. Defaults to the index of
        ``device`` when available. BUG FIX: the original referenced an
        undefined global ``device_ids`` (NameError at call time).
    """
    if device_ids is None and device.index is not None:
        device_ids = [device.index]
    model = Classifier(cfg)
    model = DataParallel(model, device_ids=device_ids).to(device).eval()
    ckpt = torch.load(paramsfile, map_location=device)
    state_dict = ckpt['state_dict'] if 'state_dict' in ckpt else ckpt
    model.module.load_state_dict(state_dict)
    if 'step' in ckpt and 'auc_dev_best' in ckpt:
        print(f"Using model '{paramsfile}' at step: {ckpt['step']} "
              f"with AUC: {ckpt['auc_dev_best']}")
    return model
Пример #13
0
def define_model(args, lfd_params, train, app=None, suffix=None, use_bottleneck=False, backbone=False):
    """Build a Classifier or PolicyLearner configured for the given suffix.

    args: parsed CLI args (provides .model and .app).
    lfd_params: model hyper-parameters, forwarded to the constructors.
    train: whether the suffix-selected components should be trainable.
    app: 'c' forces a Classifier; defaults to args.app.
    suffix: Suffix enum member selecting which components are enabled.

    Returns the constructed model, or None for an unknown suffix.
    """
    backbone_id = model_dict[args.model]
    filename = make_model_name(args, lfd_params, backbone=backbone)

    if app is None:
        app = args.app

    # Component toggles; which ones are enabled/trained depends on suffix.
    use_feature_extractor = False
    use_spatial = False
    use_pipeline = False
    use_temporal = False

    train_feature_extractor = False
    train_spatial = False
    train_pipeline = False
    train_temporal = False

    if suffix == Suffix.BACKBONE:
        use_feature_extractor = True
        use_spatial = True
        train_feature_extractor = train
        train_spatial = train
    elif suffix == Suffix.GENERATE_IAD:
        use_feature_extractor = True
        use_spatial = False
    elif suffix == Suffix.PIPELINE:
        use_pipeline = True
        train_pipeline = train
    elif suffix in [Suffix.LINEAR, Suffix.LINEAR_IAD, Suffix.LSTM_IAD, Suffix.LSTM, Suffix.TCN]:
        use_spatial = True
        train_spatial = train
    elif suffix == Suffix.DITRL:
        use_temporal = True
        train_temporal = train
    else:
        print(f"ERROR: execute.py: suffix '{suffix}' not available")
        return None

    # classifier
    # BUG FIX: original read ``suffix.GENERATE_IAD`` (attribute lookup on an
    # enum *member*, deprecated and removed in Python 3.12); the enum class
    # ``Suffix`` is the correct owner of the member.
    if app == 'c' or suffix in [Suffix.PIPELINE, Suffix.GENERATE_IAD]:
        return Classifier(lfd_params, filename, backbone_id, suffix,
                          use_feature_extractor=use_feature_extractor, train_feature_extractor=train_feature_extractor,
                          use_bottleneck=use_bottleneck,
                          use_spatial=use_spatial, train_spatial=train_spatial,
                          use_pipeline=use_pipeline, train_pipeline=train_pipeline,
                          use_temporal=use_temporal, train_temporal=train_temporal)

    # policy_learner
    return PolicyLearner(lfd_params, filename, backbone_id, suffix,
                         use_feature_extractor=use_feature_extractor, train_feature_extractor=train_feature_extractor,
                         use_bottleneck=use_bottleneck,
                         use_spatial=use_spatial, train_spatial=train_spatial,
                         use_pipeline=use_pipeline, train_pipeline=train_pipeline,
                         use_temporal=use_temporal, train_temporal=train_temporal,
                         train_policy=train)
    def __init__(self):
        """Load the classifier: config, rebuilt heads, weights, and device.

        Reads config/example.json from the sibling "model" directory,
        rebuilds the classifier heads for six single-logit outputs, and
        restores weights from model_best.pt (CPU fallback when CUDA is
        unavailable).
        """
        # "model" directory that sits next to this file's parent directory.
        root_path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                 "model")

        with open(os.path.join(root_path, "config/example.json")) as f:
            cfg = edict(json.load(f))

        self.model = Classifier(cfg)
        # Six binary (single-logit) heads; re-initialize the dependent
        # submodules so they match the overridden head count.
        self.model.cfg.num_classes = [1, 1, 1, 1, 1, 1]
        self.model._init_classifier()
        self.model._init_attention_map()
        self.model._init_bn()

        if torch.cuda.is_available():
            self.model = self.model.eval().cuda()
        else:
            self.model = self.model.eval().cpu()

        chkpt_path = os.path.join(root_path, "model_best.pt")
        # map_location keeps CPU-only loading working for GPU-saved weights.
        self.model.load_state_dict(
            torch.load(chkpt_path, map_location=lambda storage, loc: storage))
Пример #15
0
def build_model(cfg, paramsfile):
    """Build a CPU-resident classifier from a saved checkpoint.

    Accepts either a raw state_dict or a checkpoint dict that wraps one
    under 'state_dict'. Returns the model in eval mode.
    """
    model = Classifier(cfg)
    model = model.to('cpu')
    checkpoint = torch.load(paramsfile, map_location='cpu')
    if 'state_dict' in checkpoint:
        weights = checkpoint['state_dict']
    else:
        weights = checkpoint
    model.load_state_dict(weights)
    # Report checkpoint provenance when the metadata is present.
    if 'step' in checkpoint and 'auc_dev_best' in checkpoint:
        print(f"Using model '{paramsfile}' at step: {checkpoint['step']} "
              f"with AUC: {checkpoint['auc_dev_best']}")
    return model.eval()
Пример #16
0
def profile_model(cfg, train_file, save_prefix, use_fl):
    """Estimate total training MACs/FLOPs for the classifier and save them.

    cfg: training configuration (batch sizes, epochs, ...).
    train_file: csv describing the training images.
    save_prefix: path of the text file the results are written to.
    use_fl: if True, scale batches by federated-learning comm rounds.

    Raises:
        ValueError: if the training dataloader yields no batches (the
            original for/break loop left ``macs`` undefined in that case).
    """
    model = Classifier(cfg)

    dataloader_train = DataLoader(ImageDataset(train_file, cfg, mode='train'),
                                  batch_size=cfg.train_batch_size,
                                  num_workers=4,
                                  drop_last=True,
                                  shuffle=False)

    device = torch.device("cpu")

    # THOP needs explicit counters for these custom pooling layers.
    custom_ops = {
        ExpPool: count_exp_pool,
        LinearPool: count_lin_pool,
        LogSumExpPool: count_log_sum_exp_pool,
        torch.nn.modules.activation.Sigmoid: count_sig,
    }

    # Profile a single batch; per-batch cost is constant across batches.
    first_batch = next(iter(dataloader_train), None)
    if first_batch is None:
        raise ValueError("profile_model: training dataloader is empty")
    inputs = first_batch[0].to(device)
    macs, params = profile(model, inputs=(inputs, ), custom_ops=custom_ops)

    steps = len(dataloader_train)
    if use_fl:
        comm_rounds = cfg.epoch
        epochs = cfg.local_epoch
        total_batches = steps * epochs * comm_rounds
    else:
        epochs = cfg.epoch
        total_batches = steps * epochs

    # When comparing MACs /FLOPs, we want the number to be implementation-agnostic and as general as possible.
    # The THOP library therefore only considers the number of multiplications and ignore all other operations.
    total_macs = macs * total_batches
    total_flops_approx = 2 * total_macs

    total_macs_formatted, _ = clever_format([total_macs, params], "%.5f")
    total_flops_approx_formatted, _ = clever_format(
        [total_flops_approx, params], "%.5f")

    print(f"Total MACs: {total_macs_formatted}")
    print(f"Approximate Total FLOPs: {total_flops_approx_formatted}")

    # Save results to file
    with open(save_prefix, "w") as f:
        f.write(f"Total MACs: {total_macs_formatted}\n")
        f.write(f"Approximate Total FLOPs: {total_flops_approx_formatted}")
Пример #17
0
def main(ftrain, fdev=None, fmodel='model/model.pickle.gz'):
    """Train the classifier on ftrain (optionally with dev data) and save it.

    ftrain/fdev: gzipped pickle files holding 'data' and 'labels' keys.
    fmodel: path where the trained model is persisted.
    """
    # Load data.
    # BUG FIX: original used Python 2 print statements (SyntaxError under
    # Python 3, which the rest of this file targets).
    print('Loading training data ...')
    data = load(gzip.open(ftrain))
    M, labels = data['data'], data['labels']
    # Load dev data (optional).
    if fdev is not None:
        print('Loading dev data ...')
        devdata = load(gzip.open(fdev))
        devM, devlabels = devdata['data'], devdata['labels']
    else:
        devM, devlabels = None, None
    # Training with specified parameters.
    print('Training ...')
    clf = Classifier()
    clf.train(M, labels, devM, devlabels)
    clf.savemodel(fmodel)
def exec_classifier_backbone(args):
    """Train (unless eval-only) and evaluate the spatial classifier backbone.

    NOTE(review): this snippet references names not defined in this scope
    (lfd_params, filename, backbone_id, use_bottleneck, save_id, train,
    evaluate) — presumably module globals; verify against the full file.
    """
    # Train
    # BUG FIX: the training branch was guarded by ``if args.eval_only:``,
    # which trained exactly when the caller asked to *skip* training and
    # contradicted the "# Train" comment; inverted to match the flag name.
    if not args.eval_only:
        model = Classifier(lfd_params,
                           filename,
                           backbone_id,
                           use_feature_extractor=False,
                           use_spatial=True,
                           spatial_train=True,
                           use_bottleneck=use_bottleneck)

        model = train(lfd_params, model, verbose=True)
        model.save_model()

    # Evaluate
    model = Classifier(lfd_params,
                       filename,
                       backbone_id,
                       use_feature_extractor=False,
                       use_spatial=True,
                       spatial_train=False,
                       use_bottleneck=use_bottleneck)

    train_df = evaluate(lfd_params, model, mode="train")
    train_df["mode"] = ["train"] * len(train_df)
    eval_df = evaluate(lfd_params, model, mode="evaluation", verbose=True)
    eval_df["mode"] = ["evaluation"] * len(eval_df)
    df = pd.concat([train_df, eval_df])
    df["repeat"] = ["1"] * len(df)

    out_filename = os.path.join(lfd_params.args.output_dir,
                                "output_" + save_id + "_spatial.csv")
    df.to_csv(out_filename)
    print("Output placed in: " + out_filename)

    return 0
Пример #19
0
def main():
    if args.dataset == 'ChestXray-NIHCC':
        if args.no_fiding:
            classes = [
                'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration',
                'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation',
                'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening',
                'Hernia', 'No Fiding'
            ]
        else:
            classes = [
                'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration',
                'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation',
                'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening',
                'Hernia'
            ]
    elif args.dataset == 'CheXpert-v1.0-small':
        classes = [
            'No Finding', 'Enlarged Cardiomediastinum', 'Cardiomegaly',
            'Lung Opacity', 'Lung Lesion', 'Edema', 'Consolidation',
            'Pneumonia', 'Atelectasis', 'Pneumothorax', 'Pleural Effusion',
            'Pleural Other', 'Fracture', 'Support Devices'
        ]
    else:
        print('--dataset incorrect')
        return

    torch.manual_seed(args.seed)
    use_gpu = torch.cuda.is_available()
    if args.use_cpu: use_gpu = False

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    pin_memory = True if use_gpu else False

    print("Initializing dataset: {}".format(args.dataset))

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(556),
            transforms.CenterCrop(512),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        'valid':
        transforms.Compose([
            transforms.Resize(556),
            transforms.CenterCrop(512),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
    }

    datasetTrain = DatasetGenerator(path_base=args.base_dir,
                                    dataset_file='train',
                                    transform=data_transforms['train'],
                                    dataset_=args.dataset,
                                    no_fiding=args.no_fiding)

    datasetVal = DatasetGenerator(path_base=args.base_dir,
                                  dataset_file='valid',
                                  transform=data_transforms['valid'],
                                  dataset_=args.dataset,
                                  no_fiding=args.no_fiding)

    train_loader = DataLoader(dataset=datasetTrain,
                              batch_size=args.train_batch,
                              shuffle=args.train_shuffle,
                              num_workers=args.workers,
                              pin_memory=pin_memory)
    valid_loader = DataLoader(dataset=datasetVal,
                              batch_size=args.valid_batch,
                              shuffle=args.valid_shuffle,
                              num_workers=args.workers,
                              pin_memory=pin_memory)

    with open(args.infos_densenet) as f:
        cfg = edict(json.load(f))

    print('Initializing densenet branch')
    model_dense = Classifier(cfg)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_dense.parameters()) / 1000000.0))

    with open(args.infos_resnet) as f:
        cfg = edict(json.load(f))

    print('Initializing resnet branch')
    model_res = Classifier(cfg)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_res.parameters()) / 1000000.0))

    print('Initializing fusion branch')
    model_fusion = Fusion(input_size=7424, output_size=len(classes))
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model_fusion.parameters()) / 1000000.0))

    print("Initializing optimizers")
    optimizer_dense = init_optim(args.optim, model_dense.parameters(),
                                 args.learning_rate, args.weight_decay,
                                 args.momentum)
    optimizer_res = init_optim(args.optim, model_res.parameters(),
                               args.learning_rate, args.weight_decay,
                               args.momentum)
    optimizer_fusion = init_optim(args.optim, model_fusion.parameters(),
                                  args.learning_rate, args.weight_decay,
                                  args.momentum)

    criterion = nn.BCELoss()

    print("Initializing scheduler: {}".format(args.scheduler))
    if args.stepsize > 0:
        scheduler_dense = init_scheduler(args.scheduler, optimizer_dense,
                                         args.stepsize, args.gamma)
        scheduler_res = init_scheduler(args.scheduler, optimizer_res,
                                       args.stepsize, args.gamma)
        scheduler_fusion = init_scheduler(args.scheduler, optimizer_fusion,
                                          args.stepsize, args.gamma)

    start_epoch = args.start_epoch
    best_loss = np.inf

    if args.resume_densenet:
        checkpoint_dense = torch.load(args.resume_densenet)
        model_dense.load_state_dict(checkpoint_dense['state_dict'])
        epoch_dense = checkpoint_dense['epoch']
        print("Resuming densenet from epoch {}".format(epoch_dense + 1))

    if args.resume_resnet:
        checkpoint_res = torch.load(args.resume_resnet)
        model_res.load_state_dict(checkpoint_res['state_dict'])
        epoch_res = checkpoint_res['epoch']
        print("Resuming resnet from epoch {}".format(epoch_res + 1))

    if args.resume_fusion:
        checkpoint_fusion = torch.load(args.resume_fusion)
        model_fusion.load_state_dict(checkpoint_fusion['state_dict'])
        epoch_fusion = checkpoint_fusion['epoch']
        print("Resuming fusion from epoch {}".format(epoch_fusion + 1))

    if use_gpu:
        model_dense = nn.DataParallel(model_dense).cuda()
        model_res = nn.DataParallel(model_res).cuda()
        model_fusion = nn.DataParallel(model_fusion).cuda()

    if args.evaluate:
        print("Evaluate only")
        if args.step == 1:
            valid('step1', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        elif args.step == 2:
            valid('step2', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        elif args.step == 3:
            valid('step3', model_dense, model_res, model_fusion, valid_loader,
                  criterion, args.print_freq, classes, cfg,
                  data_transforms['valid'])
        else:
            print('args.step not found')
        return

    if args.step == 1:
        #################################### DENSENET BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of densenet branch")

        for p in model_dense.parameters():
            p.requires_grad = True

        for p in model_res.parameters():
            p.requires_grad = False

        for p in model_fusion.parameters():
            p.requires_grad = True

        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step1', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step1', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])

                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_dense.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_dense.step()
                        scheduler_fusion.step()

                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1

                if use_gpu:
                    state_dict_dense = model_dense.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_dense = model_dense.state_dict()
                    state_dict_fusion = model_fusion.state_dict()

                save_checkpoint(
                    {
                        'state_dict': state_dict_dense,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'dense')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')

        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))

        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Dense branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### DENSENET BRANCH END ##########################################

    elif args.step == 2:
        #################################### RESNET BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of local branch")

        for p in model_dense.parameters():
            p.requires_grad = False

        for p in model_res.parameters():
            p.requires_grad = True

        for p in model_fusion.parameters():
            p.requires_grad = True

        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step2', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step2', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])

                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_res.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_res.step()
                        scheduler_fusion.step()

                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1

                if use_gpu:
                    state_dict_res = model_res.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_res = model_res.state_dict()
                    state_dict_fusion = model_fusion.state_dict()

                save_checkpoint(
                    {
                        'state_dict': state_dict_res,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'res')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')

        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))

        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Resnet branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### RESNET BRANCH END ##########################################

    elif args.step == 3:
        #################################### FUSION BRANCH INIT ##########################################
        start_time = time.time()
        train_time = 0
        best_epoch = 0
        print("==> Start training of fusion branch")

        for p in model_dense.parameters():
            p.requires_grad = True

        for p in model_res.parameters():
            p.requires_grad = True

        for p in model_fusion.parameters():
            p.requires_grad = True

        for epoch in range(start_epoch, args.max_epoch):
            start_train_time = time.time()
            train('step3', model_dense, model_res, model_fusion, train_loader,
                  optimizer_dense, optimizer_res, optimizer_fusion, criterion,
                  args.print_freq, epoch, args.max_epoch, cfg,
                  data_transforms['train'])
            train_time += round(time.time() - start_train_time)
            if args.eval_step > 0 and (epoch + 1) % args.eval_step == 0 or (
                    epoch + 1) == args.max_epoch:
                print("==> Validation")
                loss_val = valid('step3', model_dense, model_res, model_fusion,
                                 valid_loader, criterion, args.print_freq,
                                 classes, cfg, data_transforms['valid'])

                if args.stepsize > 0:
                    if args.scheduler == 'ReduceLROnPlateau':
                        scheduler_dense.step(loss_val)
                        scheduler_res.step(loss_val)
                        scheduler_fusion.step(loss_val)
                    else:
                        scheduler_dense.step()
                        scheduler_res.step()
                        scheduler_fusion.step()

                is_best = loss_val < best_loss
                if is_best:
                    best_loss = loss_val
                    best_epoch = epoch + 1

                if use_gpu:
                    state_dict_dense = model_dense.module.state_dict()
                    state_dict_res = model_res.module.state_dict()
                    state_dict_fusion = model_fusion.module.state_dict()
                else:
                    state_dict_dense = model_dense.state_dict()
                    state_dict_res = model_res.state_dict()
                    state_dict_fusion = model_fusion.state_dict()

                save_checkpoint(
                    {
                        'state_dict': state_dict_dense,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'dense')
                save_checkpoint(
                    {
                        'state_dict': state_dict_res,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'res')
                save_checkpoint(
                    {
                        'state_dict': state_dict_fusion,
                        'loss': best_loss,
                        'epoch': epoch,
                    }, is_best, args.save_dir,
                    'checkpoint_ep' + str(epoch + 1) + '.pth.tar', 'fusion')

        print("==> Best Validation Loss {:.4%}, achieved at epoch {}".format(
            best_loss, best_epoch))

        elapsed = round(time.time() - start_time)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        train_time = str(datetime.timedelta(seconds=train_time))
        print(
            "Fusion branch finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
            .format(elapsed, train_time))
        #################################### FUSION BRANCH END ##########################################

    else:
        print('args.step not found')
Пример #20
0
# Build an input pipeline over the test images; no augmentation, since this
# script only extracts features.
# NOTE(review): DataPipeline, testpath, imageChannels, imageHeight/Width,
# batchSize and outputDir are defined earlier in the file (not visible
# here) -- confirm they exist before this point.
pipeTest = DataPipeline(
    imagePath=testpath,
    imageChannels=imageChannels,
    searchPatternImage='*/*.jpg',
    augmentations=[],
)
pipeTest.build(imageSize=(imageHeight, imageWidth),
               batchSize=batchSize,
               shuffle_buffer_size=200,
               shuffle=True)

# Build model
classy = Classifier(
    numClasses=pipeTest.nClasses,
    imageWidth=imageWidth,
    imageHeight=imageHeight,
    imageChannels=imageChannels,
    learnRate=0.001,
    lastTrainableLayers=2,
)

logging.info(classy.model.summary())

# Restore previously trained weights before extracting features.
classy.model.load_weights(os.path.join(outputDir, "weights.h5"))
logging.info("Weights loaded!")

# Expose the penultimate layer's activations as the feature/embedding output.
intermediate = Model(inputs=[classy.model.input],
                     outputs=classy.model.layers[-2].output)

print(intermediate.summary())
# Take up to 31 batches; rescale from [0, 1] back to [0, 255] and subtract
# per-channel means (Caffe-style ImageNet preprocessing).
# NOTE(review): assumes the image channel order matches the mean constants
# (123.68, 116.779, 103.939 are the ImageNet R, G, B means) -- confirm.
for it, (imgs, labels) in enumerate(pipeTest.ds.take(31)):
    imgsCaf = 255 * imgs - tf.constant([123.68, 116.779, 103.939])
Пример #21
0
    searchPatternImage='*/*.jpg',
    augmentations=[],
)

# Build the evaluation pipeline; shuffle=False keeps outputs aligned with
# the dataset's file order.
pipeTest.build(imageSize=(imageHeight, imageWidth),
               batchSize=batchSize,
               shuffle_buffer_size=600,
               shuffle=False)

# Build model
# Train and test pipelines must expose the same label space.
assert pipeTest.nClasses == pipeTrain.nClasses, "Number of classes must match!"

classy = Classifier(
    numClasses=pipeTrain.nClasses,
    imageWidth=imageWidth,
    imageHeight=imageHeight,
    imageChannels=imageChannels,
    learnRate=0.001,
    lastTrainableLayers=2,
)

logging.info(classy.model.summary())

# Warm-start from previously saved weights when present; otherwise train
# from scratch.
# NOTE(review): the bare ``except:`` also swallows real failures (corrupt
# file, architecture mismatch, KeyboardInterrupt) -- consider narrowing to
# ``except (OSError, ValueError):``.
try:
    classy.model.load_weights(os.path.join(outputDir, "weights.h5"))
    logging.info("Weights loaded!")
except:
    logging.warning("Couldnt load weights")

# Model Training
# Steps per epoch for train/test (ceil so the final partial batch counts).
subitsTrain = np.ceil(pipeTrain.ndata / batchSize)
subitsTest = np.ceil(pipeTest.ndata / batchSize)
Пример #22
0
from DataHandler import *
from model.nets import *
from model.train_functions import *
from model.classifier import Classifier
from scipy import misc
import numpy as np
import pandas as pd

# Paths and hyper-parameters come from a plain-text params file.
params = read_params('Project_path/params.txt')
data = Cifar10_Data()
data.load_and_split(params['input_data_path'], params['labels_path'])
#xtrain=data.train_idxs
#xval=data.val_idxs
#batch = data.get_train_feed_dict('X','y','train',128)
#%%
# Train a classifier on CIFAR-10 with the convnet2 architecture.
cls = Classifier(params, data.Ndims, net=convnet2)
cls.train(data, epochs=10, batch_size=128)
#cls.load_weights_from_checkpoint(params['pre-traind_model_path'])

#%% Get The Test Data And Classify It
# Test images are read in 600 batches of 500 (numbered 1.png .. 300000.png),
# normalized with the *training* mean/std, and classified batch by batch.
test_path = params['test_data_path']
labels = []
for batch_num in range(600):
    X = []
    for idx in range(500):
        img_path = test_path + str(batch_num * 500 + idx + 1) + '.png'
        # NOTE(review): scipy.misc.imread was removed in SciPy 1.3 --
        # switch to imageio.imread (or PIL.Image) on modern environments.
        X.append(misc.imread(img_path))
    X = np.array(X)
    # Normalize with training statistics; the epsilon guards a zero std.
    X = (X - data.mean) / (data.std + 1e-7)
    preds = cls.predict(X)
    # Class index with the highest score per image.
    preds = np.argmax(preds, axis=1)
Пример #23
0
def run(args):
    """Train a multi-label chest X-ray classifier from CSV-listed images.

    Loads the JSON experiment config from ``args.cfg_path``, builds the
    model (wrapped in ``DataParallel`` over ``args.device_ids``), then runs
    the epoch loop: train, validate, compute per-class ROC AUCs, log to
    TensorBoard, and checkpoint both a rolling ``train.ckpt`` and a rotating
    set of ``best{k}.ckpt`` files.

    Args:
        args: parsed CLI namespace. Fields read here: cfg_path, save_path,
            logtofile, resume, device_ids, pre_train, num_workers, verbose.

    Raises:
        Exception: if fewer GPUs are available than requested in device_ids.
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    # Log either to <save_path>/log.txt (overwritten each run) or to stderr.
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w",
                            level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    # Snapshot the config into the run directory for fresh runs only.
    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    # Validate requested GPUs; the first id hosts the master replica.
    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    model = Classifier(cfg)
    if args.verbose is True:
        from torchsummary import summary
        # Input resolution depends on whether the config fixes aspect ratio.
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))
    model = DataParallel(model, device_ids=device_ids).to(device).train()
    # Optionally initialize from pre-trained weights (raw state_dict file).
    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.module.load_state_dict(ckpt)
    optimizer = get_optimizer(model.parameters(), cfg)

    # NOTE(review): src_folder/dst_folder are only used by the commented-out
    # source-snapshot block below; they are currently dead variables.
    src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    dst_folder = os.path.join(args.save_path, 'classification')
    # rc, size = subprocess.getstatusoutput('dir --max-depth=0 %s | cut -f1'
    #                                       % src_folder)
    # if rc != 0:
    #     print(size)
    #     raise Exception('Copy folder error : {}'.format(rc))
    # rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder,
    #                                                           dst_folder))
    # if rc != 0:
    #     raise Exception('copy folder error : {}'.format(err_msg))

    # Keep copies of the data splits alongside the checkpoints.
    copyfile(cfg.train_csv, os.path.join(args.save_path, 'train.csv'))
    copyfile(cfg.dev_csv, os.path.join(args.save_path, 'valid.csv'))

    dataloader_train = DataLoader(ImageDataset(cfg.train_csv,
                                               cfg,
                                               mode='train'),
                                  batch_size=cfg.train_batch_size,
                                  num_workers=args.num_workers,
                                  drop_last=True,
                                  shuffle=True)
    dataloader_dev = DataLoader(ImageDataset(cfg.dev_csv, cfg, mode='dev'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False,
                                shuffle=False)
    dev_header = dataloader_dev.dataset._label_header

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    epoch_start = 0
    # Best-so-far metrics plus the rotating index for best{k}.ckpt files.
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    # Resume model weights and epoch/step counters from the rolling
    # checkpoint.  NOTE(review): the optimizer state is not restored.
    if args.resume:
        ckpt_path = os.path.join(args.save_path, 'train.ckpt')
        ckpt = torch.load(ckpt_path, map_location=device)
        model.module.load_state_dict(ckpt['state_dict'])
        summary_train = {'epoch': ckpt['epoch'], 'step': ckpt['step']}
        best_dict['acc_dev_best'] = ckpt['acc_dev_best']
        best_dict['loss_dev_best'] = ckpt['loss_dev_best']
        best_dict['auc_dev_best'] = ckpt['auc_dev_best']
        epoch_start = ckpt['epoch']

    for epoch in range(epoch_start, cfg.epoch):
        # Step-decay learning rate, applied directly to the param groups.
        lr = lr_schedule(cfg.lr, cfg.lr_factor, summary_train['epoch'],
                         cfg.lr_epochs)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        summary_train, best_dict = train_epoch(summary_train, summary_dev, cfg,
                                               args, model, dataloader_train,
                                               dataloader_dev, optimizer,
                                               summary_writer, best_dict,
                                               dev_header)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(summary_dev, cfg, args,
                                                      model, dataloader_dev)
        time_spent = time.time() - time_now

        # Per-class ROC AUC over the validation predictions.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(y_true,
                                                     y_pred,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        summary_dev['auc'] = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['auc']))

        logging.info('{}, Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
                     'Mean auc: {:.3f} '
                     'Run Time : {:.2f} sec'.format(
                         time.strftime("%Y-%m-%d %H:%M:%S"),
                         summary_train['step'], loss_dev_str, acc_dev_str,
                         auc_dev_str, summary_dev['auc'].mean(), time_spent))

        # Per-class TensorBoard scalars, indexed by the training step.
        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      summary_dev['auc'][t],
                                      summary_train['step'])

        # Track the best of each metric, but only save a "best" checkpoint
        # when the metric named by cfg.best_target improves.  cfg.save_index
        # selects which classes count toward the mean.
        save_best = False

        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True

        mean_auc = summary_dev['auc'][cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True

        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            # best_idx cycles 1..cfg.save_top_k, so old best files are
            # overwritten once the cap is reached.
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': model.module.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'], loss_dev_str, acc_dev_str,
                             auc_dev_str, best_dict['auc_dev_best']))
        # Rolling checkpoint used by --resume; written every epoch.
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': model.module.state_dict()
            }, os.path.join(args.save_path, 'train.ckpt'))
    summary_writer.close()
Пример #24
0
def run(args, val_h5_file):
    """Train the classifier from pre-packed HDF5/NumPy chunks.

    Variant of the CSV-driven trainer: training images come from
    ``args.chunk_count`` ``.npy`` chunks under ``args.train_chunks`` (with
    labels in ``train_labels.h5``), and validation data from the open
    ``val_h5_file``.  Per-class hinge-loss parameters are built from
    ``args.q``/``args.k``.  Each epoch trains, validates, computes per-class
    ROC AUCs, logs to TensorBoard, and checkpoints a rolling ``train.ckpt``
    plus rotating ``best{k}.ckpt`` files.

    Args:
        args: parsed CLI namespace. Fields read here: cfg_path, save_path,
            logtofile, resume, device_ids, pre_train, num_workers, verbose,
            train_chunks, chunk_count, q, k.
        val_h5_file: open h5py file with 'val', 'val_u_ones', 'val_u_zeros'
            and 'val_u_random' datasets (images plus the three uncertain-label
            policies).

    Raises:
        Exception: if fewer GPUs are available than requested in device_ids.
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    # Log either to <save_path>/log.txt (overwritten each run) or to stderr.
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w",
                            level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    # Snapshot the config into the run directory for fresh runs only.
    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    # Validate requested GPUs; the first id hosts the master replica.
    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    model = Classifier(cfg)
    if args.verbose is True:
        from torchsummary import summary
        # Input resolution depends on whether the config fixes aspect ratio.
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))
    model = DataParallel(model, device_ids=device_ids).to(device).train()
    # Optionally initialize from pre-trained weights (raw state_dict file).
    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.module.load_state_dict(ckpt)
    optimizer = get_optimizer(model.parameters(), cfg)

    #src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    #dst_folder = os.path.join(args.save_path, 'classification')
    #rc, size = subprocess.getstatusoutput('du --max-depth=0 %s | cut -f1' % src_folder)
    #if rc != 0: raise Exception('Copy folder error : {}'.format(rc))
    #rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' % (src_folder, dst_folder))
    #if rc != 0: raise Exception('copy folder error : {}'.format(err_msg))
    #copyfile(cfg.train_csv, os.path.join(args.save_path, 'train.csv'))
    #copyfile(cfg.dev_csv, os.path.join(args.save_path, 'dev.csv'))
    # np_train_h5_file = np.array(train_h5_file['train'][:10000], dtype=np.uint8)
    # np_t_u_ones = np.array(train_h5_file['train_u_ones'][:10000], dtype=np.int8)
    # np_t_u_zeros = np.array(train_h5_file['train_u_zeros'][:10000], dtype=np.int8)
    # np_t_u_random = np.array(train_h5_file['train_u_random'][:10000], dtype=np.int8)

    # Materialize the validation images and the three uncertain-label
    # variants (U-ones / U-zeros / U-random) into memory.
    np_val_h5_file = np.array(val_h5_file['val'], dtype=np.uint8)
    np_v_u_ones = np.array(val_h5_file['val_u_ones'], dtype=np.int8)
    np_v_u_zeros = np.array(val_h5_file['val_u_zeros'], dtype=np.int8)
    np_v_u_random = np.array(val_h5_file['val_u_random'], dtype=np.int8)

    train_labels = {}
    with h5py.File(f'{args.train_chunks}/train_labels.h5', 'r') as fp:
        train_labels['train_u_ones'] = np.array(fp['train_u_ones'],
                                                dtype=np.int8)
        train_labels['train_u_zeros'] = np.array(fp['train_u_zeros'],
                                                 dtype=np.int8)
        train_labels['train_u_random'] = np.array(fp['train_u_random'],
                                                  dtype=np.int8)
    # Concatenate all training-image chunks (1-indexed on disk) into one
    # array.  NOTE(review): this holds the full training set in RAM.
    np_train_samples = None
    for i in range(args.chunk_count):
        with open(f'{args.train_chunks}/chexpert_dset_chunk_{i+1}.npy',
                  'rb') as f:
            if np_train_samples is None:
                np_train_samples = np.load(f)
            else:
                np_train_samples = np.concatenate(
                    (np_train_samples, np.load(f)))

    dataloader_train = DataLoader(ImageDataset(
        [np_train_samples, train_labels], cfg, mode='train'),
                                  batch_size=cfg.train_batch_size,
                                  num_workers=args.num_workers,
                                  drop_last=True,
                                  shuffle=True)

    dataloader_dev = DataLoader(ImageDataset(
        [np_val_h5_file, np_v_u_zeros, np_v_u_ones, np_v_u_random],
        cfg,
        mode='val'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False,
                                shuffle=False)
    #dev_header = dataloader_dev.dataset._label_header
    # Hard-coded CheXpert observation names (replaces the CSV-derived header
    # used by the CSV variant of this trainer).
    dev_header = [
        'No_Finding', 'Enlarged_Cardiomediastinum', 'Cardiomegaly',
        'Lung_Opacity', 'Lung_Lesion', 'Edema', 'Consolidation', 'Pneumonia',
        'Atelectasis', 'Pneumothorax', 'Pleural_Effusion', 'Pleural_Other',
        'Fracture', 'Support_Devices'
    ]
    print(f'dataloaders are set. train count: {np_train_samples.shape[0]}')
    logging.info("[LOGGING TEST]: dataloaders are set...")
    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    epoch_start = 0
    # Best-so-far metrics plus the rotating index for best{k}.ckpt files.
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    # Resume model weights and epoch/step counters from the rolling
    # checkpoint.  NOTE(review): the optimizer state is not restored.
    if args.resume:
        ckpt_path = os.path.join(args.save_path, 'train.ckpt')
        ckpt = torch.load(ckpt_path, map_location=device)
        model.module.load_state_dict(ckpt['state_dict'])
        summary_train = {'epoch': ckpt['epoch'], 'step': ckpt['step']}
        best_dict['acc_dev_best'] = ckpt['acc_dev_best']
        best_dict['loss_dev_best'] = ckpt['loss_dev_best']
        best_dict['auc_dev_best'] = ckpt['auc_dev_best']
        epoch_start = ckpt['epoch']

    # Broadcast the scalar q/k hinge-loss parameters to one value per class.
    q_list = []
    k_list = []
    for i in range(len(cfg.num_classes)):
        q_list.append(args.q)
        k_list.append(args.k)

    k_list = torch.FloatTensor(k_list)
    q_list = torch.FloatTensor(q_list)
    loss_sq_hinge = MultiClassSquaredHingeLoss()
    print('Everything is set starting to train...')
    before = datetime.datetime.now()
    for epoch in range(epoch_start, cfg.epoch):
        # Step-decay learning rate, applied directly to the param groups.
        lr = lr_schedule(cfg.lr, cfg.lr_factor, summary_train['epoch'],
                         cfg.lr_epochs)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

        summary_train, best_dict = train_epoch(summary_train, summary_dev, cfg,
                                               args, model, dataloader_train,
                                               dataloader_dev, optimizer,
                                               summary_writer, best_dict,
                                               dev_header, q_list, k_list,
                                               loss_sq_hinge)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(summary_dev, cfg, args,
                                                      model, dataloader_dev,
                                                      q_list, k_list,
                                                      loss_sq_hinge)
        time_spent = time.time() - time_now

        # Per-class ROC AUC over the validation predictions.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(y_true,
                                                     y_pred,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        summary_dev['auc'] = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['auc']))

        logging.info('{}, Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
                     'Mean auc: {:.3f} '
                     'Run Time : {:.2f} sec'.format(
                         time.strftime("%Y-%m-%d %H:%M:%S"),
                         summary_train['step'], loss_dev_str, acc_dev_str,
                         auc_dev_str, summary_dev['auc'].mean(), time_spent))

        # Per-class TensorBoard scalars, indexed by the training step.
        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      summary_dev['auc'][t],
                                      summary_train['step'])

        # Track the best of each metric, but only save a "best" checkpoint
        # when the metric named by cfg.best_target improves.  cfg.save_index
        # selects which classes count toward the mean.
        save_best = False

        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True

        mean_auc = summary_dev['auc'][cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True

        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            # best_idx cycles 1..cfg.save_top_k, so old best files are
            # overwritten once the cap is reached.
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': model.module.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))
            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'], loss_dev_str, acc_dev_str,
                             auc_dev_str, best_dict['auc_dev_best']))
        # Rolling checkpoint used by --resume; written every epoch.
        torch.save(
            {
                'epoch': summary_train['epoch'],
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': model.module.state_dict()
            }, os.path.join(args.save_path, 'train.ckpt'))

        print_remaining_time(before,
                             epoch + 1,
                             cfg.epoch,
                             additional='[training]')
    summary_writer.close()
Пример #25
0
class MDAIModel:
    """MD.ai deployment wrapper for a multi-label chest X-ray classifier.

    Loads a ``Classifier`` (config + checkpoint located next to this file)
    and serves per-DICOM-instance predictions, attaching Grad-CAM and
    Integrated-Gradients explanation images for each positive class.
    """

    def __init__(self):
        # Model assets (config/example.json and model_best.pt) are expected
        # to live alongside this source file.
        root_path = os.path.dirname(__file__)

        with open(os.path.join(root_path, "config/example.json")) as f:
            cfg = edict(json.load(f))

        self.model = Classifier(cfg)
        # Force six binary (one-logit) heads, then rebuild the head layers
        # so they match the overridden class count.
        self.model.cfg.num_classes = [1, 1, 1, 1, 1, 1]
        self.model._init_classifier()
        self.model._init_attention_map()
        self.model._init_bn()

        if torch.cuda.is_available():
            self.model = self.model.eval().cuda()
        else:
            self.model = self.model.eval().cpu()

        chkpt_path = os.path.join(root_path, "model_best.pt")
        # map_location keeps CPU-only hosts working even if the checkpoint
        # was saved from a GPU run.
        self.model.load_state_dict(
            torch.load(chkpt_path, map_location=lambda storage, loc: storage)
        )

    def predict(self, data):
        """
        The input data has the following schema:

        {
            "instances": [
                {
                    "file": "bytes"
                    "tags": {
                        "StudyInstanceUID": "str",
                        "SeriesInstanceUID": "str",
                        "SOPInstanceUID": "str",
                        ...
                    }
                },
                ...
            ],
            "args": {
                "arg1": "str",
                "arg2": "str",
                ...
            }
        }

        Model scope specifies whether an entire study, series, or instance is given to the model.
        If the model scope is 'INSTANCE', then `instances` will be a single instance (list length of 1).
        If the model scope is 'SERIES', then `instances` will be a list of all instances in a series.
        If the model scope is 'STUDY', then `instances` will be a list of all instances in a study.

        The additional `args` dict supply values that may be used in a given run.

        For a single instance dict, `files` is the raw binary data representing a DICOM file, and
        can be loaded using: `ds = pydicom.dcmread(BytesIO(instance["file"]))`.

        The results returned by this function should have the following schema:

        [
            {
                "type": "str", // 'NONE', 'ANNOTATION', 'IMAGE', 'DICOM', 'TEXT'
                "study_uid": "str",
                "series_uid": "str",
                "instance_uid": "str",
                "frame_number": "int",
                "class_index": "int",
                "data": {},
                "probability": "float",
                "explanations": [
                    {
                        "name": "str",
                        "description": "str",
                        "content": "bytes",
                        "content_type": "str",
                    },
                    ...
                ],
            },
            ...
        ]

        The DICOM UIDs must be supplied based on the scope of the label attached to `class_index`.
        """
        input_instances = data["instances"]
        input_args = data["args"]  # currently unused by this model

        results = []

        for instance in input_instances:
            tags = instance["tags"]
            ds = pydicom.dcmread(BytesIO(instance["file"]))
            x = ds.pixel_array

            # Keep the raw pixel array for overlaying the Grad-CAM heatmap.
            x_orig = x

            # preprocess image
            # convert grayscale to RGB
            # Upscale, contrast-normalize (CLAHE on [0, 1] data), then resize
            # to the 512x512 network input and rescale to [-1, 1].
            # NOTE(review): x.max() of an all-zero frame would divide by zero.
            x = cv2.resize(x, (1024, 1024))
            x = equalize_adapthist(x.astype(float) / x.max(), clip_limit=0.01)
            x = cv2.resize(x, (512, 512))
            x = x * 2 - 1
            # Stack the single channel 3x -> shape (1, 3, 512, 512).
            x = np.array([[x, x, x]])
            x = torch.from_numpy(x).float()
            if torch.cuda.is_available():
                x = x.cuda()
            else:
                x = x.cpu()

            with torch.no_grad():
                logits, logit_maps = self.model(x)
                logits = torch.cat(logits, dim=1).detach().cpu()
                # `threshs` is a module-level array of per-class logit
                # thresholds defined outside this class; subtracting it shifts
                # the 0.5 probability decision point to each class threshold.
                y_prob = torch.sigmoid(logits - torch.from_numpy(threshs).reshape((1, 6)))
                y_prob = y_prob.cpu().numpy()

            # Re-enable gradients on the input so the explanation methods
            # below can backpropagate with respect to it.
            x.requires_grad = True

            y_classes = y_prob >= 0.5
            class_indices = np.where(y_classes.astype("bool"))[1]

            if len(class_indices) == 0:
                # no outputs, return 'NONE' output type
                result = {
                    "type": "NONE",
                    "study_uid": tags["StudyInstanceUID"],
                    "series_uid": tags["SeriesInstanceUID"],
                    "instance_uid": tags["SOPInstanceUID"],
                    "frame_number": None,
                }
                results.append(result)
            else:
                # One ANNOTATION result per positive class, each with two
                # PNG explanation images.
                for class_index in class_indices:
                    probability = y_prob[0][class_index]

                    gradcam = GradCam(self.model)
                    gradcam_output = gradcam.generate_cam(x, x_orig, class_index)
                    gradcam_output_buffer = BytesIO()
                    gradcam_output.save(gradcam_output_buffer, format="PNG")

                    intgrad = IntegratedGradients(self.model)
                    intgrad_output = intgrad.generate_integrated_gradients(x, class_index, 5)
                    intgrad_output_buffer = BytesIO()
                    intgrad_output.save(intgrad_output_buffer, format="PNG")

                    result = {
                        "type": "ANNOTATION",
                        "study_uid": tags["StudyInstanceUID"],
                        "series_uid": tags["SeriesInstanceUID"],
                        "instance_uid": tags["SOPInstanceUID"],
                        "frame_number": None,
                        "class_index": int(class_index),
                        "data": None,
                        "probability": float(probability),
                        "explanations": [
                            {
                                "name": "Grad-CAM",
                                "description": "Visualize how parts of the image affects neural network’s output by looking into the activation maps. From _Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization_ (https://arxiv.org/abs/1610.02391)",
                                "content": gradcam_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                            {
                                "name": "Integrated Gradients",
                                "description": "Visualize an average of the gradients along the construction of the input towards the decision. From _Axiomatic Attribution for Deep Networks_ (https://arxiv.org/abs/1703.01365)",
                                "content": intgrad_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                        ],
                    }
                    results.append(result)

        return results
Пример #26
0
    cfg = edict(json.load(f))
    if args.verbose is True:
        print(json.dumps(cfg, indent=4))

# Persist the effective config next to the checkpoints so a run is
# reproducible; skipped when resuming an existing run directory.
if not args.resume:
    with open(join(args.save_path, 'cfg.json'), 'w') as f:
        json.dump(cfg, f, indent=1)

device_ids = list(map(int, args.device_ids.split(',')))
num_devices = torch.cuda.device_count()
# NOTE(review): assert is stripped under `python -O`; an explicit raise
# would be safer for this GPU-count validation.
assert num_devices >= len(device_ids), f"""
#available gpu : {num_devices} < --device_ids : {len(device_ids)}"""

# First listed device hosts the DataParallel master replica.
device = torch.device(f"cuda:{device_ids[0]}")

model = Classifier(cfg)
if args.verbose:
    from torchsummary import summary
    # Square input when fix_ratio is set, otherwise explicit height/width.
    h, w = (cfg.long_side, cfg.long_side) if cfg.fix_ratio \
           else (cfg.height, cfg.width)
    summary(model.to(device), (3, h, w))

model = DataParallel(model, device_ids=device_ids).to(device)
if args.pre_train is not None:
    if exists(args.pre_train):
        # Load weights into the wrapped module (checkpoint keys are saved
        # without the DataParallel 'module.' prefix).
        ckpt = torch.load(args.pre_train, map_location=device)
        model.module.load_state_dict(ckpt)

optimizer = get_optimizer(model.parameters(), cfg)

trainset = ImageDataset(cfg.train_csv, cfg, mode='train')
Пример #27
0
""" Entry point for model infering. """
import os

import pandas as pd

from model.classifier import Classifier
from data_utils.preprocessing import encode_sentences, tokenize_sentences

classifier = Classifier("data/best_model/model.meta", "data/best_model/model")


def main():
    """Run a one-off smoke-test inference on a hard-coded sentence.

    Reuses the module-level ``classifier`` (already constructed at import
    time from the same checkpoint paths) instead of loading the model from
    disk a second time, which the previous version did — doubling model
    load time and memory for identical behavior. This also makes ``main``
    consistent with ``classify``, which uses the shared instance.
    """
    # Rows are (label, text) pairs; the "0" label is a dummy for inference.
    sentences = [["0", "I used to like cars."]]
    tokenized_sentences = tokenize_sentences(sentences)
    # 32 is presumably the maximum encoded sequence length — TODO confirm
    # against encode_sentences.
    encoded_sentences, _ = encode_sentences(tokenized_sentences, 32)
    result = classifier.infer(encoded_sentences)
    print(result)


def classify(tweet_body):
    """Classify one tweet body using the shared module-level classifier.

    The text is wrapped in the (label, text) row format the tokenizer
    expects; the leading "0" is a dummy label ignored at inference time.
    """
    rows = [["0", tweet_body]]
    encoded, _ = encode_sentences(tokenize_sentences(rows), 32)
    return classifier.infer(encoded)


def _get_sentiment_vector_nasdaq(date, tweets_dir):
    all_data = pd.read_csv('./%s/%s.csv' % (tweets_dir, date))
Пример #28
0
def main(args):
    """Fine-tune a BERT classifier on MNLI and log training statistics.

    Builds the data processor, optionally seeds all RNG sources for
    reproducibility, constructs the classifier and warmup optimizer,
    optionally restores pre-trained BERT weights, then runs the training
    loop while logging loss/accuracy and classifier-head weight stats.

    Args:
        args: parsed command-line namespace (paths, batch size, epochs,
            learning-rate schedule, logging options, etc.).

    Changes from the previous revision: fixed the misspelled local
    ``num_labes`` -> ``num_labels`` and removed a commented-out dead
    optimizer line.
    """
    bert_config = BertConfig(args.bert_config_path)
    bert_config.print_config()

    # Use the GPU only when both requested and actually available.
    if args.use_cuda and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    processor = MnliDataProcessor(
        data_dir=args.data_dir,
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        random_seed=args.random_seed
    )

    num_labels = len(processor.get_labels())

    if args.random_seed is not None:
        # Seed every RNG source and force deterministic cuDNN kernels so
        # runs are reproducible (at some speed cost).
        random.seed(args.random_seed)
        np.random.seed(args.random_seed)
        torch.manual_seed(args.random_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    train_data_generator = processor.data_generator(
        batch_size=args.batch_size,
        phase='train',
        epoch=args.epoch,
        shuffle=True
    )

    num_train_examples = processor.get_num_examples(phase='train')

    max_train_steps = args.epoch * num_train_examples // args.batch_size

    warmup_steps = int(max_train_steps * args.warmup_proportion)

    classifier = Classifier(bert_config, num_labels).to(device)

    # Custom optimizer wraps LR warmup/decay over the training schedule.
    optimizer = Optimizer(classifier, warmup_steps, max_train_steps, args.learning_rate, args.weight_decay)

    if args.init_pre_training_params:
        pre_training_params = load_pickle(args.init_pre_training_params)
        classifier.bert.load_state_dict(pre_training_params)

        # Temporal
        # NOTE(review): hard-coded absolute path to a machine-specific
        # pickle; should become a CLI argument before this leaves the
        # experiment stage.
        cls_ckp = load_pickle('/home/cvds_lab/maxim/transformer_investigation/notebooks/ckp/classifier_ckp.pkl')
        classifier.cls_out.weight.data = torch.tensor(cls_ckp['cls_out_w'], dtype=torch.float32).t().to(device)
        classifier.cls_out.bias.data = torch.tensor(cls_ckp['cls_out_b'], dtype=torch.float32).to(device)
        # Temporal

    logfile = args.log_to
    Logger().add_log(logfile, ['epoch', 'step', 'loss', 'accuracy',
                               'cls_w_mean', 'cls_w_std', 'cls_w_min', 'cls_w_max',
                               'cls_b_mean', 'cls_b_std', 'cls_b_min', 'cls_b_max'])
    steps = 0
    total_loss, total_acc = [], []
    time_begin = time.time()
    for batch in train_data_generator():
        steps += 1

        # Batch layout: token ids, positions, segment ids, attention mask,
        # labels — as produced by MnliDataProcessor.
        src_ids = torch.tensor(batch[0], dtype=torch.long).to(device)
        position_ids = torch.tensor(batch[1], dtype=torch.long).to(device)
        sentence_ids = torch.tensor(batch[2], dtype=torch.long).to(device)
        input_mask = torch.tensor(batch[3], dtype=torch.float32).to(device)
        labels = torch.tensor(batch[4], dtype=torch.long).to(device)

        optimizer.zero_grad()
        loss, _, accuracy = classifier(src_ids, position_ids, sentence_ids, input_mask, labels)
        loss.backward()
        optimizer.step(steps)

        current_example, current_epoch = processor.get_train_progress()
        Logger()[logfile]['epoch'].append(current_epoch)
        Logger()[logfile]['step'].append(steps)
        Logger()[logfile]['loss'].append(loss.item())
        Logger()[logfile]['accuracy'].append(accuracy.item())
        # Track classifier-head weight statistics to monitor drift while
        # fine-tuning; no_grad avoids building graphs for the reductions.
        with torch.no_grad():
            Logger()[logfile]['cls_w_mean'].append(classifier.cls_out.weight.mean().item())
            Logger()[logfile]['cls_w_std'].append(classifier.cls_out.weight.std().item())
            Logger()[logfile]['cls_w_min'].append(classifier.cls_out.weight.min().item())
            Logger()[logfile]['cls_w_max'].append(classifier.cls_out.weight.max().item())

            Logger()[logfile]['cls_b_mean'].append(classifier.cls_out.bias.mean().item())
            Logger()[logfile]['cls_b_std'].append(classifier.cls_out.bias.std().item())
            Logger()[logfile]['cls_b_min'].append(classifier.cls_out.bias.min().item())
            Logger()[logfile]['cls_b_max'].append(classifier.cls_out.bias.max().item())

        if steps % 1000 == 0:
            Logger().log_all()

        if steps % args.skip_steps == 0:
            total_loss.append(loss.item())
            total_acc.append(accuracy.item())

            current_example, current_epoch = processor.get_train_progress()
            time_end = time.time()
            used_time = time_end - time_begin
            print('epoch: %d, progress: %d/%d, step: %d, ave loss: %f, ave acc: %f, speed: %f steps/s' %
                  (current_epoch, current_example, num_train_examples, steps,
                   np.mean(total_loss).item(), np.mean(total_acc).item(), args.skip_steps / used_time))

            total_loss, total_acc = [], []
            time_begin = time.time()
Пример #29
0
def run_fl(args):
    """Federated-learning training driver for the chest X-ray classifier.

    For each communication round: every client trains a copy of the global
    model locally (FedAvg / weighted FedAvg / FedProx), the server averages
    the resulting weights, and the averaged model is evaluated on the dev
    set; the best checkpoints are rotated on disk.

    Args:
        args: parsed CLI namespace (cfg_path, save_path, device_ids,
            num_workers, pre_train, resume, verbose, logtofile).
    """
    with open(args.cfg_path) as f:
        cfg = edict(json.load(f))
        if args.verbose is True:
            print(json.dumps(cfg, indent=4))

    if not os.path.exists(args.save_path):
        os.mkdir(args.save_path)
    if args.logtofile is True:
        logging.basicConfig(filename=args.save_path + '/log.txt',
                            filemode="w",
                            level=logging.INFO)
    else:
        logging.basicConfig(level=logging.INFO)

    # Persist the effective config for reproducibility (skip when resuming).
    if not args.resume:
        with open(os.path.join(args.save_path, 'cfg.json'), 'w') as f:
            json.dump(cfg, f, indent=1)

    device_ids = list(map(int, args.device_ids.split(',')))
    num_devices = torch.cuda.device_count()
    if num_devices < len(device_ids):
        raise Exception('#available gpu : {} < --device_ids : {}'.format(
            num_devices, len(device_ids)))
    device = torch.device('cuda:{}'.format(device_ids[0]))

    # initialise global model
    model = Classifier(cfg).to(device).train()

    if args.verbose is True:
        from torchsummary import summary
        if cfg.fix_ratio:
            h, w = cfg.long_side, cfg.long_side
        else:
            h, w = cfg.height, cfg.width
        summary(model.to(device), (3, h, w))

    if args.pre_train is not None:
        if os.path.exists(args.pre_train):
            ckpt = torch.load(args.pre_train, map_location=device)
            model.load_state_dict(ckpt)

    # Snapshot the source tree into the run directory for provenance.
    src_folder = os.path.dirname(os.path.abspath(__file__)) + '/../'
    dst_folder = os.path.join(args.save_path, 'classification')
    rc, size = subprocess.getstatusoutput('du --max-depth=0 %s | cut -f1' %
                                          src_folder)

    # NOTE(review): this checks the `du` size query, not a copy, so the
    # 'Copy folder error' message below is misleading; `size` is unused.
    if rc != 0:
        raise Exception('Copy folder error : {}'.format(rc))
    else:
        print('Successfully determined size of directory')

    rc, err_msg = subprocess.getstatusoutput('cp -R %s %s' %
                                             (src_folder, dst_folder))
    if rc != 0:
        raise Exception('copy folder error : {}'.format(err_msg))
    else:
        print('Successfully copied folder')

    # copy train files
    # One client per training CSV, named 'A', 'B', 'C', ... in order.
    train_files = cfg.train_csv
    clients = {}
    for i, c in enumerate(string.ascii_uppercase):
        if i < len(train_files):
            clients[c] = {}
        else:
            break

    # initialise clients
    for i, client in enumerate(clients):
        copyfile(train_files[i],
                 os.path.join(args.save_path, f'train_{client}.csv'))
        clients[client]['dataloader_train'] =\
            DataLoader(
                ImageDataset(train_files[i], cfg, mode='train'),
                batch_size=cfg.train_batch_size,
                num_workers=args.num_workers,drop_last=True,
                shuffle=True
            )
        clients[client]['bytes_uploaded'] = 0.0
        clients[client]['epoch'] = 0
    copyfile(cfg.dev_csv, os.path.join(args.save_path, 'dev.csv'))

    dataloader_dev = DataLoader(ImageDataset(cfg.dev_csv, cfg, mode='dev'),
                                batch_size=cfg.dev_batch_size,
                                num_workers=args.num_workers,
                                drop_last=False,
                                shuffle=False)
    dev_header = dataloader_dev.dataset._label_header

    w_global = model.state_dict()

    summary_train = {'epoch': 0, 'step': 0}
    summary_dev = {'loss': float('inf'), 'acc': 0.0}
    summary_writer = SummaryWriter(args.save_path)
    # cfg.epoch is reinterpreted as the number of communication rounds.
    comm_rounds = cfg.epoch
    best_dict = {
        "acc_dev_best": 0.0,
        "auc_dev_best": 0.0,
        "loss_dev_best": float('inf'),
        "fused_dev_best": 0.0,
        "best_idx": 1
    }

    # Communication rounds loop
    for cr in range(comm_rounds):
        logging.info('{}, Start communication round {} of FL - {} ...'.format(
            time.strftime("%Y-%m-%d %H:%M:%S"), cr + 1, cfg.fl_technique))

        w_locals = []

        for client in clients:

            logging.info(
                '{}, Start local training process for client {}, communication round: {} ...'
                .format(time.strftime("%Y-%m-%d %H:%M:%S"), client, cr + 1))

            # Load previous current global model as start point
            model = Classifier(cfg).to(device).train()

            model.load_state_dict(w_global)

            if cfg.fl_technique == "FedProx":
                # FedProx needs the round-start global weights to compute
                # its proximal regularization term during local training.
                global_weight_collector = get_global_weights(model, device)
            else:
                global_weight_collector = None

            optimizer = get_optimizer(model.parameters(), cfg)

            # local training loops
            for epoch in range(cfg.local_epoch):
                lr = lr_schedule(cfg.lr, cfg.lr_factor, epoch, cfg.lr_epochs)
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                summary_train, best_dict = train_epoch_fl(
                    summary_train, summary_dev, cfg, args, model,
                    clients[client]['dataloader_train'], dataloader_dev,
                    optimizer, summary_writer, best_dict, dev_header, epoch,
                    global_weight_collector)

                summary_train['step'] += 1

            # NOTE(review): sys.getsizeof on a dict measures only the dict
            # object itself, not the tensor payloads — the logged "bytes
            # uploaded" underestimates real transfer size.
            bytes_to_upload = sys.getsizeof(model.state_dict())
            clients[client]['bytes_uploaded'] += bytes_to_upload
            logging.info(
                '{}, Completed local rounds for client {} in communication round {}. '
                'Uploading {} bytes to server, {} bytes in total sent from client'
                .format(time.strftime("%Y-%m-%d %H:%M:%S"), client, cr + 1,
                        bytes_to_upload, clients[client]['bytes_uploaded']))

            w_locals.append(model.state_dict())

        # Server-side aggregation of the per-client weights.
        if cfg.fl_technique == "FedAvg":
            w_global = fed_avg(w_locals)
        elif cfg.fl_technique == 'WFedAvg':
            w_global = weighted_fed_avg(w_locals, cfg.train_proportions)
        elif cfg.fl_technique == 'FedProx':
            # Use weighted FedAvg when using FedProx
            w_global = weighted_fed_avg(w_locals, cfg.train_proportions)

        # Test the performance of the averaged model
        avged_model = Classifier(cfg).to(device)
        avged_model.load_state_dict(w_global)

        time_now = time.time()
        summary_dev, predlist, true_list = test_epoch(summary_dev, cfg, args,
                                                      avged_model,
                                                      dataloader_dev)
        time_spent = time.time() - time_now

        # Per-class ROC AUC on the dev set.
        auclist = []
        for i in range(len(cfg.num_classes)):
            y_pred = predlist[i]
            y_true = true_list[i]
            fpr, tpr, thresholds = metrics.roc_curve(y_true,
                                                     y_pred,
                                                     pos_label=1)
            auc = metrics.auc(fpr, tpr)
            auclist.append(auc)
        auc_summary = np.array(auclist)

        loss_dev_str = ' '.join(
            map(lambda x: '{:.5f}'.format(x), summary_dev['loss']))
        acc_dev_str = ' '.join(
            map(lambda x: '{:.3f}'.format(x), summary_dev['acc']))
        auc_dev_str = ' '.join(map(lambda x: '{:.3f}'.format(x), auc_summary))

        logging.info(
            '{}, Averaged Model -> Dev, Step : {}, Loss : {}, Acc : {}, Auc : {},'
            'Mean auc: {:.3f} '
            'Run Time : {:.2f} sec'.format(time.strftime("%Y-%m-%d %H:%M:%S"),
                                           summary_train['step'], loss_dev_str,
                                           acc_dev_str, auc_dev_str,
                                           auc_summary.mean(), time_spent))

        for t in range(len(cfg.num_classes)):
            summary_writer.add_scalar('dev/loss_{}'.format(dev_header[t]),
                                      summary_dev['loss'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/acc_{}'.format(dev_header[t]),
                                      summary_dev['acc'][t],
                                      summary_train['step'])
            summary_writer.add_scalar('dev/auc_{}'.format(dev_header[t]),
                                      auc_summary[t], summary_train['step'])

        # Track the best dev metric selected by cfg.best_target and save a
        # rotating set of cfg.save_top_k 'best{idx}.ckpt' checkpoints.
        save_best = False

        mean_acc = summary_dev['acc'][cfg.save_index].mean()
        if mean_acc >= best_dict['acc_dev_best']:
            best_dict['acc_dev_best'] = mean_acc
            if cfg.best_target == 'acc':
                save_best = True

        mean_auc = auc_summary[cfg.save_index].mean()
        if mean_auc >= best_dict['auc_dev_best']:
            best_dict['auc_dev_best'] = mean_auc
            if cfg.best_target == 'auc':
                save_best = True

        mean_loss = summary_dev['loss'][cfg.save_index].mean()
        if mean_loss <= best_dict['loss_dev_best']:
            best_dict['loss_dev_best'] = mean_loss
            if cfg.best_target == 'loss':
                save_best = True

        if save_best:
            torch.save(
                {
                    'epoch': summary_train['epoch'],
                    'step': summary_train['step'],
                    'acc_dev_best': best_dict['acc_dev_best'],
                    'auc_dev_best': best_dict['auc_dev_best'],
                    'loss_dev_best': best_dict['loss_dev_best'],
                    'state_dict': avged_model.state_dict()
                },
                os.path.join(args.save_path,
                             'best{}.ckpt'.format(best_dict['best_idx'])))

            best_dict['best_idx'] += 1
            if best_dict['best_idx'] > cfg.save_top_k:
                best_dict['best_idx'] = 1
            logging.info('{}, Best, Step : {}, Loss : {}, Acc : {},'
                         'Auc :{},Best Auc : {:.3f}'.format(
                             time.strftime("%Y-%m-%d %H:%M:%S"),
                             summary_train['step'], loss_dev_str, acc_dev_str,
                             auc_dev_str, best_dict['auc_dev_best']))
        # Always overwrite the rolling 'train.ckpt' with the latest round.
        torch.save(
            {
                'epoch': cr,
                'step': summary_train['step'],
                'acc_dev_best': best_dict['acc_dev_best'],
                'auc_dev_best': best_dict['auc_dev_best'],
                'loss_dev_best': best_dict['loss_dev_best'],
                'state_dict': avged_model.state_dict()
            }, os.path.join(args.save_path, 'train.ckpt'))
class MDAIModel:
    """MD.ai deployment wrapper for a multi-label chest X-ray classifier.

    Loads a ``Classifier`` from a sibling ``model`` directory and serves
    per-file predictions with Grad-CAM and Integrated-Gradients
    explanation images attached to each positive class.
    """

    def __init__(self):
        # Model assets live in a 'model' directory that is a sibling of
        # this file's parent directory.
        root_path = os.path.join(os.path.dirname(os.path.dirname(__file__)),
                                 "model")

        with open(os.path.join(root_path, "config/example.json")) as f:
            cfg = edict(json.load(f))

        self.model = Classifier(cfg)
        # Force six binary (one-logit) heads, then rebuild the head layers
        # so they match the overridden class count.
        self.model.cfg.num_classes = [1, 1, 1, 1, 1, 1]
        self.model._init_classifier()
        self.model._init_attention_map()
        self.model._init_bn()

        if torch.cuda.is_available():
            self.model = self.model.eval().cuda()
        else:
            self.model = self.model.eval().cpu()

        chkpt_path = os.path.join(root_path, "model_best.pt")
        # map_location keeps CPU-only hosts working even if the checkpoint
        # was saved from a GPU run.
        self.model.load_state_dict(
            torch.load(chkpt_path, map_location=lambda storage, loc: storage))

    def predict(self, data):
        """
        See https://github.com/mdai/model-deploy/blob/master/mdai/server.py for details on the
        schema of `data` and the required schema of the outputs returned by this function.
        """
        input_files = data["files"]
        input_annotations = data["annotations"]  # currently unused
        input_args = data["args"]  # currently unused

        outputs = []

        for file in input_files:
            # Only DICOM payloads are supported; skip anything else.
            if file["content_type"] != "application/dicom":
                continue

            ds = pydicom.dcmread(BytesIO(file["content"]))
            x = ds.pixel_array

            # Keep the raw pixel array for overlaying the Grad-CAM heatmap.
            x_orig = x

            # preprocess image
            # convert grayscale to RGB
            # Upscale, contrast-normalize (CLAHE on [0, 1] data), resize to
            # the 512x512 network input, and rescale to [-1, 1].
            x = cv2.resize(x, (1024, 1024))
            x = equalize_adapthist(x.astype(float) / x.max(), clip_limit=0.01)
            x = cv2.resize(x, (512, 512))
            x = x * 2 - 1
            # Stack the single channel 3x -> shape (1, 3, 512, 512).
            x = np.array([[x, x, x]])
            x = torch.from_numpy(x).float()
            if torch.cuda.is_available():
                x = x.cuda()
            else:
                x = x.cpu()

            with torch.no_grad():
                logits, logit_maps = self.model(x)
                logits = torch.cat(logits, dim=1).detach().cpu()
                # `threshs` is a module-level array of per-class logit
                # thresholds defined outside this class; subtracting it
                # centers the 0.5 decision point on each threshold.
                y_prob = torch.sigmoid(logits -
                                       torch.from_numpy(threshs).reshape((1,
                                                                          6)))
                y_prob = y_prob.cpu().numpy()

            # Re-enable gradients on the input so the explanation methods
            # below can backpropagate with respect to it.
            x.requires_grad = True

            y_classes = y_prob >= 0.5
            class_indices = np.where(y_classes.astype("bool"))[1]

            if len(class_indices) == 0:
                # no outputs, return 'NONE' output type
                output = {
                    "type": "NONE",
                    "study_uid": str(ds.StudyInstanceUID),
                    "series_uid": str(ds.SeriesInstanceUID),
                    "instance_uid": str(ds.SOPInstanceUID),
                    "frame_number": None,
                }
                outputs.append(output)
            else:
                # One ANNOTATION output per positive class, each with two
                # PNG explanation images.
                for class_index in class_indices:
                    probability = y_prob[0][class_index]

                    gradcam = GradCam(self.model)
                    gradcam_output = gradcam.generate_cam(
                        x, x_orig, class_index)
                    gradcam_output_buffer = BytesIO()
                    gradcam_output.save(gradcam_output_buffer, format="PNG")

                    intgrad = IntegratedGradients(self.model)
                    intgrad_output = intgrad.generate_integrated_gradients(
                        x, class_index, 5)
                    intgrad_output_buffer = BytesIO()
                    intgrad_output.save(intgrad_output_buffer, format="PNG")

                    output = {
                        "type":
                        "ANNOTATION",
                        "study_uid":
                        str(ds.StudyInstanceUID),
                        "series_uid":
                        str(ds.SeriesInstanceUID),
                        "instance_uid":
                        str(ds.SOPInstanceUID),
                        "frame_number":
                        None,
                        "class_index":
                        int(class_index),
                        "data":
                        None,
                        "probability":
                        float(probability),
                        "explanations": [
                            {
                                "name": "Grad-CAM",
                                "description":
                                "Visualize how parts of the image affects neural network’s output by looking into the activation maps. From _Grad-CAM: Visual Explanations from Deep Networks via Gradient-based Localization_ (https://arxiv.org/abs/1610.02391)",
                                "content": gradcam_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                            {
                                "name": "Integrated Gradients",
                                "description":
                                "Visualize an average of the gradients along the construction of the input towards the decision. From _Axiomatic Attribution for Deep Networks_ (https://arxiv.org/abs/1703.01365)",
                                "content": intgrad_output_buffer.getvalue(),
                                "content_type": "image/png",
                            },
                        ],
                    }
                    outputs.append(output)

        return outputs
Пример #31
0
                          train=False,
                          transform=transforms.ToTensor())
    dataloader = {
        'train':
        torch.utils.data.DataLoader(train_dataset,
                                    batch_size=opts.batch_size,
                                    shuffle=True),
        'test':
        torch.utils.data.DataLoader(test_dataset,
                                    batch_size=opts.batch_size,
                                    shuffle=False)
    }

    cvae = CVAE(opts.latent_size, device).to(device)
    dis = Discriminator().to(device)
    classifier = Classifier(opts.latent_size).to(device)
    classer = CLASSIFIERS().to(device)

    print(cvae)
    print(dis)
    print(classifier)

    optimizer_cvae = torch.optim.Adam(cvae.parameters(),
                                      lr=opts.lr,
                                      betas=(opts.b1, opts.b2),
                                      weight_decay=opts.weight_decay)
    optimizer_dis = torch.optim.Adam(dis.parameters(),
                                     lr=opts.lr,
                                     betas=(opts.b1, opts.b2),
                                     weight_decay=opts.weight_decay)
    optimizer_classifier = torch.optim.Adam(classifier.parameters(),
Пример #32
0
    train_labels['train_u_ones']    = np.array(fp['train_u_ones'], dtype=np.int8)
    train_labels['train_u_zeros']   = np.array(fp['train_u_zeros'], dtype=np.int8)
    train_labels['train_u_random']  = np.array(fp['train_u_random'], dtype=np.int8)

np_train_samples = None
# Assemble the training images from numbered .npy chunks into one array.
# NOTE(review): np.concatenate re-allocates on every chunk, so peak memory
# is roughly twice the final array size while loading.
for i in range(args.chunk_count):
    with open(f'{args.train_chunks}/chexpert_dset_chunk_{i+1}.npy', 'rb') as f:
        if np_train_samples is None:
            np_train_samples = np.load(f)
        else:
            np_train_samples = np.concatenate((np_train_samples, np.load(f)))
#
device = torch.device(f'cuda:{args.gpu}')
# load best chexpert model from normal
print('loading network: '+ args.saved_model_path)
model = Classifier(cfg)
#model = DataParallel(model, device_ids=args.gpu).to(device)
model = DataParallel(model, device_ids=[args.gpu]).to(device)
# Checkpoint keys lack the DataParallel 'module.' prefix, so restore into
# the wrapped module.
ckpt = torch.load(args.saved_model_path, map_location=device)
model.module.load_state_dict(ckpt['state_dict'])
model.cuda()
#
# shuffle=False keeps sample order aligned with the label arrays.
dataloader_train = DataLoader(
        ImageDataset([np_train_samples, train_labels], cfg, mode='train'),
        batch_size=cfg.train_batch_size, num_workers=args.num_workers,
        drop_last=False, shuffle=False)

dataloader_dev_val = DataLoader(
        ImageDataset([np_dev_val_h5_file, np_dev_val_u_zeros, np_dev_val_u_ones, np_dev_val_u_random], cfg, mode='val'),
        batch_size=cfg.dev_batch_size, num_workers=args.num_workers,
        drop_last=False, shuffle=False)