Example #1
    def __init__(self, config):
        super().__init__()
        self.save_hyperparameters()
        self.model = Classifier(config)
        self.model.init_phi_normal(config['n_classes'])
        self.config = config
        self.loss_func = self.get_loss_func()
Example #2
def train_model(data, word_embeddings):
    # 10-fold cross validation
    n_fold = 10
    fold_size = int(len(data[0]) / n_fold)

    loss_list, acc_list = [], []
    for i in range(1, n_fold + 1):
        FLAGS.train_time = i
        train_data, valid_data = utils.split_train_valid(data, i, fold_size)
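        # build a fresh graph and session per fold so weights never leak across folds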
        graph = tf.Graph()
        with tf.Session(graph=graph) as sess:
            model = Classifier(FLAGS, sess)
            model.build_model()
            loss, acc = model.run(train_data, valid_data, word_embeddings)
            loss_list.append(loss)
            acc_list.append(acc)

    avg_loss = np.mean(loss_list)
    avg_acc = np.mean(acc_list)
    print("10fold_loss&acc:", avg_loss, avg_acc)
    print('10fold_std_loss&acc:', np.std(loss_list), np.std(acc_list))
    logging.debug('10fold_loss: ' + str(avg_loss) + '\t10fold_acc :' +
                  str(avg_acc))
    logging.debug('10fold_loss_std: ' + str(np.std(loss_list)) +
                  '\t10fold_acc_std :' + str(np.std(acc_list)))
Example #3
def get_classifier(args, runs_dir=None):
    classifier_name = args['classifier_name']
    balance = args['balance']

    if classifier_name == 'tree':
        if balance == 'with_loss':
            return tree.DecisionTreeClassifier(class_weight='balanced')
        elif balance == 'explicit':
            return tree.DecisionTreeClassifier()
        elif balance == 'sample_per_batch':
            print("Decision Tree does not use mini-batches")
            exit()

    elif classifier_name == 'forest':
        if balance == 'with_loss':
            return RandomForestClassifier(n_estimators=10, class_weight='balanced')
        elif balance == 'explicit':
            return RandomForestClassifier(n_estimators=10)
        elif balance == 'sample_per_batch':
            print("Random Forest does not use mini-batches")
            exit()

    elif classifier_name == 'softmax':
        if runs_dir is None:
            print('please provide runs_dir')
            exit(1)
        clf = Classifier(method='softmax',
                         input_dim=args['input_dim'],
                         num_classes=args['num_class'],
                         lr=args['lr'],
                         reg=args['reg'],
                         num_iters=args['num_iters'],
                         batch_size=args['batch_size'],
                         runs_dir=runs_dir,
                         device=args['device'],
                         balance=args['balance'],
                         class_weights=args['class_weight'])
        return clf

    elif classifier_name == 'cnn':
        if runs_dir is None:
            print('please provide runs_dir')
            exit(1)
        clf = Classifier(method='cnn2',
                         input_dim=args['input_dim'],
                         num_classes=args['num_class'],
                         lr=args['lr'],
                         reg=args['reg'],
                         num_iters=args['num_iters'],
                         batch_size=args['batch_size'],
                         runs_dir=runs_dir,
                         device=args['device'],
                         balance=args['balance'],
                         class_weights=args['class_weight'])
        return clf
Example #4
def model_train(file_path, dataset, model_type, savedir, **kwargs):
    # load data
    # TODO: fill in the remaining datasets below
    if dataset == 'titanic':
        titanic = TitanicData(file_path)
        (x_train,
         y_train), _ = titanic.transform(scaling=kwargs.pop('scaling'))
    elif dataset == 'house_price':
        house_price = HousePriceData(file_path)
        (x_train,
         y_train), _ = house_price.transform(scaling=kwargs.pop('scaling'))
    elif dataset == 'bike_sharing':
        pass
    elif dataset == 'cervical_cancer':
        cervical_cancer = CervicalCancerData(file_path)
        x_train, y_train = cervical_cancer.transform(
            scaling=kwargs.pop('scaling'))
    elif dataset == 'youtube_spam':
        pass
    print('Complete Data Pre-processing')

    # add model-specific arguments: the DNN needs the input feature count
    if model_type == 'DNN':
        kwargs['params']['nb_features'] = x_train.shape[1]

    # model training
    clf = Classifier(model_type=model_type, **kwargs.pop('params'))
    clf.train(x_train, y_train, savedir, **kwargs)
    print('Complete Training Model')
    print('Complete Saving Model')
Example #5
    def __init__(self, hparams):
        """Summary

        Args:
            hparams (TYPE): hyper parameters from parser
        """
        super(MLCModel, self).__init__()
        self.hparams = hparams
        self.device = torch.device("cuda:{}".format(hparams.gpus) if torch.
                                   cuda.is_available() else "cpu")

        with open(self.hparams.json_path, 'r') as f:
            self.cfg = edict(json.load(f))
            hparams_dict = vars(self.hparams)
            self.cfg['hparams'] = hparams_dict
            if self.hparams.verbose is True:
                print(json.dumps(self.cfg, indent=4))

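        # choose the loss: standard criteria only need the device, while the
        # class-balanced loss also needs per-class sample counts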
        if self.cfg.criterion in ['bce', 'focal', 'sce', 'bce_v2', 'bfocal']:
            self.criterion = init_loss_func(self.cfg.criterion,
                                            device=self.device)
        elif self.cfg.criterion == 'class_balance':
            samples_per_cls = list(
                map(int, self.cfg.samples_per_cls.split(',')))
            self.criterion = init_loss_func(self.cfg.criterion,
                                            samples_per_cls=samples_per_cls,
                                            loss_type=self.cfg.loss_type)
        else:
            self.criterion = init_loss_func(self.cfg.criterion)

        self.labels = init_labels(name=self.hparams.data_name)
        if self.cfg.extract_fields is None:
            self.cfg.extract_fields = ','.join(
                [str(idx) for idx in range(len(self.labels))])
        else:
            assert isinstance(self.cfg.extract_fields,
                              str), "extract_fields must be string!"

        self.model = Classifier(self.cfg, self.hparams)
        self.state_dict = None
        # Optionally load weights trained under a different configuration
        if self.hparams.load is not None and len(self.hparams.load) > 0:
            if not os.path.exists(hparams.load):
                raise ValueError('{} does not exist!'.format(hparams.load))
            state_dict = load_state_dict(self.hparams.load, self.model,
                                         self.device)
            self.state_dict = state_dict

        # DataParallel model
        if torch.cuda.device_count() > 1 and self.hparams.gpus == 0:
            self.model = nn.DataParallel(self.model)

        self.model.to(device=self.device)
        self.num_tasks = list(map(int, self.cfg.extract_fields.split(',')))
        self.names = list()
        self.optimizer, self.scheduler = self.configure_optimizers()
        self.train_loader = self.train_dataloader()
        self.valid_loader = self.val_dataloader()
        self.test_loader = self.test_dataloader()
Example #6
def get_classifier(args):
    classifier_name = args['classifier_name']
    balance = args['balance']

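    # 'with_loss' relies on sklearn's class_weight machinery;
    # 'explicit'/'no' returns an unweighted model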
    if classifier_name == 'tree':
        if balance == 'with_loss':
            return tree.DecisionTreeClassifier(
                class_weight='balanced',
                max_features=args['max_features'],
                min_samples_leaf=args['min_samples_leaf'])
        elif balance == 'explicit' or balance == 'no':
            return tree.DecisionTreeClassifier(
                max_features=args['max_features'],
                min_samples_leaf=args['min_samples_leaf'])

    elif classifier_name == 'forest':
        bootstrap = args['bootstrap']
        if balance == 'with_loss':
            return RandomForestClassifier(
                n_estimators=args['n_estimators'],
                class_weight='balanced',
                n_jobs=-1,
                random_state=SEED,
                bootstrap=bootstrap,
                max_features=args['max_features'],
                min_samples_leaf=args['min_samples_leaf'],
                max_samples=args['max_samples'],
                max_depth=args['max_depth'])
        elif balance == 'with_loss_sub':
            return RandomForestClassifier(
                n_estimators=args['n_estimators'],
                class_weight='balanced_subsample',
                n_jobs=-1,
                random_state=SEED,
                bootstrap=bootstrap,
                max_features=args['max_features'],
                min_samples_leaf=args['min_samples_leaf'],
                max_samples=args['max_samples'],
                max_depth=args['max_depth'])

        elif balance == 'explicit' or balance == 'no':
            return RandomForestClassifier(
                n_estimators=args['n_estimators'],
                n_jobs=None,
                random_state=SEED,
                bootstrap=bootstrap,
                max_features=args['max_features'],
                min_samples_leaf=args['min_samples_leaf'],
                max_samples=args['max_samples'],
                max_depth=args['max_depth'])

    elif classifier_name == 'softmax':
        clf = Classifier(args, method='softmax')
        return clf

    elif classifier_name == 'cnn':
        clf = Classifier(args, method='cnn2')
        return clf
Example #7
    def __init__(self):
        self.train_lr = 1e-4
        self.num_classes = 9
        self.clf_target = Classifier().cuda()
        self.clf2 = Classifier().cuda()
        self.clf1 = Classifier().cuda()
        self.encoder = Encoder().cuda()
        self.pretrain_lr = 1e-4
        self.weights_coef = 1e-3
Example #8
    def __init__(self, state_size, action_size, config):
        self.env_name = config["env_name"]
        self.state_size = state_size
        self.action_size = action_size
        self.seed = config["seed"]
        self.clip = config["clip"]
        self.device = 'cuda'
        print("Clip ", self.clip)
        print("cuda ", torch.cuda.is_available())
        self.double_dqn = config["DDQN"]
        print("Use double dqn", self.double_dqn)
        self.lr_pre = config["lr_pre"]
        self.batch_size = config["batch_size"]
        self.lr = config["lr"]
        self.tau = config["tau"]
        print("self tau", self.tau)
        self.gamma = 0.99
        self.fc1 = config["fc1_units"]
        self.fc2 = config["fc2_units"]
        self.fc3 = config["fc3_units"]
        self.qnetwork_local = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.qnetwork_target = QNetwork(state_size, action_size, self.fc1, self.fc2,self.fc3,  self.seed).to(self.device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=self.lr)
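        # soft_update with tau=1 hard-copies the local weights into the target network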
        self.soft_update(self.qnetwork_local, self.qnetwork_target, 1)
        
        self.q_shift_local = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.q_shift_target = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.optimizer_shift = optim.Adam(self.q_shift_local.parameters(), lr=self.lr)
        self.soft_update(self.q_shift_local, self.q_shift_target, 1)
         
        self.R_local = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3,  self.seed).to(self.device)
        self.R_target = QNetwork(state_size, action_size, self.fc1, self.fc2, self.fc3, self.seed).to(self.device)
        self.optimizer_r = optim.Adam(self.R_local.parameters(), lr=self.lr)
        self.soft_update(self.R_local, self.R_target, 1) 

        self.expert_q = DQNetwork(state_size, action_size, seed=self.seed).to(self.device)
        self.expert_q.load_state_dict(torch.load('checkpoint.pth'))
        self.memory = Memory(action_size, config["buffer_size"], self.batch_size, self.seed, self.device)
        self.t_step = 0
        self.steps = 0
        self.predicter = Classifier(state_size, action_size, self.seed).to(self.device)
        self.optimizer_pre = optim.Adam(self.predicter.parameters(), lr=self.lr_pre)
        pathname = "lr_{}_batch_size_{}_fc1_{}_fc2_{}_fc3_{}_seed_{}".format(self.lr, self.batch_size, self.fc1, self.fc2, self.fc3, self.seed)
        pathname += "_clip_{}".format(config["clip"])
        pathname += "_tau_{}".format(config["tau"])
        now = datetime.now()
        dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
        pathname += dt_string
        tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
        self.writer = SummaryWriter(tensorboard_name)
        print("summary writer ", tensorboard_name)
        self.average_prediction = deque(maxlen=100)
        self.average_same_action = deque(maxlen=100)
        self.all_actions = []
        for a in range(self.action_size):
            action = torch.zeros(1) + a  # one-element tensor holding the action index
            self.all_actions.append(action.to(self.device))
Example #9
def main():
    args = get_parser().parse_args()
    # Arguments by hand
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.target_name = "LST_status"

    table = pd.read_csv(args.table_data)
    list_wsi = os.listdir(args.wsi)
    list_lst = [
        table[table['ID'] == x][args.target_name].item() for x in list_wsi
    ]
    list_dataset = []

    ## Initialize the model
    model = Classifier(args=args)

    ## Create the datasets
    for path in list_wsi:
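        # point args.wsi at one slide, build its dataset, then restore the directory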
        args.wsi = os.path.join(args.wsi, path)
        list_dataset.append(dataset(args))
        args.wsi = os.path.dirname(args.wsi)
    list_dataset = np.array(list_dataset)

    ## K-fold validation
    splitter = StratifiedKFold(n_splits=3)
    for r_eval, (id_train,
                 id_val) in enumerate(splitter.split(list_lst, list_lst)):

        model.name = 'repeat_val_{}'.format(r_eval)
        dataset_train = list_dataset[id_train]
        dataset_val = list_dataset[id_val]
        for db in dataset_train:
            db.transform = get_transform(train=True)
        for db in dataset_val:
            db.transform = get_transform(train=False)
        dataset_train = torch.utils.data.ConcatDataset(dataset_train)
        dataset_val = torch.utils.data.ConcatDataset(dataset_val)
        dataloader_train = DataLoader(dataset=dataset_train,
                                      batch_size=args.batch_size,
                                      num_workers=24)
        dataloader_val = DataLoader(dataset=dataset_val,
                                    batch_size=args.batch_size,
                                    num_workers=24)

        # Initialize dataloaders. This creates two datasets; careful: loading
        # everything into memory would require merging them into a single dataset.
        dataloader_train, dataloader_val = make_loaders(args=args)

        while model.counter['epochs'] < args.epochs:
            print("Begin training")
            train(model=model, dataloader=dataloader_train)
            val(model=model, dataloader=dataloader_val)
            if model.early_stopping.early_stop:
                break
        model.writer.close()
Example #10
def classification_accuracy(dataset,cls_path, cls_checkpoint, layer, fully_supervised):
    """
        Given a trained classifier, return the classification accuracy on CIFAR10 test data
    """

    args = parser.parse_args()
    dataset = args.dataset
    cls_path = args.cls_path
    cls_checkpoint = args.cls_checkpoint
    layer = args.layer
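    # note: the parsed CLI flags above override the function arguments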


    # GPU setup
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
        # scale the batch size with the number of visible GPUs
        batch_size = BATCH_SIZE * torch.cuda.device_count()
    else:
        device = torch.device('cpu')
        batch_size = BATCH_SIZE
        

    # Load classifier
    cls_file = get_cls_checkpoint(cls_path,cls_checkpoint)
    print("Checkpoint to be loaded:",cls_file)
    cls_file = checkpoint_parser(cls_file)
    fully_supervised = (layer == '')
    classifier = Classifier(eval=True, layer=layer, fully_supervised=fully_supervised)
    classifier.load_state_dict(cls_file)
    classifier = nn.DataParallel(classifier).to(device)

    # Load test data
    _, _, test_batches, _ = data_loader(dataset, batch_size)

    # Accuracy evaluation
    acc = []
    test_batches = tqdm(test_batches)
    for batch, test_label in test_batches:
        batch = batch.to(device)
        y = classifier(batch)
        _, predict_label = y.max(1)
        predict_label = predict_label.to(torch.device('cpu'))
        
        # from tensor to numpy array
        predict_label = predict_label.numpy()
        test_label = test_label.numpy()
        
        batch_acc = (predict_label == test_label).tolist()
        acc += batch_acc

    print('Test accuracy:', np.mean(acc))
Example #11
def pretrain(source_data_loader,
             test_data_loader,
             no_classes,
             embeddings,
             epochs=20,
             batch_size=128,
             cuda=False):

    classifier = Classifier()
    encoder = Encoder(embeddings)

    if cuda:
        classifier.cuda()
        encoder.cuda()
    # Jointly optimize both encoder and classifier
    encoder_params = filter(lambda p: p.requires_grad, encoder.parameters())
    optimizer = optim.Adam(
        list(encoder_params) + list(classifier.parameters()))

    # Use per-class weights to compensate for class imbalance in the data
    c = [1] * len(no_classes)
    weights = torch.FloatTensor(len(no_classes))
    for i, (a, b) in enumerate(zip(c, no_classes)):
        weights[i] = 0 if b == 0 else a / b
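    # a class with b examples gets weight 1/b; empty classes get weight 0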

    loss_fn = nn.CrossEntropyLoss(weight=Variable(weights))

    print('Training encoder and classifier')
    for e in range(epochs):

        # pretrain with whole source data -- use groups with DCD
        for sample in source_data_loader:
            x, y = Variable(sample[0]), Variable(sample[1])
            optimizer.zero_grad()

            if cuda:
                x, y = x.cuda(), y.cuda()

            output = model_fn(encoder, classifier)(x)

            loss = loss_fn(output, y)

            loss.backward()

            optimizer.step()

        print("Epoch", e, "Loss", loss.data[0], "Accuracy",
              eval_on_test(test_data_loader, model_fn(encoder, classifier)))

    return encoder, classifier
Example #12
def main():
    args = get_parser().parse_args()
    print(args)
    # Make datasets
    train_dir = os.path.join(args.datadir, 'train')
    val_dir = os.path.join(args.datadir, 'val')
    print('loading train dataset')
    train_loader = get_dataloader(train_dir, args.batch_size, args.pretrained,
                                  args.augmented)
    print('loading val dataset')
    val_loader = get_dataloader(val_dir, args.batch_size, args.pretrained,
                                False)

    args.num_class = 2  # np.unique(train_loader[1])
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the model
    model = Classifier(args=args)

    while model.counter['epochs'] < args.epochs:
        train(model=model, dataloader=train_loader)
        val(model=model, dataloader=val_loader)
        if model.early_stopping.early_stop:
            break
    if model.writer:
        model.writer.close()
Example #13
    def build_models(self):

        # Models
        self.net = Classifier().to(self.config['device'])

        # Optimizers
        self.optimizer = getattr(torch.optim, self.config['optimizer'])(
            self.net.parameters(),
            lr=self.config['lr'],
        )

        # Criterion (kept unreduced to yield per-sample losses)
        self.criterion = nn.CrossEntropyLoss(reduction='none')

        # Record
        logging.info(self.net)
Example #14
def create_model(args, vocab, embedding=None):
    """Creates the model.

    Args:
        args: Instance of Argument Parser.
        vocab: Instance of Vocabulary.
        embedding: Optional pre-built embedding; replaced by a GloVe
            embedding when args.use_glove is set.

    Returns:
        A multi-class classification model.
    """
    # Load GloVe embedding.
    if args.use_glove:
        embedding = get_glove_embedding(args.embedding_name, 300, vocab)
    else:
        embedding = None

    # Build the models
    logging.info('Creating multi-class classification model...')
    model = Classifier(len(vocab),
                       embedding_dim=args.embedding_dim,
                       embedding=embedding,
                       hidden_dim=args.num_hidden_nodes,
                       output_dim=args.num_output_nodes,
                       num_layers=args.num_layers,
                       bidirectional=args.bidirectional,
                       dropout=args.dropout,
                       rnn_cell=args.rnn_cell)

    return model
Example #15
def update_classifier(session, uid, model):
    """
    Persists the user's classifier to the database.
    """
    classifier = Classifier(timestamp=datetime.now(), uid=uid, model=model)
    session.query(Classifier).filter_by(uid=uid).delete(synchronize_session='fetch')
    session.add(classifier)
Example #16
def test_cls():
    tf.reset_default_graph()

    classifier = Classifier()

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, FLAGS.cls_model)

        n_iters = int(np.ceil(len(mnist.test.labels) / FLAGS.cls_batch))
        accuracies = []
        for i in trange(n_iters, desc='Classifier testing'):
            x = mnist.test.images[i * FLAGS.cls_batch:(i + 1) * FLAGS.cls_batch]
            y = mnist.test.labels[i * FLAGS.cls_batch:(i + 1) * FLAGS.cls_batch]
            accuracy = sess.run(classifier.accuracy,
                                feed_dict={
                                    classifier.x:
                                    x.reshape([-1, FLAGS.h, FLAGS.w, FLAGS.c]),
                                    classifier.y:
                                    y,
                                    classifier.keep_prob:
                                    1.0,
                                })
            accuracies.append(accuracy)
    print('Final accuracy:', sum(accuracies) / len(accuracies))
Example #17
def init_model(network_type, restore,num_classes):
    """Init models with cuda and weights."""
    # init weights of model
    if network_type == "src_encoder":
        net = Encoder(num_classes,domain="src",name=params.srcenc_name)
    elif network_type == "src_classifier":
        net = Classifier(num_classes)
    elif network_type == "tgt_encoder":
        net = Encoder(num_classes,domain="tgt",name=params.tgtenc_name)
    elif network_type == "discriminator":
        net = Discriminator(input_dims=params.d_input_dims,
                hidden_dims=params.d_hidden_dims,
                output_dims=params.d_output_dims)
    else:
        raise NotImplementedError(
            "[utils.py] Network type '{}' is not implemented.".format(network_type))

    #TODO: Initialise with pretrained resnet18 models for our dataset.
    # net.apply(init_weights)

    # restore model weights
    if restore is not None and os.path.exists(restore):
        net.load_state_dict(torch.load(restore))
        net.restored = True
        print("[utils.py] INFO | Restore model from: {}".format(os.path.abspath(restore)))

    # check if cuda is available
    if torch.cuda.is_available():
        cudnn.benchmark = True
        net.cuda()
    net = net.to(device)
    return net
Example #18
def doTest(**kwargs):
    parse(kwargs)
    model = Classifier().cuda().eval()
    model.load(opt.model_dir)
    nodule_list = glob(opt.img_dir + '*_nodule.npy')
    center_list = glob(opt.img_dir + '*_center.npy')
    f = open(opt.csv_file, "wa")
    csv_writer = csv.writer(f, dialect="excel")
    csv_writer.writerow(
        ['seriesuid', 'coordX', 'coordY', 'coordZ', 'probability'])
    for i, patient in enumerate(nodule_list[:opt.limit]):
        if os.path.exists('/tmp/dcsb'):
            import ipdb
            ipdb.set_trace()
        patient_id = patient.split('/')[-1].split('_')[-2]
        #if int( patient_id.split('-')[1])<800:continue
        # print 'doing on',patient_id
        patient_center = get_filename(center_list, patient_id)
        bb = zero_normalize(np.load(patient))  # load the nodule volume
        aa = np.load(patient_center)
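        # crop a 20x36x36 window around the cube centre (index 24) before classification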
        result = do_class(
            bb[:, 24 - 10:24 + 10, 24 - 18:24 + 18, 24 - 18:24 + 18], model)
        length = aa.shape[0]
        topN = min(length, opt.topN)
        index = get_topn(result, topN)
        probability = result[index]
        center_ = aa[index]
        world = voxel_2_world(center_[:, ::-1], patient_id)
        write_csv(world, probability, csv_writer, patient_id,
                  opt.prob_threshould)
        if i % 20 == 0:
            print(i, "patients done")
Example #19
class Solver(object):
    def __init__(self, config):

        # Configurations
        self.config = config

        # Build the models
        self.build_models()

    def build_models(self):

        # Models
        self.net = Classifier().to(self.config['device'])

        # Optimizers
        self.optimizer = getattr(torch.optim, self.config['optimizer'])(
            self.net.parameters(),
            lr=self.config['lr'],
        )

        # Criterion (kept unreduced to yield per-sample losses)
        self.criterion = nn.CrossEntropyLoss(reduction='none')

        # Record
        logging.info(self.net)

    def save_model(self, filename):
        save_path = os.path.join(self.config['save_path'], f'{filename}')
        try:
            logging.info(
                f'Saving best neural network checkpoint to {save_path}')
            torch.save(self.net.state_dict(),
                       save_path,
                       _use_new_zipfile_serialization=False)
        except Exception:
            logging.error(f'Error saving weights to {save_path}')

    def restore_model(self, filename):
        weight_path = os.path.join(self.config['save_path'], f'{filename}')
        try:
            logging.info(f'Loading trained weights from {weight_path}')
            self.net.load_state_dict(
                torch.load(weight_path,
                           map_location=lambda storage, loc: storage))

        except Exception:
            logging.error(f'Error loading weights from {weight_path}')
Example #20
def classification_task(enc_checkpoint_path, checkpoint, task=1):
    encoder_file = get_encoder_file(enc_checkpoint_path, checkpoint)
    if task == 1:
        # fully supervised
        classifier = Classifier(layer=None, fully_supervised=True)
    elif task == 2:
        # supervised using conv from trained encoder
        classifier = Classifier(layer='conv', params_file=encoder_file)
    elif task == 3:
        # supervised using fc from trained encoder
        classifier = Classifier(layer='fc', params_file=encoder_file)
    elif task == 4:
        # supervised using Y from trained encoder
        classifier = Classifier(layer='Y', params_file=encoder_file)
    else:
        raise ValueError('[!] Invalid classification task number.')
    return classifier
Example #21
def update_classifier(uid, model):
    """
    Persists the user's classifier to the database.
    """
    classifier = Classifier(timestamp=datetime.now(), uid=uid, model=model)
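    # remove any previous classifier for this user before adding the new one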
    db_session.query(Classifier).filter_by(uid=uid).delete()
    db_session.add(classifier)
    db_session.commit()
Example #22
def load_classifier(data=None,
                    classes=10,
                    classifier_weights='classifier_weights.h5',
                    backbone_weights='backbone_posttrained_weights.h5',
                    clear_session=True):
    if clear_session:
        keras.backend.clear_session()

    backbone = ResNet()
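    # a forward pass builds the backbone's weights so they can be loaded below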
    backbone(data.get_test()[0])
    classifier = Classifier(backbone, classes)
    if classifier_weights:
        classifier.load_weights(classifier_weights)
    if backbone_weights:
        backbone.load_weights(backbone_weights)

    return classifier
Example #23
def train_and_predict(train_df, test_df):

    # Data cleaning
    cleaner = DataCleaner()
    cleaner.columns_with_no_nan(train_df)
    cleaner.columns_with_no_nan(test_df)
    train_df = cleaner.drop_columns(train_df)
    train_df = cleaner.resolve_nan(train_df)
    test_df = cleaner.drop_columns(test_df)
    test_df = cleaner.resolve_nan(test_df)


    # feature engineering
    train_df, test_df = engineer_features(train_df, test_df)

    # build the underlying model from the Classifier wrapper
    model = Classifier()
    model = model.model()

    # LabelEncoding/OneHotEncoding?
    train_df = model.encode(train_df)
    test_df = model.encode(test_df)

    # training progress and results
    model = model.train(model, train_df)

    # predict on test_df with predict method from Model
    y_test = model.predict(model, test_df)
    return y_test
Example #24
def main():
    args = parse_args()

    cfg = cfg_from_file(args.config)
    print('using config: {}'.format(args.config))

    data_cfg = cfg['data']
    datalist = datalist_from_file(data_cfg['datalist_path'])
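    # 80/20 split of the file list into train and test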
    num_train_files = len(datalist) // 5 * 4
    train_dataset = IMetDataset(data_cfg['dataset_path'],
                                datalist[:num_train_files],
                                transform=data_cfg['train_transform'])
    test_dataset = IMetDataset(data_cfg['dataset_path'],
                               datalist[num_train_files:],
                               transform=data_cfg['test_transform'])
    train_dataloader = data.DataLoader(train_dataset,
                                       batch_size=data_cfg['batch_size'],
                                       shuffle=True)
    test_dataloader = data.DataLoader(test_dataset,
                                      batch_size=data_cfg['batch_size'])

    backbone_cfg = cfg['backbone'].copy()
    backbone_type = backbone_cfg.pop('type')
    if backbone_type == 'ResNet':
        backbone = ResNet(**backbone_cfg)
    elif backbone_type == 'ResNeXt':
        backbone = ResNeXt(**backbone_cfg)
    elif backbone_type == 'DenseNet':
        backbone = DenseNet(**backbone_cfg)
    else:
        raise ValueError('unknown backbone type: {}'.format(backbone_type))
    classifier = Classifier(backbone, backbone.out_feat_dim).cuda()

    train_cfg, log_cfg = cfg['train'], cfg['log']
    criterion = FocalLoss()
    optimizer = torch.optim.SGD(classifier.parameters(),
                                lr=train_cfg['lr'],
                                weight_decay=train_cfg['weight_decay'],
                                momentum=train_cfg['momentum'])
    trainer = Trainer(model=classifier,
                      train_dataloader=train_dataloader,
                      val_dataloader=test_dataloader,
                      criterion=criterion,
                      optimizer=optimizer,
                      train_cfg=train_cfg,
                      log_cfg=log_cfg)
    trainer.train()
Example #25
    def __init__(self, cfg, inShape):
        super(RefAE, self).__init__()
        self.cladec = ClaDecNet(cfg, inShape, cfg["imCh"])
        self.cladec.train()
        from models import Classifier
        cla = Classifier(cfg)
        actModel = getActModel(cfg, cla)
        actModel.train()
        self.seq = nn.Sequential(actModel, self.cladec)
Example #26
    def __init__(self, state_size, action_size, action_dim, config):
        self.state_size = state_size
        self.action_size = action_size
        self.action_dim = action_dim
        self.seed = 0
        self.device = 'cuda'
        self.batch_size = config["batch_size"]
        self.lr = 0.005
        self.gamma = 0.99
        self.q_shift_local = QNetwork(state_size, action_size,
                                      self.seed).to(self.device)
        self.q_shift_target = QNetwork(state_size, action_size,
                                       self.seed).to(self.device)
        self.Q_local = QNetwork(state_size, action_size,
                                self.seed).to(self.device)
        self.Q_target = QNetwork(state_size, action_size,
                                 self.seed).to(self.device)
        self.R_local = RNetwork(state_size, action_size,
                                self.seed).to(self.device)
        self.R_target = RNetwork(state_size, action_size,
                                 self.seed).to(self.device)
        self.policy = PolicyNetwork(state_size, action_size,
                                    self.seed).to(self.device)
        self.predicter = Classifier(state_size, action_dim,
                                    self.seed).to(self.device)
        # self.criterion = nn.CrossEntropyLoss()
        # optimizers
        self.optimizer_q_shift = optim.Adam(self.q_shift_local.parameters(),
                                            lr=self.lr)
        self.optimizer_q = optim.Adam(self.Q_local.parameters(), lr=self.lr)
        self.optimizer_r = optim.Adam(self.R_local.parameters(), lr=self.lr)
        self.optimizer_p = optim.Adam(self.policy.parameters(), lr=self.lr)
        self.optimizer_pre = optim.Adam(self.predicter.parameters(),
                                        lr=self.lr)
        pathname = "lr {} batch_size {} seed {}".format(
            self.lr, self.batch_size, self.seed)
        tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
        self.writer = SummaryWriter(tensorboard_name)
        self.steps = 0
        self.ratio = 1. / action_dim
        self.all_actions = []
        for a in range(self.action_dim):
            action = torch.zeros(1) + a  # one-element tensor holding the action index
            self.all_actions.append(action.to(self.device))
Example #27
def test_coupled_weights_of_backbone():
    """
    This function will fail because there are multiple models defined 
    in the keras/tensorflow graph which are not used during training. 

    Returns:
        bool -- [description]
    """
    data = mnist_data()
    backbone = ResNet()

    preds = backbone(data.get_test()[0])
    gen = ResGen(backbone)
    input_shape = gen.get_input_shape()
    rand_data_shape = ((50, ) + input_shape[1:] + (1, ))
    random_noise_data = np.random.normal(size=rand_data_shape)

    discriminator = Discriminator(backbone)
    classifier = Classifier(backbone, 10)

    discriminator_predictions_1 = discriminator(data.get_test()[0])
    classifier_predictions_1 = classifier.predict(data.get_test()[0])
    generator_predictions_1 = gen.predict(random_noise_data)[0]

    classifier.compile(optimizer='adam',
                       loss='sparse_categorical_crossentropy',
                       metrics=['accuracy'])
    classifier.summary()
    # classifier.fit(x=x_train,y=y_train,batch_size=6000,epochs=1, validation_data=(x_vali,y_vali),callbacks=[checkpoint])
    classifier.fit(x=data.get_n_samples(35)[0],
                   y=data.get_n_samples(35)[1],
                   batch_size=6000,
                   epochs=1,
                   validation_data=data.get_vali())

    discriminator_predictions_2 = discriminator(data.get_test()[0])
    classifier_predictions_2 = classifier.predict(data.get_test()[0])
    generator_predictions_2 = gen.predict(random_noise_data)[0]

    discriminator_diff = discriminator_predictions_1 - discriminator_predictions_2
    classifier_diff = classifier_predictions_1 - classifier_predictions_2
    generator_diff = generator_predictions_1 - generator_predictions_2

    return True
Example #28
def main():
    args = parser.parse_args()

    # classifier
    if args.classifier is not None:
        snapshot = torch.load(args.classifier, map_location=lambda s, _: s)
        classifier = Classifier(snapshot['channels'])
        classifier.load_state_dict(snapshot['model'])
    else:
        classifier = None

    # dataset
    raw_loader = torch.utils.data.DataLoader(Dataset(
        os.path.join(DATA_DIR, 'raw')),
                                             batch_size=args.batch,
                                             shuffle=True,
                                             drop_last=True)
    noised_loader = torch.utils.data.DataLoader(Dataset(
        os.path.join(DATA_DIR, 'noised_tgt')),
                                                batch_size=args.batch,
                                                shuffle=True,
                                                drop_last=True)

    # model
    generator_f = Generator(args.channels)
    generator_r = Generator(args.channels)
    discriminator_f = Discriminator(args.channels)
    discriminator_r = Discriminator(args.channels)

    # train
    trainer = Trainer(generator_f, generator_r, discriminator_f,
                      discriminator_r, classifier, args.gpu)

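    # train; the boolean argument is True only during the first tenth of the epochs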
    for epoch in range(args.epoch):
        trainer.train(noised_loader, raw_loader, epoch < args.epoch // 10)
        print('[{}] {}'.format(epoch, trainer), flush=True)

        snapshot = {
            'channels': args.channels,
            'model': generator_f.state_dict()
        }
        torch.save(snapshot, '{}.tmp'.format(args.file))
        os.rename('{}.tmp'.format(args.file), args.file)
Example #29
    def __init__(self, model=None):
        super(SaL, self).__init__(model)
        if CONFIG.SAL.FC_LAYERS[-1][0] != 2:
            raise ValueError(
                'Shuffle and Learn classifier has only 2 classes: '
                'correct order or incorrect order. Ensure the last layer in '
                'CONFIG.SAL.FC_LAYERS is 2.')

        sal_classifier = Classifier(CONFIG.SAL.FC_LAYERS,
                                    CONFIG.SAL.DROPOUT_RATE)
        self.model['sal_classifier'] = sal_classifier
Example #30
def run(dataset, data_path, model_type, generations, populations):

    (df, features, label, categorical_features,
     sensitive_features) = prepare_data(dataset, data_path)
    X, y = process_categorical(df, features, label, categorical_features)
    (X_train, X_test, y_train, y_test) = prepare_data_split(X, y)

    # split_func = split_on_sensitive_attr(X_train)

    model = Classifier(
        dataset,
        model_type,
        X_train,
        y_train,
        X_test,
        y_test,
        features,
        sensitive_features,
    )
    model.fit()

    nsga_cfg = NSGAConfig(
        generations=generations,
        populations=populations,
        model_type=model_type,
        X_sensitive_a1=model.X_m,
    )

    X_m = model.X_m
    X_f = model.X_f
    y_m = model.y_m
    y_f = model.y_f

    X_test_m = model.X_test_m
    X_test_f = model.X_test_f
    y_test_m = model.y_test_m
    y_test_f = model.y_test_f
    try:
        run_nsga(nsga_cfg)
    except Exception as e:
        print('run_nsga failed: {}'.format(e))