Example #1
def load_encoder(model_name):
    sys.path.insert(1, 'training/')
    import tools
    reload(tools)
    return tools.load_model('data/' + model_name + '_encoder.npz',
                            'data/' + model_name + '_dictionary.pkl',
                            # 'data/GoogleNews-vectors-negative300.bin',
                            'data/ja_word2vec/entity_vector.model.bin')
Example #2
 def load_model(self, path):
     if not os.path.isfile(path):
         sys.stderr.write("[ERROR] file: %s is not exists.\n" % path)
         return False
     load_tree = tools.load_model(path)
     if load_tree:
         self.tree = load_tree
         return True
     return False
Example #3
 def __init__(self, trainedModel=False):
     self.trained = trainedModel
     print('Word Vectors have been loaded...')
     if self.trained:
         print('LOADING TRAINED MODEL')
         self.model = tools.load_model()
     else:
         print('LOADING PRE TRAINED MODEL')
         self.model = skipthoughts.load_model()
     print('loaded model')
Example #4
    def __init__(self,model_path_dict):
        """Initialization requires embedding model path
        """

        self.model_path = model_path_dict

        # compile image feature extractor
        self.vggnet = demo.build_convnet()
        self._get_image_features = theano.function(
            inputs = [self.vggnet['input'].input_var],
            outputs = L.get_output(self.vggnet['fc7'],deterministic=True),
            allow_input_downcast = True
        )

        # load up pretrained VSEM model
        self.model = tools.load_model(
            path_to_model=self.model_path['vse_model']
        )
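A minimal usage sketch for the wrapper above (the class name ImageSentenceRanker, the model path, and the image shape are illustrative assumptions; tools.encode_sentences is the same helper used in the other visual-semantic-embedding examples in this collection):

import numpy as np
import tools

ranker = ImageSentenceRanker({'vse_model': 'data/coco.npz'})  # hypothetical class name and path
dummy_images = np.zeros((2, 3, 224, 224), dtype='float32')    # assumed VGG input layout
image_feats = ranker._get_image_features(dummy_images)        # fc7 features from the convnet
caption_vecs = tools.encode_sentences(ranker.model, ['a dog runs on the beach'])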
Example #5
	def testTrain(self):
		global model
		global embed_map
		main.save_vocab('test_dir', 'test_dir/dict.dat')
		self.assertTrue(os.path.exists('test_dir/dict.dat'))
		
		embed_map = tools.load_googlenews_vectors('word2vec.w2v', binary = True)
		train.trainer(list(main.load_corpus('test_dir')), saveto = 'test_dir/model', saveFreq = 10, n_words = 10) #you may want to change parameters saveFreq or n_words if you use other test corpus texts
		os.rename('test_dir/model.pkl', 'test_dir/model.npz.pkl')
		self.assertTrue(os.path.exists('test_dir/model.npz'))
		self.assertTrue(os.path.exists('test_dir/model.npz.pkl'))
		
		model = tools.load_model('test_dir/model.npz', 'test_dir/dict.dat', 'word2vec.w2v', embed_map)
		X_train, y_train = main.training_set(model, ['test_dir/train.csv'])
		
		self.assertEqual(len(X_train.shape), 2)
		self.assertEqual(len(y_train.shape), 1)
		self.assertEqual(X_train.shape[0], y_train.shape[0])
		self.assertEqual(X_train.shape[1], 4800)
Example #6
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple script for evaluating a RetinaNet network.')

    parser.add_argument('--csv',
                        help='Path to dataset file you would like to evaluate')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--model_path', help='Path to the model file.')
    parser.add_argument('--configfile', help='Path to the config file.')

    parser = parser.parse_args(args)
    configs = configparser.ConfigParser()
    configs.read(parser.configfile)

    try:
        maxside = int(configs['TRAINING']['maxside'])
        minside = int(configs['TRAINING']['minside'])
    except Exception as e:
        print(e)
        print(
            'CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt'
        )
        sys.exit()

    if parser.csv is None:
        dataset_eval = None
        print('No validation annotations provided.')
    else:
        dataset_eval = CSVDataset(train_file=parser.csv,
                                  class_list=parser.csv_classes,
                                  transform=transforms.Compose([
                                      Normalizer(),
                                      Resizer(min_side=minside,
                                              max_side=maxside)
                                  ]))
    retinanet = load_model(parser.model_path, parser.configfile)

    mAP = csv_eval.evaluate(dataset_eval, retinanet)
    print('-----------------')
    print(mAP)
    print('-----------------')
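The script only reads the maxside/minside keys from the [TRAINING] section; a sketch of generating a matching config.txt (the values are placeholders, not taken from the original repository):

import configparser

cfg = configparser.ConfigParser()
cfg['TRAINING'] = {'maxside': '1333', 'minside': '800'}  # placeholder resize limits
with open('config.txt', 'w') as f:
    cfg.write(f)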
Example #7
def train(args, experiment=None, device=None):
    # ---------------------------------------
    # Definition of the hyperaparameters
    # ---------------------------------------

    if args.seed is not None:
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)
        random.seed(args.seed)

    # Loading dataset parameters
    if args.train.lower() == 'mnist':
        net = models.NNMNIST(28 * 28, 10)
        if args.beta > 0.0:
            prior = models.NNMNIST(28 * 28, 10)
            prior.eval()
        elif args.lambda_anchoring > 0.0:
            prior = deepcopy(net)
            prior.eval()
        else:
            prior = None
        net.to(device)
        if prior is not None:
            prior.to(device)
        # Load transforms
        tfms = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        full_dset = torchvision.datasets.MNIST('../../../datasets/MNIST',
                                               train=True,
                                               download=True,
                                               transform=tfms)
        prepr = lambda x: x.view(-1, 28 * 28)
    else:
        raise ValueError('Bad training dataset selected: {}'.format(
            args.train.lower()))

    # Create training and validation split
    train_dset, val_dset, _, _ = dataset.train_valid_split(
        full_dset, split_fold=10, random_seed=args.dataset_seed)
    if args.bootstrapping:
        new_mapping = np.random.choice(np.asarray(train_dset.mapping),
                                       size=train_dset.length)
        train_dset.mapping = new_mapping
    train_loader, val_loader = torch.utils.data.DataLoader(
        train_dset, batch_size=args.batch_size_train,
        shuffle=True), torch.utils.data.DataLoader(
            val_dset, batch_size=args.batch_size_val, shuffle=True)

    # We create a configuration file with all the parameters
    model_name = 'repulsive_train:{}_repulsive:{}_lambda:{}_bandwidth:{}'.format(
        args.train.lower(), args.repulsive, args.lambda_repulsive,
        args.bandwidth_repulsive)
    if args.id is not None:
        model_name = model_name + '_{}'.format(args.id)

    savepath = Path(args.save_folder)
    try:
        if not Path.exists(savepath):
            os.makedirs(savepath)

        if not Path.exists(
                savepath /
                'config.json'):  # Only create json if it does not exist
            with open(savepath / 'config.json', 'w') as fd:
                json.dump(vars(args), fd)
    except FileExistsError:
        print('File already exists')
        pass

    # If the experiment is named, we save it in results directly.
    # experiment.log_parameters(vars(args))

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.set_num_threads(1)

    VAL_FREQ = 1

    optimizer = optim.Adam(net.parameters(), lr=args.lr)

    # Load the reference net
    if args.repulsive is not None:
        if args.repulsive.lower() == 'fashionmnist':
            # For the repulsive loader we don't need to split into train and validation, we can use the full set
            tfms = torchvision.transforms.Compose([
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize((0.2859, ), (0.3530, ))
            ])
            dset_repulsive = torchvision.datasets.FashionMNIST(
                '../../../datasets/FashionMNIST',
                train=True,
                download=False,
                transform=tfms)

            # Load the repulsive model
            raw_model = models.NNMNIST(28 * 28, 10)
            reference_net = tools.load_model(Path(args.reference_net),
                                             raw_model)
            reference_net.eval()

        else:
            raise ValueError('Bad repulsive dataset selected: {}'.format(
                args.repulsive.lower()))

        # Create repulsive sampler
        repulsive_loader = torch.utils.data.DataLoader(
            dset_repulsive, batch_size=args.batch_size_repulsive, shuffle=True)
        repulsive_sampler = sampler.repulsiveSampler(
            args.repulsive.upper(),
            dataloader=repulsive_loader,
            batch_size=args.batch_size_repulsive)

    print('Finished loading the datasets.')

    # Partial functions
    if args.repulsive is not None:
        _optimize = partial(tools.optimize,
                            bandwidth_repulsive=args.bandwidth_repulsive,
                            lambda_repulsive=args.lambda_repulsive)
    else:
        _optimize = tools.optimize

    # --------------------------------------------------------------------------------
    # Training
    # --------------------------------------------------------------------------------
    step = 0

    if args.lambda_anchoring > 0.0:
        fac_norm = compute_norm_fac(net)

    for epoch in tqdm(range(args.n_epochs), desc='epochs'):
        # Training phase
        net.train()
        _tqdm = tqdm(train_loader, desc='batch')
        # experiment.log_current_epoch(epoch)
        for j, batch_raw in enumerate(_tqdm):

            if args.repulsive is not None:
                br = repulsive_sampler.sample_batch()
                batch_repulsive = br.to(device)

            # optimization part # prepare the batch, we get images not vectors !
            x_raw, y = batch_raw
            if args.repulsive is not None:
                batch_repulsive = prepr(batch_repulsive)
            x_raw, y = prepr(x_raw), y.view(-1)
            batch = (x_raw.to(device), y.to(device))

            if args.repulsive is not None:
                kwargs = {
                    'reference_net': reference_net,
                    'batch_repulsive': batch_repulsive
                }
            elif args.beta > 0.0:
                kwargs = {'beta': args.beta, 'prior': prior}
            elif args.lambda_anchoring > 0.0:
                kwargs = {
                    'lambda_anchoring': args.lambda_anchoring,
                    'prior': prior,
                    'fac_norm': fac_norm
                }
            else:
                kwargs = {}
            info_training = _optimize(net,
                                      optimizer,
                                      batch,
                                      add_repulsive_constraint=args.repulsive
                                      is not None,
                                      **kwargs)
            if args.verbose:
                _tqdm.set_description('Epoch {}/{}, loss: {:.4f}'.format(
                    epoch + 1, args.n_epochs, info_training['loss']))

            # # Log to Comet.ml
            # for k, v in info_training.items():
            #     experiment.log_metric(k, float(v), step=step)
            step += 1

        if not Path.exists(savepath / 'models'):
            os.makedirs(savepath / 'models')

        if (epoch > 0 and epoch % args.save_freq == 0):
            model_path = savepath / 'models' / '{}_{}epochs.pt'.format(
                model_name, epoch + 1)
            if not Path.exists(model_path):
                torch.save(net.state_dict(), model_path)
            else:
                raise ValueError(
                    'Error trying to save file at location {}: File already exists'
                    .format(model_path))

        if epoch % VAL_FREQ == 0:

            # Evaluate on validation set
            xent = nn.CrossEntropyLoss()
            net.eval()
            total_val_loss, total_val_acc = 0.0, 0.0
            n_val = len(val_loader.dataset)

            for j, batch_raw in enumerate(val_loader):
                x_raw, y = batch_raw
                len_batch = x_raw.size(0)
                x_raw, y = prepr(x_raw), y.view(-1)
                x, y = x_raw.to(device), y.to(device)
                y_logit = net(x)

                # logging
                total_val_loss += (len_batch / n_val) * xent(
                    y_logit, y.view(-1)).item()
                total_val_acc += (y_logit.argmax(1)
                                  == y).float().sum().item() / n_val

            # Compute statistics
            print('Epoch {}/{}, val acc: {:.3f}, val loss: {:.3f}'.format(
                epoch + 1, args.n_epochs, total_val_acc, total_val_loss))
            # experiment.log_metric("val_accuracy", total_val_acc)
            # experiment.log_metric("val_loss", total_val_loss)

    # POST-PROCESSING
    # Save the model
    try:
        dirname = 'models'
        if not Path.exists(savepath / dirname):
            os.makedirs(savepath / dirname)
        if args.beta > 0.0:
            dirname_priors = 'priors'
            if not Path.exists(savepath / dirname_priors):
                os.makedirs(savepath / dirname_priors)

        model_path = savepath / dirname / '{}_{}epochs.pt'.format(
            model_name, epoch + 1)
        if not Path.exists(model_path):
            torch.save(net.state_dict(), model_path)
        if args.beta > 0.0:
            prior_path = savepath / dirname_priors / '{}_{}epochs.pt'.format(
                model_name, epoch + 1)
            if not Path.exists(prior_path):
                torch.save(prior.state_dict(), prior_path)
    except FileExistsError:
        print('Error trying to save the model: file already exists')
Example #8
import tools
import eval_sick

import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

if __name__ == '__main__':
    embed_map = tools.load_googlenews_vectors()
    model = tools.load_model(embed_map)

    eval_sick.evaluate(model, evaltest=True)
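Once loaded this way, the model can encode new sentences directly; a small sketch using tools.encode, the helper that appears in the other skip-thoughts examples here (the sentences are placeholders):

    sentences = ['a man is playing a guitar', 'someone is cooking pasta']
    vectors = tools.encode(model, sentences, verbose=False)
    print(vectors.shape)  # e.g. (2, 4800) for the combined-skip setup used in Example #5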
Example #9
from flask import Flask, render_template, request, jsonify
from tools import Occurrences, load_model
from numpy import asarray
#--------------------------[ Load model and occurrences ]------------------------------#
# Load model
model = load_model('./model/model.json', './model/model_weights.h5')
# Initialise occurrence object
o = Occurrences()


# Wrapper function to predict value
def predict(command, user):
    global model
    global o
    result = model.predict(asarray([o.encode(str(command), str(user))]))
    return (result)


# Declare Web App
app = Flask(__name__, static_folder='assets')


@app.route('/')
def home():
    return ("""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <link rel="stylesheet" type="text/css" href="/assets/sass/style.css">
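The snippet is cut off inside the home() template. A route that actually calls the predict wrapper is not shown; a minimal JSON endpoint might look like this (the route name and payload keys are assumptions, built on the request/jsonify imports above):

@app.route('/predict', methods=['POST'])
def predict_route():
    payload = request.get_json(force=True)
    result = predict(payload.get('command', ''), payload.get('user', ''))
    # result[0] holds [suspicious, neutral] scores, as in the CLI example below
    return jsonify({'suspicious': float(result[0][0]), 'neutral': float(result[0][1])})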
Example #10
 # Add model argument
 parser.add_argument(
     '-w',
     '--weights',
     type=str,
     default='./model/model_weights.h5',
     help='Path of model_weights.h5 file',
 )
 # Add data argument
 parser.add_argument('-v',
                     '--value',
                     type=str,
                     help='Value to predict',
                     required=False)
 # Add data argument
 parser.add_argument('-u',
                     '--user',
                     type=str,
                     help='Command user',
                     required=False)
 # Parse arguments
 args = parser.parse_args()
 # Load model
 model = load_model(args.model, args.weights)
 # Initialise occurrence object
 o = Occurrences()
 # Predict value
 results = model.predict(asarray([o.encode(args.value, args.user)]))
 # Print results
 print(f"Suspicious: {round(results[0][0]*100, 2)}%")
 print(f"Neutral: {round(results[0][1]*100, 2)}%")
Example #11
import tools, evaluation, os

# Hey Kipster!  For this to work, use a python virtualenv
# and pip install -r requirements.txt in IF-root
# you might also need to install numpy or gfortran with your os pkg manager

# First lets make sure the model kinda works
__dirname = os.path.dirname(os.path.realpath(__file__))
model = tools.load_model(__dirname + '/data/coco.npz')
evaluation.evalrank(model, data='coco', split='test')

# Now lets compute sentence vecs for something specific
example_sentences = [
    'black tie women', 'warm winter coat',
    'long dressi gown tuxedo cocktail black_ti'
]
sentence_vectors = tools.encode_sentences(model,
                                          example_sentences,
                                          verbose=True)

print sentence_vectors.shape
print sentence_vectors[0].shape
Example #12
    print('{:>26}: {}'.format(k, v))
    sys.stdout.flush()

# Parameters for the results of the experiment
params = {}

# TRAIN
t_start = time.time()
solution = trainer(**train_arg)
params['time_computing'] = str(timedelta(seconds=time.time() - t_start))
for k, v in solution.items():
    params[k] = v

# TEST
n_fold = 1
model = load_model(args.save_dir, args.model_name, best=True)
print('VALIDATION:')
if n_fold < 2:
    print(
        'No need to compute n-fold validation, using training results on one fold'
    )
else:
    print('Computing', n_fold, '-fold validation')
    params['best_val_res'], params['best_val_score'] = ranking_eval_Nfold(
        model, n_fold, subset='val')
print('TEST:')
params['best_test_res'], params['best_test_score'] = ranking_eval_Nfold(
    model, n_fold, subset='test')

# WRITE RESULTS
flags = translate_flags(arg_dict)
Example #13
    def __init__(self,
                 batch=16,
                 lr=0.01,
                 load_pretrain=False,
                 model=1,
                 aug=False,
                 mixup=False):
        if model == 1:
            '''
            AlexNet + DSC:
            all five convolutional layers are replaced with
            depthwise separable convolutions
            '''
            self.model = Net1()
            self.m = 1
        elif model == 2:
            self.model = Inception_FPN(5)
            self.m = 2
        elif model == 3:
            self.m = 3
            self.model = Classifier()
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
        if torch.cuda.device_count() > 1:
            print('Lets use', torch.cuda.device_count(), 'GPUs!')
            self.model = nn.DataParallel(self.model)
        self.model = self.model.to(self.device)
        self.load_pretrain = load_pretrain
        self.optimizer = Adam(self.model.parameters(),
                              lr=lr,
                              weight_decay=0.001)
        self.lambda1 = lambda epoch: 0.95**(epoch - 20)
        self.lambda2 = lambda epoch: epoch / 20
        self.scheduler = lr_scheduler.LambdaLR(self.optimizer,
                                               lr_lambda=self.lambda2)
        if self.load_pretrain == True:
            load_model(self.model, self.optimizer, self.scheduler, self.m)
            self.train_loss = ['train_loss']
            self.train_acc = ['train_acc']
            self.verify_loss = ['verify_loss']
            self.verify_acc = ['verify_acc']
            self.lr = ['learning rate']

            data = pd.read_csv('./train_data' + str(self.m) + '.csv')
            self.train_loss.extend(data['train_loss'].tolist())
            self.train_acc.extend(data['train_acc'].tolist())
            self.verify_loss.extend(data['verify_loss'].tolist())
            self.verify_acc.extend(data['verify_acc'].tolist())
            self.lr.extend(data['learning rate'].tolist())
        else:
            self.train_loss = ['train_loss']
            self.train_acc = ['train_acc']
            self.verify_loss = ['verify_loss']
            self.verify_acc = ['verify_acc']
            self.lr = ['learning rate']

            for i in self.model.parameters():
                if len(i.shape) >= 2:
                    xavier_normal_(i)
            if self.m == 3:
                for i in self.model.named_modules():
                    if isinstance(i[1], CBLR):
                        constant_(i[1].BatchNorm2d.weight, 0)
        self.mixup = mixup
        self.train_generator = Generator(batch=batch, mode='train', aug=aug)
        self.test_generator = Generator(batch=batch, mode='verify')
Example #14
def load_decoder(decoder_name):
    sys.path.insert(0, 'decoding/')
    import tools
    return tools.load_model('data/' + decoder_name + '_decoder.npz',
                            'data/' + decoder_name + '_dictionary.pkl')
Example #15
def main():
    """
    train and test the quality of the produced encodings by training a classifier using the encoded images
    """
    skip_training = False 
    n_components = 10
    n_epochs = 4
    # device = torch.device('cuda:0')
    device = torch.device('cpu')

    data_dir = tools.select_data_dir()
    transform = transforms.Compose([
        transforms.ToTensor(),  # Transform to tensor
        transforms.Lambda(lambda x: x * torch.randn_like(x))
    ])

    trainset = torchvision.datasets.MNIST(root=data_dir, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

    encoder = Encoder(n_components=n_components)
    decoder = Decoder(n_components=n_components)

    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Training loop
    if not skip_training:
        en_optimizer = torch.optim.Adam(encoder.parameters(),lr=0.001)
        de_optimizer = torch.optim.Adam(decoder.parameters(),lr=0.001)

        n_epochs = 10
        for epoch in range(n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                en_optimizer.zero_grad()
                de_optimizer.zero_grad()
                z_mu, z_logvar = encoder.forward(images)
                sample = encoder.sample(z_mu,z_logvar)
                y_mu, y_logvar = decoder.forward(sample)
                loss =loss_kl(z_mu, z_logvar) + loss_loglik(y_mu, y_logvar, images)
                loss.backward()
                en_optimizer.step() 
                de_optimizer.step()

            print('Train Epoch {}: Loss: {:.6f}'.format(epoch +1, loss.item())) 

        tools.save_model(encoder, 'vae_encoder.pth')
        tools.save_model(decoder, 'vae_decoder.pth')
    else:
        encoder = Encoder(n_components=10)
        tools.load_model(encoder, 'vae_encoder.pth', device)

        decoder = Decoder(n_components=10)
        tools.load_model(decoder, 'vae_decoder.pth', device)

    # Test the quality of the produced embeddings by classification
    print('start testing the quality of the produced embeddings by classification')
    testset = torchvision.datasets.MNIST(root=data_dir, train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)
    traincodes, trainlabels = encode(trainset, encoder)  # traincodes is (60000, 10)
    testcodes, testlabels = encode(testset, encoder)  # testcodes is (10000, 10)
    # Train a simple linear classifier

    logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', max_iter=400)
    logreg.fit(traincodes.cpu(), trainlabels.cpu())

    predicted_labels = logreg.predict(testcodes.cpu())  # (10000,)

    # Compute accuracy of the linear classifier
    accuracy = np.sum(testlabels.cpu().numpy() == predicted_labels) / predicted_labels.size
    print('Accuracy with a linear classifier: %.2f%%' % (accuracy*100))
Example #16
import demo, tools, datasets

# retrieve img and text by each other
net = demo.build_convnet()
model = tools.load_model(path_to_model='./data/43/43.npz')
train = datasets.load_dataset('43', load_train=True)[0]
vectors = tools.encode_sentences(model, train[0], verbose=False)
# good cases: 10-29
print demo.retrieve_captions(model,
                             net,
                             train[0],
                             vectors,
                             './out_img.jpg',
                             k=10)
print demo.retrieve_captions(model,
                             net,
                             train[0],
                             vectors,
                             './out_img_1.jpg',
                             k=10)
print demo.retrieve_captions(model,
                             net,
                             train[0],
                             vectors,
                             './out_img_2.jpg',
                             k=10)
print demo.retrieve_captions(model,
                             net,
                             train[0],
                             vectors,
                             './out_img_3.jpg',
Example #17
def load_encoder(model_name):
    sys.path.insert(0, 'training/')
    import tools
    return tools.load_model('data/' + model_name + '_encoder.npz', 'data/' + model_name + '_dictionary.pkl',\
     'data/GoogleNews-vectors-negative300.bin')
Example #18
def load_encoder(model_name):
    sys.path.insert(0, 'training/')
    import tools
    return tools.load_model('data/' + model_name + '.npz', 'data/' + model_name + '_dictionary',\
     'data/codemodel')
Example #19
def main(args=None):
    parser = argparse.ArgumentParser(
        description=
        'Simple script for visualizing results from a RetinaNet network using the csv dataset.'
    )

    parser.add_argument(
        '--csv',
        help='Path to file containing annotations (optional, see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--model_path', help='Path to model (.pt) file.')
    parser.add_argument('--configfile', help='Path to the config file.')
    parser.add_argument('--out_path',
                        help='Path to the folder where to save the images.')

    parser = parser.parse_args(args)
    configs = configparser.ConfigParser()
    configs.read(parser.configfile)

    try:
        maxside = int(configs['TRAINING']['maxside'])
        minside = int(configs['TRAINING']['minside'])
    except Exception as e:
        print(e)
        print(
            'CONFIG FILE IS INVALID. PLEASE REFER TO THE EXAMPLE CONFIG FILE AT config.txt'
        )
        sys.exit()

    if parser.csv is None:
        dataset_eval = None
        print('No validation annotations provided.')
    else:
        dataset_eval = CSVDataset(train_file=parser.csv,
                                  class_list=parser.csv_classes,
                                  transform=transforms.Compose([
                                      Normalizer(),
                                      Resizer(min_side=minside,
                                              max_side=maxside)
                                  ]))

    dataloader_val = None
    if dataset_eval is not None:
        sampler_val = AspectRatioBasedSampler(dataset_eval,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_eval,
                                    num_workers=1,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    retinanet = load_model(parser.model_path, parser.configfile)

    unnormalize = UnNormalizer()

    if not os.path.exists(parser.out_path):
        os.makedirs(parser.out_path, exist_ok=True)

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            st = time.time()
            if torch.cuda.is_available():
                scores, classification, transformed_anchors = retinanet(
                    data['img'].cuda().float())
            else:
                scores, classification, transformed_anchors = retinanet(
                    data['img'].float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_RGB2BGR)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_eval.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)

            cv2.imwrite(os.path.join(parser.out_path, f'image_{idx}.png'), img)
Example #20
        f = item.split(':')
        d[int(f[0])] = float(f[1])
    bowfeats.append(d)

# model
# load entities
# labels = []
# ent = []
# with open(data_path, 'r') as f:
#     for line in f:
#         labels.append(line.split('\t')[1][:-1])
#         ent.append(line.split('\t')[0])
if not reload_vectors:
    if not reload_model:
        embed_map = tools.load_googlenews_vectors()
        model = tools.load_model(embed_map)
        with open(model_save_path, 'w') as f:
            pkl.dump(model, f)
    else:
        model = pkl.load(open(model_save_path, 'r'))

    # process
    xp = []
    for entity in ent:
        # line = re.sub(r'[0-9]+', '#', " ".join([word for word in entity.split('_') if word not in stops])).lower()
        line = " ".join([word for word in entity.split('_') if word not in stops]).lower()
        xp.append(filter(lambda x: x in string.printable, line))

    # encode new sentences
    vectors = tools.encode(model, xp)
    with open(vec_save_path, 'w') as f:
Example #21
def main():
    """
    train and test the quality of the produced encodings by training a classifier using the encoded images
    """
    skip_training = False
    n_components = 10
    n_epochs = 4
    # device = torch.device('cuda:0')
    device = torch.device('cpu')

    data_dir = tools.select_data_dir()
    transform = transforms.Compose([
        transforms.ToTensor(),  # Transform to tensor
        transforms.Lambda(lambda x: x * torch.randn_like(x))
    ])

    trainset = torchvision.datasets.MNIST(root=data_dir,
                                          train=True,
                                          download=True,
                                          transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=32,
                                              shuffle=True)

    dae = DAE(n_components)
    dae.to(device)

    # Training loop
    if not skip_training:
        optimizer = torch.optim.Adam(dae.parameters(), lr=0.001)
        n_epochs = 5
        loss_method = nn.MSELoss()

        for epoch in range(n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, _ = data
                noise = torch.randn(*images.shape) * 0.2
                noisy_images = images + noise
                optimizer.zero_grad()
                _, output = dae.forward(noisy_images)
                loss = loss_method(output * noisy_images, images)
                loss.backward()
                optimizer.step()

            print('Train Epoch {}: Loss: {:.6f}'.format(
                epoch + 1, loss.item()))

        tools.save_model(dae, 'dae.pth')
    else:
        device = torch.device('cpu')
        dae = DAE(n_components=10)
        tools.load_model(dae, 'dae.pth', device)

    # Test the quality of the produced embeddings by classification
    print(
        'start testing the quality of the produced embeddings by classification'
    )
    testset = torchvision.datasets.MNIST(root=data_dir,
                                         train=False,
                                         download=True,
                                         transform=transform)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=100,
                                             shuffle=False)
    traincodes, trainlabels = encode(trainset,
                                     dae)  # traincodes is (60000, 10)
    testcodes, testlabels = encode(testset, dae)  # testcodes is (10000, 10)

    # Train a simple linear classifier

    logreg = LogisticRegression(C=1e5,
                                solver='lbfgs',
                                multi_class='multinomial',
                                max_iter=200)
    logreg.fit(traincodes.cpu(), trainlabels.cpu())

    predicted_labels = logreg.predict(testcodes.cpu())  # (10000,)

    accuracy = np.sum(
        testlabels.cpu().numpy() == predicted_labels) / predicted_labels.size
    print('Accuracy with a linear classifier: %.2f%%' % (accuracy * 100))
Example #22
# Loading the trained nets
if args.dataset in ['mnist', 'emnist', 'kmnist', 'notmnist']:
    base_model = models.NNMNIST(28 * 28, 10)
    reshape = lambda x: x.view(-1, 28 * 28)
else:
    raise ValueError('Can\'t load model for test dataset {}'.format(args.dataset))

all_trained_models = []
model_names = []
all_trained_priors = []
prior_names = []

for model_name in [e for e in os.listdir(p / 'models') if e[-2:] == 'pt']:
    net = deepcopy(base_model)
    net = tools.load_model(p / 'models' / model_name, net)
    net.eval()
    all_trained_models.append(net)
    model_names.append(model_name)

for prior_name in [e for e in os.listdir(p / 'priors') if e[-2:] == 'pt']:
    net = deepcopy(base_model)
    net = tools.load_model(p / 'priors' / prior_name, net)
    net.eval()
    all_trained_priors.append(net)
    prior_names.append(prior_name)


nets = all_trained_models
priors = all_trained_priors
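A small sketch, not part of the original script, of how such an ensemble is typically queried: average the softmax outputs of the loaded nets over a batch, reusing the reshape helper defined above.

import torch
import torch.nn.functional as F

def ensemble_predict(nets, x):
    # Average class probabilities over the ensemble for a batch of images x.
    with torch.no_grad():
        probs = torch.stack([F.softmax(net(reshape(x)), dim=1) for net in nets])
    return probs.mean(dim=0)  # shape (batch_size, 10)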
Example #23
# coding: utf-8

import vocab
import train
import tools
import numpy as np

with open("../../wikipedia_txt/result_wakati.txt") as f:
    fdata = [line.rstrip() for i, line in enumerate(f)]
print '# lines: ', len(fdata)

worddict, wordcount = vocab.build_dictionary(fdata)
vocab.save_dictionary(worddict, wordcount, "word_dict")
print '# vocab: ', len(worddict)

train.trainer(fdata, dictionary="word_dict", saveFreq=100, saveto="model", reload_=True, n_words=40000)

model = tools.load_model()
vectors = tools.encode(model, fdata, use_norm=False)
np.savez('vecs.npz', vectors)

Example #24
import tools
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

if __name__ == '__main__':
    args = tools.parse_args_visualize()
    model = tools.load_model(args['load_path'])
    data, target = tools.read_data(args['data_path'])
    sns.set_style('darkgrid')
    sns.scatterplot(x=data, y=target, label='Data')
    reg_x = np.arange(min(data), max(data), 5)
    reg_y = [model.predict(i) for i in reg_x]
    plt.plot(reg_x, reg_y, color='red', label='Regression')
    plt.legend(loc='best')
    plt.savefig('plot.png')
    print('Plot saved to plot.png')
Example #25
def main():

    device = torch.device('cuda:0')
    n_features = 256
    n_epochs = 40
    batch_size = 64
    skip_training = False

    # prepare data first, since the vocabulary sizes below come from the dataset
    data_dir = tools.select_data_dir()
    trainset = TranslationDataset(data_dir, train=True)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=64,
                             shuffle=True,
                             collate_fn=collate,
                             pin_memory=True)

    # Create the transformer model
    encoder = Encoder(src_vocab_size=trainset.input_lang.n_words,
                      n_blocks=3,
                      n_features=n_features,
                      n_heads=16,
                      n_hidden=1024)
    decoder = Decoder(tgt_vocab_size=trainset.output_lang.n_words,
                      n_blocks=3,
                      n_features=n_features,
                      n_heads=16,
                      n_hidden=1024)
    encoder.to(device)
    decoder.to(device)

    # define training loop parameters
    parameters = list(encoder.parameters()) + list(decoder.parameters())
    adam = torch.optim.Adam(parameters, lr=0, betas=(0.9, 0.98), eps=1e-9)
    optimizer = NoamOptimizer(n_features, 2, 10000, adam)
    loss_method = nn.NLLLoss(ignore_index=0, reduction='mean')

    # training
    if not skip_training:
        for epoch in range(n_epochs):
            loss = training_loop(encoder, decoder, optimizer, loss_method,
                                 trainloader)
            print(f'Train Epoch {epoch+1}: Loss: {loss}')

    # save and load trained model
        tools.save_model(encoder, 'tr_encoder.pth')
        tools.save_model(decoder, 'tr_decoder.pth')
    else:
        encoder = Encoder(src_vocab_size=trainset.input_lang.n_words,
                          n_blocks=3,
                          n_features=256,
                          n_heads=16,
                          n_hidden=1024)
        tools.load_model(encoder, 'tr_encoder.pth', device)

        decoder = Decoder(tgt_vocab_size=trainset.output_lang.n_words,
                          n_blocks=3,
                          n_features=256,
                          n_heads=16,
                          n_hidden=1024)
        tools.load_model(decoder, 'tr_decoder.pth', device)

    # Generate translations with the trained model

    # translate sentences from the training set
    print('Translate training data:')
    print('-----------------------------')
    for i in range(5):
        src_sentence, tgt_sentence = trainset[np.random.choice(len(trainset))]
        print(
            '>', ' '.join(trainset.input_lang.index2word[i.item()]
                          for i in src_sentence))
        print(
            '=', ' '.join(trainset.output_lang.index2word[i.item()]
                          for i in tgt_sentence))
        out_sentence = translate(encoder, decoder, src_sentence)
        print(
            '<', ' '.join(trainset.output_lang.index2word[i.item()]
                          for i in out_sentence), '\n')

    # translate sentences from the test set
    testset = TranslationDataset(data_dir, train=False)
    print('Translate test data:')
    print('-----------------------------')
    for i in range(5):
        input_sentence, target_sentence = testset[np.random.choice(
            len(testset))]
        print(
            '>', ' '.join(testset.input_lang.index2word[i.item()]
                          for i in input_sentence))
        print(
            '=', ' '.join(testset.output_lang.index2word[i.item()]
                          for i in target_sentence))
        output_sentence = translate(encoder, decoder, input_sentence)
        print(
            '<', ' '.join(testset.output_lang.index2word[i.item()]
                          for i in output_sentence), '\n')
Example #26
def main():
    """
    function to train model, plot generated samples, compute training score,
    save train model, load train model, and evaluate model
    """

    # device = torch.device('cuda:0')
    device = torch.device('cpu')

    batch_size = 32
    n_epochs = 15
    lambda_n = 10
    skip_training = False  # assumed default; the excerpt uses it below but does not define it
    # `trainloader` is provided by the surrounding module (not shown in this excerpt)

    scorer = Scorer()
    scorer.to(device)
    
    nz = 10
    netG = Generator(nz=nz, ngf=128, nc=1).to(device)
    netD = Critic(nc=1, ndf=128).to(device)

    if not skip_training:
        d_optimizer = torch.optim.Adam(netD.parameters(),lr=0.0001)
        g_optimizer = torch.optim.Adam(netG.parameters(),lr=0.0001)


        for epoch in range(n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, _= data
                images= images.to(device)


                netD.train()
                netD.zero_grad()
                noise = torch.randn(batch_size, nz, 1, 1, device=device)
                fake_images = netG(noise)
                d_loss = critic_loss(netD, images, fake_images)
                
                grad_penalty,x_hat = gradient_penalty(netD, images, fake_images.detach())
                
                critic_loss_total = d_loss + grad_penalty*lambda_n
                critic_loss_total.sum().backward(retain_graph=True)
                d_optimizer.step()

                netG.train()
                netG.zero_grad()
                g_loss = generator_loss(netD, fake_images)
                g_loss.backward(retain_graph=True)
                g_optimizer.step()

                
            with torch.no_grad():
            # Plot generated images
                z = torch.randn(144, nz, 1, 1, device=device)
                samples = netG(z)
                tools.plot_generated_samples(samples)

            # Compute score
                z = torch.randn(1000, nz, 1, 1, device=device)
                samples = netG(z)
                samples = (samples + 1) / 2  # Re-normalize to [0, 1]
                score = scorer(samples)
        

            print('Train Epoch {}: score {}'.format(epoch +1,score))   

        tools.save_model(netG, 'wgan_g.pth')
        tools.save_model(netD, 'wgan_d.pth')

    else:
        nz = 10
        netG = Generator(nz=nz, ngf=128, nc=1)
        netD = Critic(nc=1, ndf=128)
        
        tools.load_model(netG, 'wgan_g.pth', device)
        tools.load_model(netD, 'wgan_d.pth', device) 

        with torch.no_grad():
            z = torch.randn(2000, nz, 1, 1, device=device)
            samples = (netG(z) + 1) / 2
            score = scorer(samples)

        print(f'The trained WGAN-GP achieves a score of {score:.5f}')
Example #27
from math import floor
import h5py
import numpy as np
from scipy.misc import imread, imresize

import pdb

import sys

sys.path.insert(0, 'order-embedding')
import tools

model = tools.load_model('order-embedding/snapshots/order')


def encode_caption(tokens, token_to_idx, max_token_length):
    encoded = np.zeros(max_token_length, dtype=np.int32)
    for i, token in enumerate(tokens):
        if token in token_to_idx:
            encoded[i] = token_to_idx[token]
        else:
            encoded[i] = token_to_idx['<UNK>']
    return encoded


def main(args):

    # read in the data
Example #28
def main():
    """
    train and test the quality of the produced encodings by training a classifier using the encoded images
    """
    args = parser.parse_args()
    if args.cuda:
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    data_dir = tools.select_data_dir()

    transform = transforms.Compose([
        transforms.ToTensor(),
    ])

    trainset = torchvision.datasets.MNIST(root=data_dir,
                                          train=True,
                                          download=True,
                                          transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.batch_size,
                                              shuffle=True)

    net = PixelCNN(n_channels=args.n_channels, kernel_size=args.kernel_size)
    net.to(device)

    if not args.skip_training:
        optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)

        for epoch in range(args.n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, _ = data
                images = images.to(device)
                net.train()
                optimizer.zero_grad()
                y = net(images)
                y = y.to(device)
                loss = loss_fn(y, images)
                loss = loss.to(device)
                loss.backward()
                optimizer.step()

            with torch.no_grad():
                samples = generate(net,
                                   n_samples=args.n_samples,
                                   device=device)
                tools.plot_generated_samples(samples)

            print('Train Epoch {}: Loss: {:.6f}'.format(
                epoch + 1, loss.item()))

        # Save the model to disk
        tools.save_model(net, '10_pixelcnn.pth')
    else:
        net = PixelCNN(n_channels=args.n_channels,
                       kernel_size=args.kernel_size)
        tools.load_model(net, '10_pixelcnn.pth', device)

    # Generate samples
    print('Generate samples with trained model')
    with torch.no_grad():
        samples = generate(net, n_samples=args.n_samples, device=device)
        tools.plot_generated_samples(samples)
Example #29
def main():
    """
    train and test the quality of the produced encodings by training a classifier using the encoded images
    """

    skip_training = False 
    n_components = 10
    n_epochs = 4
    # device = torch.device('cuda:0')
    device = torch.device('cpu')

    data_dir = tools.select_data_dir()



    transform = transforms.Compose([
        transforms.ToTensor(),  # Transform to tensor
        transforms.Normalize((0.5,), (0.5,))  # Minmax normalization to [-1, 1]
    ])

    trainset = torchvision.datasets.MNIST(root=data_dir, train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

    # Create a deep autoencoder
    encoder = Encoder(n_components)
    encoder.to(device)

    decoder = Decoder(n_components)
    decoder.to(device)

    # Training loop
    if not skip_training:
        encoder_optimizer = torch.optim.Adam(encoder.parameters(),lr=0.001)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(),lr=0.001)
        loss_method = nn.MSELoss()
    
        for epoch in range(n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, labels = data

                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()
                
                encoder_output = encoder.forward(images)
                decoder_output = decoder.forward(encoder_output)


                loss = loss_method(decoder_output,images)

                
                loss.backward()
                encoder_optimizer.step() 
                decoder_optimizer.step() 
            
            print('Train Epoch {}: Loss: {:.6f}'.format(epoch +1, loss.item()))
        print('training is finished.')

        tools.save_model(encoder, 'ae_encoder.pth')
        tools.save_model(decoder, 'ae_decoder.pth')
    else:
        device = torch.device("cpu")

        encoder = Encoder(n_components=10)
        tools.load_model(encoder, 'ae_encoder.pth', device)

        decoder = Decoder(n_components=10)
        tools.load_model(decoder, 'ae_decoder.pth', device)  

    # Test the quality of the produced embeddings by classification
    print('start testing the quality of the produced embeddings by classification')
    testset = torchvision.datasets.MNIST(root=data_dir, train=False, download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False)
    traincodes, trainlabels = encode(trainset, encoder)  # traincodes is (60000, 10)
    testcodes, testlabels = encode(testset, encoder)  # testcodes is (10000, 10)  

    logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
    logreg.fit(traincodes.cpu(), trainlabels.cpu())

    predicted_labels = logreg.predict(testcodes.cpu())  # (10000,)

    accuracy = np.sum(testlabels.cpu().numpy() == predicted_labels) / predicted_labels.size
    print('Accuracy with a linear classifier: %.2f%%' % (accuracy*100))
Example #30
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple script for converting pytorch models to onnx.')

    parser.add_argument(
        '--out_name',
        help='Name of resulting onnx file',
        required=True,
        nargs='?',
        const=1,
        type=str,
        default='model.onnx',
    )

    parser.add_argument(
        '--model',
        help='Path to pt model state dict to be converted',
        required=True,
        nargs='?',
        const=1,
        type=str,
        default='resnet18-retinanet.pt',
    )

    parser.add_argument(
        '--configfile',
        help='Config File of the PT Model',
        required=False,
        nargs='?',
        const=1,
        type=str,
        default='config.txt',
    )

    parser = parser.parse_args(args)
    configs = configparser.ConfigParser()
    configs.read(parser.configfile)
    try:
        input_shape = json.loads(configs['MODEL']['input_shape'])
    except (KeyError, ValueError):  # missing key or malformed JSON in the config
        print("CONFIG FILE DOES NOT HAVE INPUT_SHAPE")
        sys.exit()

    retinanet = load_model(parser.model, parser.configfile, no_nms=True)

    input_shape = (1, 3, input_shape[1], input_shape[2])
    onnx_path = parser.out_name

    try:
        export_onnx_model(retinanet,
                          input_shape,
                          onnx_path,
                          output_names=['regression', 'classification'])
        print('Model conversion finished')
    except Exception as e:
        print(e)
        traceback.print_exc(file=sys.stdout)
        print('Error converting')

    print(
        'Before converting model to TRT, simplify the model using onnx-simplifier'
    )
    print(
        f'The command is: python3 -m onnxsim {parser.out_name} new_model_name.onnx'
    )
    exit(0)
Example #31
    'world': False,
    'description': {
        '$exists': True
    },
    'name': {
        '$exists': True
    }
}, limit=10000, projection=['description', 'name'])

words = map(lambda i: ((i['name'] + ' ' + i['description']).lower()), items)


#
# load the model
#
model = tools.load_model()

sentence_vectors = tools.encode_sentences(model, words, verbose=True)

print sentence_vectors.shape

from sklearn.neighbors import BallTree

print 'building ball tree'
tree = BallTree(sentence_vectors)

print 'finding nearest neighbor for ' + words[1]
dist, ind = tree.query(sentence_vectors[1], k=3)
print ind

print 'was ' + words[ind[0][0]]
Example #32
        return THETAS[index_mid]


def evaluate(y, proba):
    y = np.array(y)
    proba = np.array(proba)
    values = []
    for theta in THETAS:
        proba_new = np.where(proba >= theta, 1, 0)
        f1 = f1_score(y, proba_new)
        values.append(f1)
    return np.array(values)


if __name__ == "__main__":
    y_test, pred_proba = load_model("data/Data_flow/tradeoff")
    pred_label = np.where(pred_proba > 0.5, 1, 0)
    # y_test, test_proba = load_model("data/Data_flow/tradeoff-backup")

    print("performance before setting threshold")
    # print(classification_report(y_test[:, :-1], pred_label[:, :-1]))
    print(classification_report(y_test, pred_label))

    F = y_test[:, 0]
    proba_F = pred_proba[:, 0]

    I = y_test[:, 1]
    proba_I = pred_proba[:, 1]

    # calculate f1 scores
    values_F = evaluate(F, proba_F)
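    # Sketch (not in the original snippet): pick the threshold with the best F1 for label F.
    best_theta_F = THETAS[np.argmax(values_F)]
    print("best threshold for F:", best_theta_F, "F1:", values_F.max())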
Example #33
# coding: utf-8

import tools
import numpy as np

dec = tools.load_model()
vec = np.load("../training/vecs.npz")['arr_0']

for cond in vec:
    text = tools.run_sampler(dec, cond, stochastic=True)
    print text[0]