Example #1
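Entry point that parses CLI arguments with config.parse_args(sys.argv), sets up logging, and launches a 'rest' WSGI service under a process launcher, in the style of an OpenStack service runner.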
def rest():
    config.parse_args(sys.argv)
    logging.setup("cloudemulator")

    launcher = service.process_launcher()

    server = service.WSGIService('rest', use_ssl=False,
                                 max_url_len=16384)
    launcher.launch_service(server, workers=server.workers or 1)
    launcher.wait()
Example #2
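Glance API entry point: parse configuration, set up logging, start a WSGI server with the paste-loaded 'glance-api' application on port 9292, and turn worker-creation or runtime failures into exit codes.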
def main():
    try:
        config.parse_args()
        log.setup('glance')

        server = wsgi.Server()
        server.start(config.load_paste_app('glance-api'), default_port=9292)
        server.wait()
    except exception.WorkerCreationFailure as e:
        fail(2, e)
    except RuntimeError as e:
        fail(1, e)
Example #3
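Legacy pytest funcarg fixture that builds a training configuration by passing an explicit raw_args list to config.parse_args and attaching a test corpus.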
def pytest_funcarg__config_a(request, test_corp):
    conf = config.parse_args(raw_args=[
        '--mode',       'train',
        '--model',      'v2',
        '--embed',      '128',
        '--hidden',     '64',
        '--minbatch',   '33',
        '--lr',         '0.3',
        '--train_file', 'data/test.html'
    ])
    conf.corpus = test_corp
    return conf
Example #4
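Extends a parser obtained from config.create_parser with push-receive and concurrency options, hands it back to config.parse_args, and runs a concurrent-request test case.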
def runtest(args, **kwargs):
    parser = config.create_parser('make concurrent requests')
    # receive push; -p would be the natural short flag, but it is already taken by --port
    parser.add_argument('-r', '--receive-push', action='store_true', default=False,
                        help='receive push message')
    parser.add_argument('-c', '--count', action='store', type=int, default=1,
                        help='concurrent count')
    ns = config.parse_args(args, parser=parser, **kwargs)
    case = ConcurrentCase(ns.count, ns.message.values())
    try:
        case.run(ns.address, ns.port)
    except KeyboardInterrupt:
        case.print_result()
Example #5
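Inference for a PaddlePaddle (fluid) DCN model: parse args, sample half of the test files, build the network inside scope/program guards, load a checkpoint, reset the AUC states, and log loss and AUC per batch.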
def infer():
    args = parse_args()
    print(args)

    place = fluid.CPUPlace()
    inference_scope = fluid.Scope()

    test_valid_files = [
        os.path.join(args.test_valid_data_dir, fname)
        for fname in next(os.walk(args.test_valid_data_dir))[2]
    ]
    test_files = random.sample(test_valid_files,
                               int(len(test_valid_files) * 0.5))
    if not test_files:
        test_files = test_valid_files
    print('test files num {}'.format(len(test_files)))

    criteo_dataset = CriteoDataset()
    criteo_dataset.setup(args.vocab_dir)
    test_reader = criteo_dataset.test_reader(test_files, args.batch_size, 100)

    startup_program = fluid.framework.Program()
    test_program = fluid.framework.Program()
    cur_model_path = os.path.join(args.model_output_dir,
                                  'epoch_' + args.test_epoch, "checkpoint")

    with fluid.scope_guard(inference_scope):
        with fluid.framework.program_guard(test_program, startup_program):
            cat_feat_dims_dict = OrderedDict()
            for line in open(args.cat_feat_num):
                spls = line.strip().split()
                assert len(spls) == 2
                cat_feat_dims_dict[spls[0]] = int(spls[1])
            dcn_model = DCN(args.cross_num, args.dnn_hidden_units,
                            args.l2_reg_cross, args.use_bn, args.clip_by_norm,
                            cat_feat_dims_dict, args.is_sparse)
            dcn_model.build_network(is_test=True)

            exe = fluid.Executor(place)
            feeder = fluid.DataFeeder(
                feed_list=dcn_model.data_list, place=place)

            exe.run(startup_program)
            fluid.load(fluid.default_main_program(), cur_model_path)

            for var in dcn_model.auc_states:  # reset auc states
                set_zero(var.name, scope=inference_scope, place=place)

            loss_all = 0
            num_ins = 0
            for batch_id, data_test in enumerate(test_reader()):
                loss_val, auc_val = exe.run(test_program,
                                            feed=feeder.feed(data_test),
                                            fetch_list=[
                                                dcn_model.avg_logloss.name,
                                                dcn_model.auc_var.name
                                            ])
                # num_ins += len(data_test)
                num_ins += 1
                loss_all += loss_val
                logger.info('TEST --> batch: {} loss: {} auc_val: {}'.format(
                    batch_id, loss_all / num_ins, auc_val))

            print(
                'The last log info is the total Logloss and AUC for all test data. '
            )
Example #6
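Training entry fragment for a BiLSTM-CRF tagger; parse_args() feeds a model-name string from which the checkpoint and log paths are derived.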
import itertools
import os

import torch
from BiLSTM_CRF import BiLSTM_CRF_S, BiLSTM_CRF_L

from config import parse_args
import torch.nn as nn
from evaluate import evaluate, evaluate_with_perl, predict_write, evaluate_by_file


def complete_dict(label2idx, add_label2idx):
    for label in add_label2idx:
        if label not in label2idx:
            label2idx[label] = len(label2idx)


if __name__ == '__main__':
    args = parse_args()

    model_name = '{:}_{:}_{:}_{:.2f}_{:}_{:}_{:.3f}'.format(
        args.batch_size, args.hidden_size, args.embedding_dim, args.drop_out,
        args.layers, args.update, args.lr)
    args.checkpoint_dir = os.path.join(args.checkpoint_dir, model_name)
    args.log_path = os.path.join(args.checkpoint_dir, 'log')
    # otherwise a trailing \n would be left at the end!
    args.load_check_point = args.load_check_point.strip()

    # create the checkpoint directory
    if not os.path.exists(args.checkpoint_dir):
        print('Making dir %s' % args.checkpoint_dir)
        os.makedirs(args.checkpoint_dir)

    if args.gpu >= 0 and torch.cuda.is_available():
Example #7
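FastAPI application bootstrap in which config.parse_args() runs at import time, before the app, CORS middleware, and routers are created.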
import uvicorn

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from routes import items

import config

from constants import *

config.parse_args()
app = FastAPI(
    title="API",
    description="API boilerplate",
    version="1.0.0",
    openapi_tags=API_TAGS_METADATA,
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.include_router(items.router)


@app.get("/")
async def root():
Example #8
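Setup fragment for an octree-based (O-CNN) completion network: module-level FLAGS are pre-set, parse_args() re-reads them, and a point-normalization transform is defined for the dataset.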
import sys

import tensorflow as tf
from tqdm import tqdm

from config import parse_args, FLAGS
from tfsolver import TFSolver
from dataset import DatasetFactory
from network_completion import CompletionResnet
from ocnn import l2_regularizer
sys.path.append('..')
from libs import octree_scan, octree_batch, normalize_points


# flags
FLAGS.DATA.train.camera = '_' # used to generate partial scans
FLAGS.MODEL.skip_connections = True
FLAGS = parse_args()


# the dataset
class NormalizePoints:
  def __call__(self, points):
    radius = 64.0
    center = (64.0, 64.0, 64.0)
    points = normalize_points(points, radius, center)
    return points


class PointDataset:
  def __init__(self, parse_example, normalize_points, transform_points, points2octree):
    self.parse_example = parse_example
    self.normalize_points = normalize_points
Example #9
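Training loop in the style of StarGAN: parse_args() supplies dataset, architecture, and loss-weight options; the discriminator is trained with a gradient penalty and the generator with adversarial, classification, and cycle losses (criterion_cls is presumably defined elsewhere in the file).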
def train():
    opt = parse_args()

    os.makedirs("images/%s" % (opt.dataset), exist_ok=True)
    os.makedirs("checkpoints/%s" % (opt.dataset), exist_ok=True)

    cuda = True if torch.cuda.is_available() else False
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # get dataloader
    train_loader = celeba_loader(opt, mode='train')
    val_loader = celeba_loader(opt, mode='val')

    # Dimensionality
    c_dim = len(opt.selected_attrs)

    # Initialize generator and discriminator
    generator = Generator(opt.channels, opt.residual_blocks, c_dim)
    discriminator = Discriminator(opt.channels, opt.img_height, c_dim)

    # Initialize weights
    generator.apply(weights_init_normal)
    discriminator.apply(weights_init_normal)

    # Loss function
    cycle_loss = torch.nn.L1Loss()

    if cuda:
        generator = generator.cuda()
        discriminator = discriminator.cuda()
        cycle_loss.cuda()

    # Optimizers
    optimizer_G = torch.optim.Adam(generator.parameters(),
                                   lr=opt.lr,
                                   betas=(opt.b1, opt.b2))
    optimizer_D = torch.optim.Adam(discriminator.parameters(),
                                   lr=opt.lr,
                                   betas=(opt.b1, opt.b2))

    # ------------
    #  Training
    # ------------

    prev_time = time.time()
    for epoch in range(opt.epochs):
        for i, (imgs, labels) in enumerate(train_loader):

            # Model inputs
            imgs = Variable(imgs.type(FloatTensor))
            labels = Variable(labels.type(FloatTensor))

            # Sample label as generator inputs and Generate fake batch of images
            sampled_c = Variable(
                FloatTensor(np.random.randint(0, 2, (imgs.size(0), c_dim))))
            fake_imgs = generator(imgs, sampled_c)

            # ----------------------
            # Train Discriminator
            # ----------------------

            optimizer_D.zero_grad()

            real_validity, pred_cls = discriminator(imgs)
            fake_validity, _ = discriminator(fake_imgs.detach())
            gradient_penalty = compute_gradient_penalty(
                discriminator, imgs.data, fake_imgs.data, FloatTensor)

            d_adv_loss = -torch.mean(real_validity) + torch.mean(
                fake_validity) + opt.lambda_gp * gradient_penalty
            d_cls_loss = criterion_cls(pred_cls, labels)
            D_loss = d_adv_loss + opt.lambda_cls * d_cls_loss

            D_loss.backward()
            optimizer_D.step()

            # -----------------------------
            # Train Generators
            # -----------------------------
            optimizer_G.zero_grad()

            if i % opt.n_critic == 0:
                gen_imgs = generator(imgs, sampled_c)
                recov_imgs = generator(gen_imgs, labels)

                fake_validity, pred_cls = discriminator(gen_imgs)

                g_adv_loss = -torch.mean(fake_validity)
                g_cls_loss = criterion_cls(pred_cls, sampled_c)
                g_rec_loss = cycle_loss(recov_imgs, imgs)
                G_loss = g_adv_loss + opt.lambda_cls * g_cls_loss + opt.lambda_rec * g_rec_loss

                G_loss.backward()
                optimizer_G.step()

                # ------------------
                # Log Information
                # ------------------

                batches_done = epoch * len(train_loader) + i
                batches_left = opt.epochs * len(train_loader) - batches_done
                time_left = datetime.timedelta(seconds=batches_left *
                                               (time.time() - prev_time))
                prev_time = time.time()

                print(
                    "[Epoch %d/%d] [Batch %d/%d] [D loss: %f, aux: %f] [G loss: %f, aux: %f, cycle: %f] ETA: %s"
                    % (epoch, opt.epochs, i, len(train_loader), D_loss.item(),
                       d_cls_loss.item(), G_loss.item(), g_cls_loss.item(),
                       g_rec_loss.item(), time_left))

                if batches_done % opt.sample_interval == 0:
                    save_sample(opt.dataset, val_loader, batches_done,
                                generator, FloatTensor)

                if batches_done % opt.checkpoint_interval == 0:
                    torch.save(
                        generator.state_dict(),
                        "checkpoints/%s/G_%d.pth" % (opt.dataset, epoch))

    torch.save(generator.state_dict(),
               "checkpoints/%s/shared_E_done.pth" % opt.dataset)
    print("Training Process has been Done!")
Example #10
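Parses database options via c.parse_args([c.DBS]), reads stored tweets, and marks as deleted any tweet whose URL no longer returns HTTP 200.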
import requests
import logging
import config as c

opts = c.parse_args([c.DBS])
db = opts.db

list_of_tweets = []


def query(url):
    r = requests.get(url)
    if r.status_code != 200:
        return True
    else:
        logging.info("Tweet still exists")


def read_database(db):
    cur = db.getTweets()
    for tweet in cur:
        list_of_tweets.append(tweet)
        logging.info(tweet)
    return list_of_tweets


def check_tweet():
    for tweet in read_database(db):
        if query(tweet[3]) is True:
            db.markDeleted(tweet[4])
Example #11
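Training driver for a VNet segmentation model: args control optionally distributed data loading, SGD optimization, and the per-epoch train/validate/learning-rate-adjustment cycle.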
def main():

    args = parse_args()
    args.pretrain = False
    print("Using GPU: {}".format(args.local_rank))
    root_path = 'exps/exp_{}'.format(args.exp)
    if args.local_rank == 0 and not os.path.exists(root_path):
        os.mkdir(root_path)
        os.mkdir(os.path.join(root_path, "log"))
        os.mkdir(os.path.join(root_path, "model"))

    base_lr = args.lr  # base learning rate
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    train_dataset, val_dataset = build_dataset(args.dataset, args.data_root,
                                               args.train_list)
    args.world_size = len(args.gpu.split(","))
    if args.world_size > 1:
        os.environ['MASTER_PORT'] = args.port
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group('nccl')
        device = torch.device('cuda:{}'.format(args.local_rank))
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=len(args.gpu.split(",")),
            rank=args.local_rank)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               sampler=train_sampler,
                                               num_workers=args.num_workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    model = VNet(args.n_channels, args.n_classes).cuda(args.local_rank)

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=0.0005)
    #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.7)
    if args.world_size > 1:
        model = DDP(model,
                    device_ids=[args.local_rank],
                    output_device=args.local_rank,
                    find_unused_parameters=True)

    model.train()
    print("Loaded weights")

    logger = Logger(root_path)
    saver = Saver(root_path)

    for epoch in range(args.start_epoch, args.epochs):
        train(model, train_loader, optimizer, logger, args, epoch)
        validate(model, val_loader, optimizer, logger, saver, args, epoch)
        adjust_learning_rate(args, optimizer, epoch)
Example #12
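Training driver for a TreeLSTM classifier on LC-QuAD: args configure logging, vocabularies, cached datasets, embedding initialization, the optimizer, and per-epoch checkpointing.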
def main():
    global args
    args = parse_args()
    # global logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
    # file logger
    fh = logging.FileHandler(os.path.join(args.save, args.expname)+'.log', mode='w')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    # console logger
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    # argument validation
    args.cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()
    logger.debug(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    train_dir = os.path.join(args.data, 'train/')
    test_dir = os.path.join(args.data, 'test/')

    # get vocab object from vocab file previously written
    vocab_toks = Vocab(filename=os.path.join(args.data, 'vocab_toks.txt'), data=[Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD, Constants.EOS_WORD])
    vocab_chars = Vocab(filename=os.path.join(args.data, 'vocab_chars.txt'))
    vocab_pos = Vocab(filename=os.path.join(args.data, 'vocab_pos.txt'))
    vocab_rels = Vocab(filename=os.path.join(args.data, 'vocab_rels.txt'))

    vocab_output = Vocab(filename=os.path.join(args.data, 'vocab_output.txt'))

    # Set number of classes based on vocab_output
    args.num_classes = vocab_output.size()

    logger.debug('==> LC-QUAD vocabulary toks size : %d ' % vocab_toks.size())
    logger.debug('==> LC-QUAD vocabulary chars size : %d ' % vocab_chars.size())
    logger.debug('==> LC-QUAD vocabulary pos size : %d ' % vocab_pos.size())
    logger.debug('==> LC-QUAD vocabulary rels size : %d ' % vocab_rels.size())
    logger.debug('==> LC-QUAD output vocabulary size : %d ' % vocab_output.size())

    # load LC_QUAD dataset splits
    train_file = os.path.join(args.data, 'pth/lc_quad_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = LC_QUAD_Dataset(train_dir, vocab_toks, vocab_pos, vocab_rels, args.num_classes)
        torch.save(train_dataset, train_file)
    logger.debug('==> Size of train data   : %d ' % len(train_dataset))

    test_file = os.path.join(args.data, 'pth/lc_quad_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = LC_QUAD_Dataset(test_dir, vocab_toks, vocab_pos, vocab_rels, args.num_classes)
        torch.save(test_dataset, test_file)
    logger.debug('==> Size of test data    : %d ' % len(test_dataset))

    criterion = nn.NLLLoss()

    input_dim = EMBEDDING_DIM + vocab_pos.size() + vocab_rels.size() + vocab_chars.size()

    model = TreeLSTM(
        input_dim,
        args.mem_dim,
        args.num_classes,
        criterion,
        vocab_output,
        dropout=True
    )

    toks_embedding_model = nn.Embedding(vocab_toks.size(), EMBEDDING_DIM)
    chars_embedding_model = nn.Embedding(vocab_chars.size(), vocab_chars.size())
    pos_embedding_model = nn.Embedding(vocab_pos.size(), vocab_pos.size())
    rels_embedding_model = nn.Embedding(vocab_rels.size(), vocab_rels.size())

    toks_emb = generate_embeddings(vocab_toks, os.path.join(args.data, 'pth/lc_quad_toks_embed.pth'))
    chars_emb = generate_one_hot_vectors(vocab_chars)
    pos_emb = generate_one_hot_vectors(vocab_pos)
    rels_emb = generate_one_hot_vectors(vocab_rels)

    # plug these into embedding matrix inside model
    chars_embedding_model.state_dict()['weight'].copy_(chars_emb)
    toks_embedding_model.state_dict()['weight'].copy_(toks_emb)
    pos_embedding_model.state_dict()['weight'].copy_(pos_emb)
    rels_embedding_model.state_dict()['weight'].copy_(rels_emb)

    model.to(device), criterion.to(device)
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad([
                {'params': model.parameters(), 'lr': args.lr}
            ], lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()), lr=args.lr, weight_decay=args.wd)

    metrics = Metrics()
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.25)

    # create trainer object for training and testing
    trainer = Trainer(args, model, {'toks': toks_embedding_model, 'pos': pos_embedding_model, 'rels': rels_embedding_model, 'chars': chars_embedding_model}, {'toks': vocab_toks, 'chars': vocab_chars, 'output': vocab_output}, criterion, optimizer)
    file_name = "analysis/expname={},input_dim={},mem_dim={},lr={},emblr={},wd={},epochs={}".format(args.expname, input_dim, args.mem_dim, args.lr, args.emblr, args.wd, args.epochs)

    for epoch in range(args.epochs):
        print("\n" * 5)
        scheduler.step()

        # Train Model
        trainer.train(train_dataset)

        # Test Model on Training Dataset
        train_loss, train_pred = trainer.test(train_dataset)
        train_acc = metrics.accuracy_score(train_pred, train_dataset.labels, vocab_output)

        print('==> Train loss   : %f \t' % train_loss, end="")
        print('Epoch ', str(epoch + 1), 'train percentage ', train_acc)
        write_analysis_file(file_name, epoch, train_pred, train_dataset.labels, "train_acc", train_acc, train_loss, vocab_output)

        # Test Model on Testing Dataset
        test_loss, test_pred = trainer.test(test_dataset)
        test_acc = metrics.accuracy_score(test_pred, test_dataset.labels, vocab_output)

        print('==> Test loss   : %f \t' % test_loss, end="")
        print('Epoch ', str(epoch + 1), 'test percentage ', test_acc)
        write_analysis_file(file_name, epoch, test_pred, test_dataset.labels, "test_acc", test_acc, test_loss, vocab_output)

        checkpoint_filename = '%s.pt' % os.path.join(args.save, args.expname + ',epoch={},test_acc={}'.format(epoch + 1, test_acc))
        checkpoint = {'trainer': trainer, 'test_accuracy': test_acc, 'scheduler': scheduler}
        torch.save(checkpoint, checkpoint_filename)
Example #13
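Tail of a checkpoint-saving helper, followed by the script entry point that passes config.parse_args() to main().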
    if updates is not None:
        checkpoint_path = "checkpoints_{}_{}.pt".format(epoch, updates)
    else:
        checkpoint_path = "checkpoints_{}.pt".format(epoch)
    checkpoint_path = os.path.join(args.save, checkpoint_path)
    torch.save(model.state_dict(), checkpoint_path)
    ## save best 
    if score > best_scores:
        save_checkpoint.best_scores = score
        logging.info("save best checkpoint ::best scores {}, epoch {} updates {}".format(score, epoch, updates))
        checkpoint_path = os.path.join(args.save, "checkpoints_best.pt")
        torch.save(model.state_dict(), checkpoint_path)


if __name__ == "__main__":
    args = config.parse_args()
    main(args)
Example #14
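Minimal service entry point: parse args, register options, and run the app on the configured address and port.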
def main():
    config.parse_args(sys.argv)
    opts.register_opts()
    conf = cfg.CONF
    app.run(host=conf.server.addr, port=conf.server.port)
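Example #15
Conversion script that extends an argparse parser through cfg.parse_args(parser) and walks date/session directories to convert .mat slices into .npy files.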
import os
import os.path as osp
import sys

import numpy as np
from scipy.io import loadmat

sys.path.insert(0, '../')
import config as cfg

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--mat_dir', default='slices_mat_64x64', type=str)
    parser.add_argument('--npy_dir', default='slices_npy_64x64', type=str)
    args = cfg.parse_args(parser)
    args.mat_path = osp.join(args.data_root, args.mat_dir)
    args.npy_path = osp.join(args.data_root, args.npy_dir)

    for date in sorted(os.listdir(args.mat_path)):
        date_mat_path = osp.join(args.mat_path, date)
        date_npy_path = osp.join(args.npy_path, date)

        for session in sorted(os.listdir(date_mat_path)):
            session_mat_path = osp.join(date_mat_path, session)
            session_npy_path = osp.join(date_npy_path, session)
            if not osp.isdir(session_npy_path):
                os.makedirs(session_npy_path)

            for file in sorted(os.listdir(session_mat_path)):
                file_mat_path = osp.join(session_mat_path, file)
Example #16
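Training driver for an image classifier: args select the architecture, optimizer, weighted loss, and an optional resume checkpoint; the best model and periodic snapshots are checkpointed.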
def main():

    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.enabled = True

    args = parse_args()
    print("Model input size: ", args.model_input_size)
    print("Batch size: ", args.batch_size)
    print('Arch: ', args.arch)
    print('Optimizer: ', args.optim)
    print('Weighted loss: ', args.use_weighted_loss)

    dir_path = os.path.dirname(__file__)

    print('Loading dataset and dataloader..')
    train_percentage = args.train_percentage
    if args.validate and args.train_percentage == 1:
        train_percentage = 0.9
        print(
            'Warning: train percentage was given 1 with validation enabled, train percentage dropped to 0.9'
        )
    train_set, train_loader = data_fact.get_dataloader(
        args, 'train', train_percentage=train_percentage)
    if args.validate:
        val_set, val_loader = data_fact.get_dataloader(
            args, 'val', train_percentage=train_percentage)
    if args.test:
        test_set, test_loader = data_fact.get_dataloader(args, 'test')
    num_classes = len(train_set.classes)

    class_weights = [1.0] * num_classes
    if args.use_weighted_loss:
        class_weights = train_set.class_weights
        print("Class weights: ", class_weights)
    class_weights = torch.Tensor(class_weights)

    criterion = nn.CrossEntropyLoss(class_weights)
    if args.use_cuda:
        criterion = criterion.cuda()

    best_perf1 = 0
    best_perf5 = 0
    begin_epoch = 0
    best_epoch = 0
    state_dict = None  # won't be None if resuming from a trained model
    optimizer_dict = None  # won't be None if resuming from a trained model
    scheduler_steps = 2  # [30, 60, 90]
    scheduler_decay = 0.9  # 0.1

    if args.resume_path:
        print('Loading finetuned model from {}..'.format(args.resume_path))
        checkpoint = torch.load(args.resume_path)
        begin_epoch = checkpoint['epoch']
        best_epoch = begin_epoch
        # best_epoch = checkpoint['best_epoch']
        best_perf1 = checkpoint['perf1']
        # best_perf5 = checkpoint['perf5']
        args.arch = checkpoint['arch']
        num_classes = checkpoint['num_classes']
        state_dict = checkpoint['state_dict']
        optimizer_dict = checkpoint['optimizer']
        print('Begin epoch: ', begin_epoch)
        print('Best Acc@1 at epoch {}: {}'.format(best_epoch, best_perf1))
        # scheduler.load_state_dict(checkpoint['scheduler'])

    model = model_factory.generate_model(args.arch, num_classes, state_dict,
                                         args.use_cuda)
    optimizer = get_optimizer(args, model, optimizer_dict)
    print('Learning rate: {:1.5f}'.format(optimizer.param_groups[0]['lr']))

    if args.train:
        for epoch in range(begin_epoch, args.num_epochs):
            print('Epoch: {} / {}'.format(epoch + 1, args.num_epochs))

            perf_indicator1, perf_indicator5 = train_epoch(
                epoch, train_loader, model, criterion, optimizer,
                args.use_cuda)

            if args.validate:
                perf_indicator1, perf_indicator5 = validate_epoch(
                    val_loader, model, criterion, args.use_cuda)

            if perf_indicator1 >= best_perf1:
                best_perf1 = perf_indicator1
                best_perf5 = perf_indicator5
                best_epoch = epoch

                checkpoint_file = '{}_{}_{}_{}{}{}'.format(
                    args.model_input_size, args.arch, args.optim,
                    args.batch_size, '_subset' if args.subset_finetune else '',
                    '_weightedloss' if args.use_weighted_loss else '')

                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'best_epoch': best_epoch + 1,
                        'perf1': best_perf1,
                        'perf5': best_perf5,
                        'arch': args.arch,
                        'num_classes': model.num_classes,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        # 'scheduler': scheduler.state_dict(),
                    },
                    dir_path,
                    is_best=True,
                    filename=checkpoint_file)

            if (epoch + 1) % 5 == 0:  # save model every 5 epochs
                checkpoint_file = 'Epoch{}_{}_{}_{}_{}{}{}'.format(
                    epoch + 1, args.model_input_size, args.arch, args.optim,
                    args.batch_size, '_subset' if args.subset_finetune else '',
                    '_weightedloss' if args.use_weighted_loss else '')

                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'best_epoch': best_epoch + 1,
                        'perf1': best_perf1,
                        'perf5': best_perf5,
                        'arch': args.arch,
                        'num_classes': model.num_classes,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        # 'scheduler': scheduler.state_dict(),
                    },
                    dir_path,
                    filename=checkpoint_file)

            print('Epoch {} perf acc@1: {}, perf acc@5: {}'.format(
                epoch + 1, perf_indicator1, perf_indicator5))
            print('Best perf acc@1: {}, perf acc@5: {} at epoch {}'.format(
                best_perf1, best_perf5, best_epoch + 1))
            # scheduler.step(perf_indicator1)
            if epoch + 1 < 100:
                adjust_learning_rate(optimizer,
                                     epoch + 1,
                                     args,
                                     steps=scheduler_steps,
                                     dec_rate=scheduler_decay)

    if args.test:
        test_cassava(test_loader, model, train_set.classes, args)
Example #17
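Feature extraction with a pretrained VNet: loads a checkpoint saved from a DataParallel model (stripping the 'module.' prefix from its keys) and dumps encoder features for roughly the first 100 batches to a pickle file.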
def main():

    args = parse_args()
    args.pretrain = True
    print("Using GPU: {}".format(args.local_rank))

    base_lr = args.lr  # base learning rate
    batch_size = 1
    max_iterations = 20000

    cell_size = 96  # size of volume we crop patch from
    patch_size = 64
    puzzle_config = 3  # 2 or 3 for 2X2X2 or 3X3X3 puzzle
    puzzle_num = puzzle_config**3
    feature_len = 256  #
    iter_num = 0
    sr_feature_size = 32
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    train_dataset, val_dataset = build_dataset(args)
    args.world_size = len(args.gpu.split(","))
    if args.world_size > 1:
        os.environ['MASTER_PORT'] = args.port
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group('nccl')
        device = torch.device('cuda:{}'.format(args.local_rank))
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset,
            num_replicas=len(args.gpu.split(",")),
            rank=args.local_rank)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               sampler=train_sampler,
                                               num_workers=args.num_workers,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             num_workers=args.num_workers,
                                             pin_memory=True)

    model = VNet(args.n_channels, args.n_classes, input_size=64,
                 pretrain=True).cuda(args.local_rank)
    model_ema = VNet(args.n_channels,
                     args.n_classes,
                     input_size=64,
                     pretrain=True).cuda(args.local_rank)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=0.9,
                                weight_decay=0.0005)
    #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.7)
    parallel_state_dict = torch.load(args.load_path)['state_dict']
    new_state_dict = {}
    for key in parallel_state_dict.keys():
        new_state_dict[key[7:]] = parallel_state_dict[key]

    model.load_state_dict(new_state_dict)
    model.eval()
    print("Loaded weights")
    print("Using Dataset: {}".format(type(train_dataset)))

    features = []
    for i, batch in enumerate(tqdm(train_loader)):
        volume = batch['image'].cuda(args.local_rank, non_blocking=True)
        volume = volume.view((-1, ) + volume.shape[2:])

        with torch.no_grad():
            q = model(volume, pretrain=True)

        features.append(q)
        if i > 100:
            break
    features = torch.cat(features, 0)

    pickle.dump(features.cpu().numpy(), open("features.pkl", 'wb'))
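Example #18
Training driver for a SICK similarity TreeLSTM: args configure vocabulary building, GloVe embedding initialization, the optimizer, and Pearson/MSE-based checkpointing.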
def main():
    global args
    args = parse_args()
    # global logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
    # file logger
    fh = logging.FileHandler(os.path.join(args.save, args.expname)+'.log', mode='w')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    # console logger
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    # argument validation
    args.cuda = args.cuda and torch.cuda.is_available()
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()
    logger.debug(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    sick_vocab_file = os.path.join(args.data, 'sick.vocab')
    if not os.path.isfile(sick_vocab_file):
        token_files_b = [os.path.join(split, 'b.toks') for split in [train_dir, dev_dir, test_dir]]
        token_files_a = [os.path.join(split, 'a.toks') for split in [train_dir, dev_dir, test_dir]]
        token_files = token_files_a + token_files_b
        sick_vocab_file = os.path.join(args.data, 'sick.vocab')
        build_vocab(token_files, sick_vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=sick_vocab_file, data=[Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD, Constants.EOS_WORD])
    logger.debug('==> SICK vocabulary size : %d ' % vocab.size())

    # load SICK dataset splits
    train_file = os.path.join(args.data, 'sick_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SICKDataset(train_dir, vocab, args.num_classes)
        torch.save(train_dataset, train_file)
    logger.debug('==> Size of train data   : %d ' % len(train_dataset))
    dev_file = os.path.join(args.data, 'sick_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SICKDataset(dev_dir, vocab, args.num_classes)
        torch.save(dev_dataset, dev_file)
    logger.debug('==> Size of dev data     : %d ' % len(dev_dataset))
    test_file = os.path.join(args.data, 'sick_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SICKDataset(test_dir, vocab, args.num_classes)
        torch.save(test_dataset, test_file)
    logger.debug('==> Size of test data    : %d ' % len(test_dataset))

    # initialize model, criterion/loss_function, optimizer
    model = SimilarityTreeLSTM(
                vocab.size(),
                args.input_dim,
                args.mem_dim,
                args.hidden_dim,
                args.num_classes,
                args.sparse,
                args.freeze_embed)
    criterion = nn.KLDivLoss()
    if args.cuda:
        model.cuda(), criterion.cuda()
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(os.path.join(args.glove, 'glove.840B.300d'))
        logger.debug('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.Tensor(vocab.size(), glove_emb.size(1)).normal_(-0.05, 0.05)
        # zero out the embeddings for padding and other special words if they are absent in vocab
        for idx, item in enumerate([Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD, Constants.EOS_WORD]):
            emb[idx].zero_()
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(word)]
        torch.save(emb, emb_file)
    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    model.emb.weight.data.copy_(emb)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer)

    best = -float('inf')
    for epoch in range(args.epochs):
        train_loss             = trainer.train(train_dataset)
        train_loss, train_pred = trainer.test(train_dataset)
        dev_loss, dev_pred     = trainer.test(dev_dataset)
        test_loss, test_pred   = trainer.test(test_dataset)

        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.mse(train_pred, train_dataset.labels)
        logger.info('==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(epoch, train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.mse(dev_pred, dev_dataset.labels)
        logger.info('==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(epoch, dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.mse(test_pred, test_dataset.labels)
        logger.info('==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(epoch, test_loss, test_pearson, test_mse))

        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(), 
                'optim': trainer.optimizer,
                'pearson': test_pearson, 'mse': test_mse,
                'args': args, 'epoch': epoch
                }
            logger.debug('==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint, '%s.pt' % os.path.join(args.save, args.expname))
Example #19
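Optparse-based variant: profiling and compiler options are added to a default parser, config.parse_args(parser, sys.argv) is merged into the global configuration, and several settings are eval'd into callables.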
    close failed: [Errno 9] Bad file descriptor
    '''


if __name__ == '__main__':
    parser = config.default_parser(defaults)
    parser.add_option("--profile", default="",
        dest="profile", help="unit to profile doctest [default: %default]")
    parser.add_option('--psyco', dest='psyco', default='',
            help="specialized python compiler for speed without debugging")
    
    import sys
    (options, args) = config.parse_args(parser, sys.argv)
    configuration.set(options.__dict__)
    configuration.subprocess_gateway = eval(configuration.subprocess_gateway)
    configuration.setup_client = eval(configuration.setup_client)
    configuration.globe_class = eval(configuration.globe_class)
    config.setup_logging(configuration.verbose)

    if setup_flash_master == configuration.setup_client:
        # TODO:  Master client class
        set_property = slave_set_property
        dispatch_event = slave_dispatch_event
        mouse_down_and_sleep = slave_mouse_down_and_sleep
        mouse_down_and_news = slave_mouse_down_and_news

    #from optparse import OptionParser
    #parser = OptionParser()
Example #20
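Segments dataframe text columns into EDUs with an attention-based segmentation model; parse_args() supplies the random seed, vocabulary path, model directory, and batch size.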
def segment_data(dfs, col_names):
    """Segment the given dataframes into EDUs, add the EDUs into the dataframes and return"""
    args = parse_args()
    np.random.seed(args.seed)
    tf.set_random_seed(args.seed)

    # Logging
    logger = logging.getLogger("SegEDU")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # Loading
    rst_data = RSTData()
    logger.info('Loading vocab...')
    with open(args.word_vocab_path, 'rb') as fin:
        word_vocab = pickle.load(fin)
        logger.info('Word vocab size: {}'.format(word_vocab.size()))
    rst_data.word_vocab = word_vocab
    logger.info('Loading the model...')
    model = AttnSegModel(args, word_vocab)
    model.restore('best', args.model_dir)
    if model.use_ema:
        model.sess.run(model.ema_backup_op)
        model.sess.run(model.ema_assign_op)

    spacy_nlp = spacy.load('en', disable=['parser', 'ner', 'textcat'])

    for df, col_name in zip(dfs, col_names):
        edu_results = {}
        for idx, row in tqdm(df.iterrows(), total=len(df.index)):
            try:
                # logger.info('Segmenting example {}...'.format(idx))
                raw_sents = [row[col_name]]
                samples = []
                for sent in spacy_nlp.pipe(raw_sents, batch_size=1000, n_threads=5):
                    samples.append({'words': [token.text for token in sent],
                                    'words_ws': [token.text_with_ws for token in sent],
                                    'edu_seg_indices': []})
                rst_data.test_samples = samples
                data_batches = rst_data.gen_mini_batches(args.batch_size, test=True, shuffle=False)

                edus = []
                for batch in data_batches:
                    batch_pred_segs = model.segment(batch)
                    for sample, pred_segs in zip(batch['raw_data'], batch_pred_segs):
                        one_edu_words = []
                        for word_idx, word in enumerate(sample['words_ws']):
                            if word_idx in pred_segs:
                                edus.append(''.join(one_edu_words))
                                one_edu_words = []
                            one_edu_words.append(word)
                        if one_edu_words:
                            edus.append(''.join(one_edu_words))

                edu_results[idx] = edus
            except Exception:
                logger.info("Crashed while segmenting {}.".format(idx))
                edu_results[idx] = []
                continue

        df['edus'] = pd.Series(edu_results)
    merged = pd.concat(dfs).reset_index(drop=True)
    merged = merged[merged['edus'].map(lambda x: len(x)) > 0]  # Remove rows with unsegmentable EDUs
    return merged
Example #21
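Training driver for an SST sentiment TreeLSTM (forced to CPU here): args control the vocabulary, cached datasets, GloVe embedding preparation, the optimizer, and dev-accuracy-based checkpointing.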
def main():
    # export CUDA_VISIBLE_DEVICES=3
    global args
    args = parse_args()
    # global logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter("[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
    
    # file logger
    fh = logging.FileHandler(os.path.join(args.save, args.expname)+'.log', mode='w')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    
    # console logger
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    
    # argument validation
    args.cuda = False
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()
    logger.debug(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)
    
    # some settings
    if args.fine_grain:
        args.num_classes = 5
    else:
        args.num_classes = 3


    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # prepare the directory
    vocab_file = os.path.join(args.data, 'vocab-cased.txt') # use vocab-cased
    # NO, DO NOT BUILD VOCAB,  USE OLD VOCAB

    # get vocab object from vocab file previously written
    print(vocab_file)
    vocab = Vocab(filename=vocab_file, 
                data=[Constants.PAD_WORD, Constants.UNK_WORD,
                    Constants.BOS_WORD, Constants.EOS_WORD])
    print('==> SST vocabulary size : %d ' % vocab.size())

    # let program turn off after preprocess data
    is_preprocessing_data = False 

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes, args.fine_grain)
        torch.save(train_dataset, train_file)
        # is_preprocessing_data = True
    logger.debug('==> Size of train data   : %d ' % len(train_dataset))
    
    # dev
    dev_file = os.path.join(args.data,'sst_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes, args.fine_grain)
        torch.save(dev_dataset, dev_file)
        # is_preprocessing_data = True
    logger.debug('==> Size of dev data   : %d ' % len(dev_dataset))

    # test
    test_file = os.path.join(args.data,'sst_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes, args.fine_grain)
        torch.save(test_dataset, test_file)
        # is_preprocessing_data = True
    logger.debug('==> Size of test data    : %d ' % len(test_dataset))


    # initialize model, criterion/loss_function, optimizer
    criterion = nn.NLLLoss()
    model = SentimentTreeLSTM(
                vocab.size(),
                args.input_dim,
                args.mem_dim,
                args.num_classes,
                args.freeze_embed,
                criterion,
                device,
                args.dropout,
                args.n
            )

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sst_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = utils.load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        logger.debug('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocab.size(), glove_emb.size(1), dtype=torch.float, device=device)
        emb.normal_(0, 0.05)
        # zero out the embeddings for padding and other special words if they are absent in vocab
        for idx, item in enumerate([Constants.PAD_WORD, Constants.UNK_WORD,
                                    Constants.BOS_WORD, Constants.EOS_WORD]):
            emb[idx].zero_()
        
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(word)]
        # is_preprocessing_data = True
        torch.save(emb, emb_file)
    if is_preprocessing_data:
        print ('done preprocessing data, quit program to prevent memory leak.')
        print ('please run again.')
        quit()
    # plug these into embedding matrix inside model
    # in Python, in-place operations carry the _ suffix; this helps reduce memory use when handling high-dimensional data
    model.emb.weight.data.copy_(emb)

    model.to(device), criterion.to(device)
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                         model.parameters()), lr=args.lr, weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()), lr=args.lr, weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    # create trainer object for training and testing
    trainer = SentimentTrainer(args, model, criterion, optimizer, device)

    best = -float('inf')
    for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)
        train_loss, train_pred = trainer.test(train_dataset)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        #test_loss, test_pred = trainer.test(test_dataset)
        
        train_acc = metrics.sentiment_accuracy_score(train_pred, train_dataset.labels)
        dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels)
        #test_acc = metrics.sentiment_accuracy_score(test_pred, test_dataset.labels)
        logger.info('==> Epoch {}, Train \tLoss: {} \tAccuracy: {}'.format(
            epoch, train_loss, train_acc))
        logger.info('==> Epoch {}, Dev \tLoss: {} \tAccuracy: {}'.format(
            epoch, dev_loss, dev_acc))
        #logger.info('==> Epoch {}, Test \tLoss: {}\tAccuracy: {}'.format(
            #epoch, test_loss, test_acc))

        if best < dev_acc:
            best = dev_acc
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'train_acc': train_acc, 'dev_acc': dev_acc,
                'args': args, 'epoch': epoch
            }
            logger.debug('==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint, '%s.pt' % os.path.join(args.save, args.expname))
Example #22
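Tail of a GAN script: model summaries plus an infer(opt) routine that loads a trained generator and saves the input and generated image side by side; parse_args() provides the options in the __main__ block.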
        generator.cuda()
        discriminator.cuda()

    summary(generator, (opt.channels, opt.img_height, opt.img_width))
    summary(discriminator, [(opt.channels, opt.img_height, opt.img_width),
                            (opt.channels, opt.img_height, opt.img_width)])


def infer(opt):
    cuda = True if torch.cuda.is_available() else False
    FloatTensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    generator = Generator()
    generator.load_state_dict(torch.load(opt.load_model))

    if cuda:
        generator.cuda()

    sample = load_img(opt)
    sample = Variable(sample.unsqueeze(0).type(FloatTensor))
    gen_img = generator(sample)

    sample = torch.cat((sample.data, gen_img.data), -1)
    save_image(sample, "images/infer.png", nrow=1, normalize=True)


if __name__ == '__main__':
    opt = parse_args()
    infer(opt)
    # display_network(opt)
Example #23
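WGAN training on MNIST: parse_args() supplies the hyperparameters for the RMSprop optimizers, weight clipping, critic scheduling, image sampling, and checkpointing.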
def train():
    os.makedirs("images", exist_ok=True)
    os.makedirs("checkpoints", exist_ok=True)

    cuda = True if torch.cuda.is_available() else False
    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # get configs and dataloader
    opt = parse_args()
    data_loader = mnist_loader(opt)

    # Initialize generator and discriminator
    generator = Generator(opt)
    discriminator = Discriminator(opt)

    if cuda:
        generator.cuda()
        discriminator.cuda()

    # Optimizers
    optimizer_G = torch.optim.RMSprop(generator.parameters(), lr=opt.lr)
    optimizer_D = torch.optim.RMSprop(discriminator.parameters(), lr=opt.lr)

    for epoch in range(opt.epochs):
        for i, (imgs, _) in enumerate(data_loader):

            # Configure input
            z = Variable(
                Tensor(np.random.normal(0, 1,
                                        (imgs.shape[0], opt.latent_dim))))
            gen_imgs = generator(z)
            real_imgs = Variable(imgs.type(Tensor))

            # ------------------
            # Train Discriminator
            # ------------------

            optimizer_D.zero_grad()
            d_loss = -torch.mean(discriminator(real_imgs)) + torch.mean(
                discriminator(gen_imgs.detach()))

            d_loss.backward()
            optimizer_D.step()

            # Clip weights of discriminator
            for p in discriminator.parameters():
                p.data.clamp_(-opt.clip_value, opt.clip_value)

            # ------------------
            # Train Generator
            # ------------------

            if i % opt.n_critic == 0:
                optimizer_G.zero_grad()
                g_loss = -torch.mean(discriminator(gen_imgs))

                g_loss.backward()
                optimizer_G.step()

            # ------------------
            # Log Information
            # ------------------

            print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]" %
                  (epoch, opt.epochs, i, len(data_loader), d_loss.item(),
                   g_loss.item()))

            batches_done = epoch * len(data_loader) + i
            if batches_done % opt.sample_interval == 0:
                save_image(gen_imgs.data[:25],
                           "images/%d.png" % batches_done,
                           nrow=5,
                           normalize=True)

            if batches_done % opt.checkpoint_interval == 0:
                torch.save(generator.state_dict(),
                           "checkpoints/generator_%d.pth" % epoch)
                # torch.save(discriminator.state_dict(), "checkpoints/discriminator_%d.pth" % epoch)

    torch.save(generator.state_dict(), "checkpoints/generator_done.pth")
    print("Training Process has been Done!")
Example #24
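Training driver for an entity-relation TreeLSTM with 0/1 prediction: args drive dataset caching, GloVe-based sentence embeddings, optimizer selection, and Pearson-based checkpointing.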
def main():
    global args 
    args = parse_args()

    # global logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    
    formatter = logging.Formatter("[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
    # file logger
    fh = logging.FileHandler(os.path.join(args.save, args.expname)+'.log', mode='w')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    # console logger
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    if not torch.cuda.is_available() and args.cuda:
        args.cuda = False
        logger.info("CUDA is unavailable, convert to cpu mode")

    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()

    logger.debug(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    # set directory
    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # load vocabulary
    vocab_path = os.path.join(args.data, "vocab.npy")
    vocab = Vocab(
        filename=vocab_path, 
        labels=[constants.PAD_WORD, constants.UNK_WORD, constants.BOS_WORD, constants.EOS_WORD]
    )
    logger.debug('==> vocabulary size : %d ' % len(vocab))

    # load train dataset
    train_file = os.path.join(train_dir, "ERdata.pt")
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = ERDataset(train_dir, vocab, 2)
        torch.save(train_dataset, train_file)
    logger.debug('==> train data size: %d' % len(train_dataset))

    # load dev dataset
    dev_file = os.path.join(dev_dir, "ERdata.pt")
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = ERDataset(dev_dir, vocab, 2)
        torch.save(dev_dataset, dev_file)
    logger.debug('==> dev data size: %d' % len(dev_dataset))

    # load test dataset   
    test_file = os.path.join(test_dir, "ERdata.pt")
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = ERDataset(test_dir, vocab, 2)
        torch.save(test_dataset, test_file)
    logger.debug('==> test data size: %d' % len(test_dataset))

    # trainer: 
    # tree model
    model = TreeModel(
        len(vocab),
        args.input_dim,
        args.mem_dim,
        2,  # 0-1 prediction
        args.sparse,
        args.freeze_embed
    )

    # criterion
    criterion = nn.KLDivLoss()
    if args.cuda:
        model.cuda(), criterion.cuda()

    # optimizer
    if args.optim == 'adam':
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()), 
            lr=args.lr, weight_decay=args.wd
        )
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(
            filter(lambda p: p.requires_grad, model.parameters()), 
            lr=args.lr, weight_decay=args.wd
        )
    elif args.optim == 'sgd':
        optimizer = optim.SGD(
            filter(lambda p: p.requires_grad, model.parameters()), 
            lr=args.lr, weight_decay=args.wd
        )
    else:
        raise Exception("Unknown optimizer")

    # metrics
    metrics = Metrics(2)  # 0-1 prediction

    # embeddings
    sent_emb_path = os.path.join(args.data, "sent_emb.pt")
    raw_sent_emb_path = os.path.join(args.glove, 'glove.840B.300d.txt')

    sent_emb = load_word_vectors(sent_emb_path, vocab, raw_sent_emb_path)
    
    logger.debug('==> sentence embedding size: %d * %d' % (sent_emb.size()[0], sent_emb.size()[1]))
    if args.cuda:
        sent_emb = sent_emb.cuda()
    model.sent_emb.weight.data.copy_(sent_emb)

    trainer = Trainer(args, model, criterion, optimizer)

    # train and test
    best = float("-inf")
    for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)

        train_loss, train_pred = trainer.test(train_dataset)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        test_loss, test_pred = trainer.test(test_dataset)

        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.mse(train_pred, train_dataset.labels)
        logger.info('==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(epoch, train_loss, train_pearson, train_mse))
        
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.mse(dev_pred, dev_dataset.labels)
        logger.info('==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(epoch, dev_loss, dev_pearson, dev_mse))

        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.mse(test_pred, test_dataset.labels)
        logger.info('==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(epoch, test_loss, test_pearson, test_mse))

        if best < dev_pearson:
            best = dev_pearson
            checkpoint = {
                'model': trainer.model.state_dict(), 
                'optim': trainer.optimizer,
                'pearson': dev_pearson, 'mse': dev_mse,
                'args': args, 'epoch': epoch
                }
            logger.debug('==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint, '%s.pt' % os.path.join(args.save, args.expname))
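The definition of parse_args() is not included in these snippets; the sketch below is only a plausible reconstruction, with flag names inferred from the attributes the script above reads (args.data, args.glove, args.save, args.expname, args.lr, args.wd, ...) and purely assumed defaults.

# Hypothetical parse_args() matching the attributes used above; defaults are guesses.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Tree-LSTM relation/similarity training')
    parser.add_argument('--data', default='data/', help='directory with train/ dev/ test/ splits')
    parser.add_argument('--glove', default='glove/', help='directory with glove.840B.300d.txt')
    parser.add_argument('--save', default='checkpoints/', help='where to write logs and checkpoints')
    parser.add_argument('--expname', default='experiment', help='base name for .log/.pt files')
    parser.add_argument('--input_dim', type=int, default=300)
    parser.add_argument('--mem_dim', type=int, default=150)
    parser.add_argument('--epochs', type=int, default=15)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--wd', type=float, default=1e-4, help='weight decay')
    parser.add_argument('--optim', default='adagrad', choices=['adam', 'adagrad', 'sgd'])
    parser.add_argument('--seed', type=int, default=123)
    parser.add_argument('--sparse', action='store_true', help='sparse embedding gradients')
    parser.add_argument('--freeze_embed', action='store_true', help='do not train embeddings')
    parser.add_argument('--cuda', action='store_true', help='use the GPU when available')
    return parser.parse_args()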
Пример #25
0
        '',
        'steps':
        1 + info.splits['train_examples'] // params.training.batch_size
    })
    model.callbacks.append(progress)

    params.logdir = os.path.join(params.logdir, 'multi_mnist')
    print('config:', params)
    model_dir = os.path.join(params.logdir, model.name)

    ckpt = tf.train.Checkpoint(optimizer=model.optimizer, net=model)
    manager = tf.train.CheckpointManager(ckpt, model_dir, max_to_keep=3)
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
        init_epoch = (params.training.batch_size *
                      model.optimizer.iterations.numpy() //
                      info.splits['train_examples'])
    else:
        print("Initializing from scratch.")
        init_epoch = 0

    if args.train:
        train(model, model_log, manager, init_epoch, train_set, test_set)
    else:
        evaluate(model, model_log, test_set)


if __name__ == "__main__":
    args, params = parse_args()
    main(args, params)
Пример #26
0
def prepare_to_train(data=None, glove=None):
    args = parse_args()
    if data is not None:
        args.data = data
    if glove is not None:
        args.glove = glove

    args.input_dim, args.mem_dim = 300, 150
    args.hidden_dim, args.num_classes = 50, 5
    args.cuda = args.cuda and torch.cuda.is_available()
    if args.sparse and args.wd != 0:
        print('Sparsity and weight decay are incompatible, pick one!')
        exit()
    print(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    numpy.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    sick_vocab_file = os.path.join(args.data, 'sick.vocab')
    if not os.path.isfile(sick_vocab_file):
        token_files_a = [
            os.path.join(split, 'a.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files_b = [
            os.path.join(split, 'b.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files = token_files_a + token_files_b
        sick_vocab_file = os.path.join(args.data, 'sick.vocab')
        build_vocab(token_files, sick_vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=sick_vocab_file,
                  data=[
                      Constants.PAD_WORD, Constants.UNK_WORD,
                      Constants.BOS_WORD, Constants.EOS_WORD
                  ])
    print('==> SICK vocabulary size : %d ' % vocab.size())

    # load SICK dataset splits
    train_file = os.path.join(args.data, 'sick_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SICKDataset(train_dir, vocab, args.num_classes)
        torch.save(train_dataset, train_file)
    print('==> Size of train data   : %d ' % len(train_dataset))
    dev_file = os.path.join(args.data, 'sick_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SICKDataset(dev_dir, vocab, args.num_classes)
        torch.save(dev_dataset, dev_file)
    print('==> Size of dev data     : %d ' % len(dev_dataset))
    test_file = os.path.join(args.data, 'sick_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SICKDataset(test_dir, vocab, args.num_classes)
        torch.save(test_dataset, test_file)
    print('==> Size of test data    : %d ' % len(test_dataset))

    # initialize model, criterion/loss_function, optimizer
    model = SimilarityTreeLSTM(args.cuda, vocab.size(), args.input_dim,
                               args.mem_dim, args.hidden_dim, args.num_classes,
                               args.sparse)
    criterion = nn.KLDivLoss()
    if args.cuda:
        model.cuda(), criterion.cuda()
    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.Tensor(vocab.size(),
                           glove_emb.size(1)).normal_(-0.05, 0.05)
        # zero out the embeddings for padding and other special words if they are absent in vocab
        for idx, item in enumerate([
                Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
                Constants.EOS_WORD
        ]):
            emb[idx].zero_()
        for word in vocab.labelToIdx.keys():
            if glove_vocab.get_index(word):
                emb[vocab.get_index(word)] = glove_emb[glove_vocab.get_index(
                    word)]
        torch.save(emb, emb_file)
    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    #trainer = Trainer(args, model, criterion, optimizer)

    best = -float('inf')

    return (args, best, train_dataset, dev_dataset, test_dataset, metrics,
            optimizer, criterion, model)
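The same load-from-cache-or-build-and-save block is repeated for every split in the examples above and below; a small helper along these lines (the helper itself is illustrative, not part of the original code) would express that pattern once.

# Illustrative helper for the recurring "load cached split or build and cache it" pattern.
import os
import torch

def load_or_build(cache_file, dataset_cls, split_dir, vocab, num_classes):
    if os.path.isfile(cache_file):
        return torch.load(cache_file)  # reuse the preprocessed split
    dataset = dataset_cls(split_dir, vocab, num_classes)  # build from the raw token files
    torch.save(dataset, cache_file)  # cache for later runs
    return dataset

# e.g. train_dataset = load_or_build(train_file, SICKDataset, train_dir, vocab, args.num_classes)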
Пример #27
0
def main():
    global args
    args = parse_args()
    # global logger
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        "[%(asctime)s] %(levelname)s:%(name)s:%(message)s")
    # file logger
    fh = logging.FileHandler(os.path.join(args.save, args.expname) + '.log',
                             mode='w')
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    # console logger
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    # argument validation
    args.cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda:0" if args.cuda else "cpu")
    if args.sparse and args.wd != 0:
        logger.error('Sparsity and weight decay are incompatible, pick one!')
        exit()
    logger.debug(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    sick_vocab_file = os.path.join(args.data, 'sick.vocab')
    if not os.path.isfile(sick_vocab_file):
        token_files_b = [
            os.path.join(split, 'b.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files_a = [
            os.path.join(split, 'a.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files = token_files_a + token_files_b
        sick_vocab_file = os.path.join(args.data, 'sick.vocab')
        utils.build_vocab(token_files, sick_vocab_file)

    # get vocab object from vocab file previously written, create dictionary from list of unique words
    vocab = Vocab(filename=sick_vocab_file,
                  data=[
                      Constants.PAD_WORD, Constants.UNK_WORD,
                      Constants.BOS_WORD, Constants.EOS_WORD
                  ])
    logger.debug('==> SICK vocabulary size : %d ' % vocab.size())

    # load SICK dataset splits
    train_file = os.path.join(args.data, 'sick_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SICKDataset(train_dir, vocab, args.num_classes)
        torch.save(train_dataset, train_file)
    logger.debug('==> Size of train data   : %d ' % len(train_dataset))
    dev_file = os.path.join(args.data, 'sick_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SICKDataset(dev_dir, vocab, args.num_classes)
        torch.save(dev_dataset, dev_file)
    logger.debug('==> Size of dev data     : %d ' % len(dev_dataset))
    test_file = os.path.join(args.data, 'sick_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SICKDataset(test_dir, vocab, args.num_classes)
        torch.save(test_dataset, test_file)
    logger.debug('==> Size of test data    : %d ' % len(test_dataset))

    # initialize model, criterion/loss_function, optimizer
    model = SimilarityTreeLSTM(vocab.size(), args.input_dim, args.mem_dim,
                               args.hidden_dim, args.num_classes, args.sparse,
                               args.freeze_embed)
    criterion = nn.KLDivLoss()

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'sick_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:
        # load glove embeddings and vocab
        glove_vocab, glove_emb = utils.load_word_vectors(
            os.path.join(args.glove, 'glove.840B.300d'))
        logger.debug('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.zeros(vocab.size(),
                          glove_emb.size(1),
                          dtype=torch.float,
                          device=device)
        emb.normal_(0, 0.05)
        # zero out the embeddings for padding and other special words if they are absent in vocab
        for idx, item in enumerate([
                Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
                Constants.EOS_WORD
        ]):
            emb[idx].zero_()
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
        torch.save(emb, emb_file)
    # plug these into embedding matrix inside model
    model.emb.weight.data.copy_(emb)

    model.to(device), criterion.to(device)
    if args.optim == 'adam':
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(filter(lambda p: p.requires_grad,
                                         model.parameters()),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              lr=args.lr,
                              weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer, device)

    best = -float('inf')
    for epoch in range(args.epochs):
        train_loss = trainer.train(train_dataset)
        train_loss, train_pred = trainer.test(train_dataset)
        dev_loss, dev_pred = trainer.test(dev_dataset)
        test_loss, test_pred = trainer.test(test_dataset)

        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.mse(train_pred, train_dataset.labels)
        logger.info(
            '==> Epoch {}, Train \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, train_loss, train_pearson, train_mse))
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.mse(dev_pred, dev_dataset.labels)
        logger.info(
            '==> Epoch {}, Dev \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, dev_loss, dev_pearson, dev_mse))
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.mse(test_pred, test_dataset.labels)
        logger.info(
            '==> Epoch {}, Test \tLoss: {}\tPearson: {}\tMSE: {}'.format(
                epoch, test_loss, test_pearson, test_mse))

        if best < test_pearson:
            best = test_pearson
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': test_pearson,
                'mse': test_mse,
                'args': args,
                'epoch': epoch
            }
            logger.debug(
                '==> New optimum found, checkpointing everything now...')
            torch.save(checkpoint,
                       '%s.pt' % os.path.join(args.save, args.expname))
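The checkpoint written above bundles the model state_dict with the optimizer, the best metrics and the parsed args; a later run could restore it roughly as follows (a sketch, assuming the saved args still describe the model that was trained).

# Sketch of restoring the checkpoint saved above.
checkpoint = torch.load(os.path.join(args.save, args.expname) + '.pt')
saved_args = checkpoint['args']
model = SimilarityTreeLSTM(vocab.size(), saved_args.input_dim, saved_args.mem_dim,
                           saved_args.hidden_dim, saved_args.num_classes,
                           saved_args.sparse, saved_args.freeze_embed)
model.load_state_dict(checkpoint['model'])
model.eval()
print('best epoch %d, pearson %f, mse %f' %
      (checkpoint['epoch'], checkpoint['pearson'], checkpoint['mse']))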
Пример #28
0
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)

            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)


if __name__ == "__main__":
    args = parse_args(type=1)
    # log to console and file
    logger1 = log_util.create_logger(os.path.join('logs', args.name),
                                     print_console=True)
    logger1.info("LOG_FILE")  # log using loggerba
    # attach log to stdout (print function)
    s1 = log_util.StreamToLogger(logger1)
    sys.stdout = s1
    print(
        '_________________________________start___________________________________'
    )
    main()
    log_link = log_util.up_gist(os.path.join('logs', args.name + '.log'),
                                args.name, __file__)
    print(log_link)
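The snippet above redirects sys.stdout into the log file through log_util.StreamToLogger; that module is not shown here, but such an adapter is typically a small file-like wrapper, as in this assumed sketch.

# Assumed minimal StreamToLogger-style adapter (the real log_util is not shown above).
import logging

class StreamToLogger(object):
    def __init__(self, logger, level=logging.INFO):
        self.logger = logger
        self.level = level

    def write(self, message):
        # print() calls write() with fragments and newlines; log non-empty lines only
        for line in message.rstrip().splitlines():
            self.logger.log(self.level, line.rstrip())

    def flush(self):
        pass  # file-like objects need flush(); logging handlers flush themselves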
Пример #29
0
import sys
import traceback
import random
import config
import utils
from model import Model
from dataloader import Dataloader
from checkpoints import Checkpoints
import torch

from itertools import combinations

import numpy as np
import pdb

args, config_file = config.parse_args()
# Data Loading
if args.train == 'face_cls':
    from test_cls import Tester
    from train_cls import Trainer

if args.train == 'face_margin':
    from test_margin import Tester
    from train_margin import Trainer

if args.dataset_train == 'ClassSamplesDataLoader':
    from train_classload import Trainer


def main():
    # parse the arguments
Пример #30
0
def main():
    global args
    args = parse_args(type=1)
    print(args.name)
    print(args.model_name)

    args.input_dim = 300

    if args.mem_dim == 0:
        if args.model_name == 'dependency':
            args.mem_dim = 168
        elif args.model_name == 'constituency':
            args.mem_dim = 150
        elif args.model_name == 'lstm':
            args.mem_dim = 168
        elif args.model_name == 'bilstm':
            args.mem_dim = 168

    if args.num_classes == 0:
        if args.fine_grain:
            args.num_classes = 5  # 0 1 2 3 4
        else:
            args.num_classes = 3  # 0 1 2 (1 neutral)
    elif args.num_classes == 2:
        # assert False # this will not work
        assert not args.fine_grain

    args.cuda = args.cuda and torch.cuda.is_available()
    # args.cuda = False
    print(args)
    # torch.manual_seed(args.seed)
    # if args.cuda:
    # torch.cuda.manual_seed(args.seed)

    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    token_files = [
        os.path.join(split, 'sents.toks')
        for split in [train_dir, dev_dir, test_dir]
    ]
    vocab_file = os.path.join(args.data, 'vocab-cased.txt')  # use vocab-cased
    # build_vocab(token_files, vocab_file) NO, DO NOT BUILD VOCAB,  USE OLD VOCAB

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=vocab_file)
    print('==> SST vocabulary size : %d ' % vocab.size())

    # Load SST dataset splits

    is_preprocessing_data = False  # set when data is (re)built so the program can exit after preprocessing

    # train
    train_file = os.path.join(args.data, 'sst_train.pth')
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = SSTDataset(train_dir, vocab, args.num_classes,
                                   args.fine_grain, args.model_name)
        torch.save(train_dataset, train_file)
        is_preprocessing_data = True

    # dev
    dev_file = os.path.join(args.data, 'sst_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = SSTDataset(dev_dir, vocab, args.num_classes,
                                 args.fine_grain, args.model_name)
        torch.save(dev_dataset, dev_file)
        is_preprocessing_data = True

    # test
    test_file = os.path.join(args.data, 'sst_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = SSTDataset(test_dir, vocab, args.num_classes,
                                  args.fine_grain, args.model_name)
        torch.save(test_dataset, test_file)
        is_preprocessing_data = True

    criterion = nn.NLLLoss()
    # initialize model, criterion/loss_function, optimizer

    model = DMNWraper(args.cuda, args.input_dim, args.mem_dim, criterion,
                      args.train_subtrees, args.num_classes, args.embdrop)

    embedding_model = nn.Embedding(vocab.size(), args.input_dim)

    if args.cuda:
        embedding_model = embedding_model.cuda()

    if args.cuda:
        model.cuda(), criterion.cuda()

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    if args.embedding == 'glove':
        emb_torch = 'sst_embed.pth'
        emb_vector = 'glove.840B.300d'
        emb_vector_path = os.path.join(args.glove, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram':
        emb_torch = 'sst_embed_paragram.pth'
        emb_vector = 'paragram_300_sl999'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    elif args.embedding == 'paragram_xxl':
        emb_torch = 'sst_embed_paragram_xxl.pth'
        emb_vector = 'paragram-phrase-XXL'
        emb_vector_path = os.path.join(args.paragram, emb_vector)
        assert os.path.isfile(emb_vector_path + '.txt')
    else:
        assert False

    emb_file = os.path.join(args.data, emb_torch)
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:

        # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(emb_vector_path)
        print('==> Embedding vocabulary size: %d ' % glove_vocab.size())

        emb = torch.zeros(vocab.size(), glove_emb.size(1))

        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
            else:
                emb[vocab.getIndex(word)] = torch.Tensor(
                    emb[vocab.getIndex(word)].size()).normal_(-0.05, 0.05)
        torch.save(emb, emb_file)
        is_preprocessing_data = True  # flag to quit
        print('done creating emb, quit')

    if is_preprocessing_data:
        print('quit program')
        quit()

    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    embedding_model.state_dict()['weight'].copy_(emb)

    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        # optimizer   = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr, weight_decay=args.wd)
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'adam_combine':
        optimizer = optim.Adam([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adagrad_combine':
        optimizer = optim.Adagrad([{
            'params': model.parameters(),
            'lr': args.lr,
            'weight_decay': args.wd
        }, {
            'params': embedding_model.parameters(),
            'lr': args.emblr,
            'weight_decay': args.embwd
        }])
        args.manually_emb = 0
    elif args.optim == 'adam_combine_v2':
        model.embedding_model = embedding_model
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
        args.manually_emb = 0
    metrics = Metrics(args.num_classes)
    utils.count_param(model)

    trainer = SentimentTrainer(args, model, embedding_model, criterion,
                               optimizer)

    trainer.set_initial_emb(emb)
    question_idx = vocab.labelToIdx['sentiment']
    question_idx = torch.Tensor([question_idx])
    trainer.set_question(question_idx)

    # trainer = SentimentTrainer(args, model, embedding_model ,criterion, optimizer)

    mode = args.mode
    if mode == 'DEBUG':
        for epoch in range(args.epochs):
            # print a tree
            tree, sent, label = dev_dataset[3]
            utils.print_span(tree, sent, vocab)
            quit()

            dev_loss = trainer.train(dev_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)

            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Dev loss   : %f \t' % dev_loss, end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
    elif mode == "PRINT_TREE":
        for i in range(0, 10):
            ttree, tsent, tlabel = dev_dataset[i]
            utils.print_tree(ttree, 0)
            print('_______________')
        print('break')
        quit()
    elif mode == 'EVALUATE':
        filename = args.name + '.pth'
        epoch = args.epochs
        model_name = str(epoch) + '_model_' + filename
        embedding_name = str(epoch) + '_embedding_' + filename
        model = torch.load(os.path.join(args.saved, model_name))
        embedding_model = torch.load(os.path.join(args.saved, embedding_name))

        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, subtree_metrics = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(
            test_pred, test_dataset.labels, num_classes=args.num_classes)
        print('Epoch with max dev:' + str(epoch) + ' |test percentage ' +
              str(test_acc))
        print('____________________' + str(args.name) + '___________________')
        print_list = subtree_metrics.print_list
        torch.save(print_list,
                   os.path.join(args.saved, args.name + 'printlist.pth'))
        utils.print_trees_file(args,
                               vocab,
                               test_dataset,
                               print_list,
                               name='tree')
    elif mode == "EXPERIMENT":
        # dev_loss, dev_pred = trainer.test(dev_dataset)
        # dev_acc = metrics.sentiment_accuracy_score(dev_pred, dev_dataset.labels, num_classes=args.num_classes)
        max_dev = 0
        max_dev_epoch = 0
        filename = args.name + '.pth'
        for epoch in range(args.epochs):
            # train_loss, train_pred, _ = trainer.test(train_dataset)
            train_loss_while_training = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            dev_acc = metrics.sentiment_accuracy_score(
                dev_pred, dev_dataset.labels, num_classes=args.num_classes)
            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels, num_classes=args.num_classes)
            print('==> Train loss   : %f \t' % train_loss_while_training,
                  end="")
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch %d dev percentage %f ' % (epoch, dev_acc))
            print('Train acc %f ' % (train_acc))
            if dev_acc > max_dev:
                print('update best dev acc %f ' % (dev_acc))
                max_dev = dev_acc
                max_dev_epoch = epoch
                utils.mkdir_p(args.saved)
                torch.save(
                    model,
                    os.path.join(args.saved,
                                 str(epoch) + '_model_' + filename))
                torch.save(
                    embedding_model,
                    os.path.join(args.saved,
                                 str(epoch) + '_embedding_' + filename))
            gc.collect()
        print('epoch ' + str(max_dev_epoch) + ' dev score of ' + str(max_dev))
        print('evaluating on test set')
        model = torch.load(
            os.path.join(args.saved,
                         str(max_dev_epoch) + '_model_' + filename))
        embedding_model = torch.load(
            os.path.join(args.saved,
                         str(max_dev_epoch) + '_embedding_' + filename))
        trainer = SentimentTrainer(args, model, embedding_model, criterion,
                                   optimizer)
        trainer.set_question(question_idx)
        test_loss, test_pred, _ = trainer.test(test_dataset)
        test_acc = metrics.sentiment_accuracy_score(
            test_pred, test_dataset.labels, num_classes=args.num_classes)
        print('Epoch with max dev:' + str(max_dev_epoch) +
              ' |test percentage ' + str(test_acc))
        print('____________________' + str(args.name) + '___________________')
    else:
        for epoch in range(args.epochs):
            train_loss = trainer.train(train_dataset)
            train_loss, train_pred, _ = trainer.test(train_dataset)
            dev_loss, dev_pred, _ = trainer.test(dev_dataset)
            test_loss, test_pred, _ = trainer.test(test_dataset)

            train_acc = metrics.sentiment_accuracy_score(
                train_pred, train_dataset.labels)
            dev_acc = metrics.sentiment_accuracy_score(dev_pred,
                                                       dev_dataset.labels)
            test_acc = metrics.sentiment_accuracy_score(
                test_pred, test_dataset.labels)
            print('==> Train loss   : %f \t' % train_loss, end="")
            print('Epoch ', epoch, 'train percentage ', train_acc)
            print('Epoch ', epoch, 'dev percentage ', dev_acc)
            print('Epoch ', epoch, 'test percentage ', test_acc)
Пример #31
0
def pytest_funcarg__test_conf(request):
    args = "--mode train".split(" ")
    return config.parse_args(raw_args = args)
Пример #32
0
def main():
    global args
    args = parse_args()
    args.input_dim, args.mem_dim = 200, 150
    args.hidden_dim, args.num_classes = 50, 2
    args.cuda = args.cuda and torch.cuda.is_available()
    if args.sparse and args.wd != 0:
        print('Sparsity and weight decay are incompatible!')
        exit()
    print(args)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    numpy.random.seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        torch.backends.cudnn.benchmark = True
    if not os.path.exists(args.save):
        os.makedirs(args.save)

    # note: the second argument is absolute, so os.path.join discards args.glove here
    word_vectors_path = os.path.join(
        args.glove, '/data1/qspace/yananlu/embedding/huge.readable')
    train_dir = os.path.join(args.data, 'train/')
    dev_dir = os.path.join(args.data, 'dev/')
    test_dir = os.path.join(args.data, 'test/')

    # write unique words from all token files
    xianer_vocab_file = os.path.join(args.data, 'xianer.vocab')
    if not os.path.isfile(xianer_vocab_file):
        token_files_a = [
            os.path.join(split, 'a.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files_b = [
            os.path.join(split, 'b.toks')
            for split in [train_dir, dev_dir, test_dir]
        ]
        token_files = token_files_a + token_files_b
        xianer_vocab_file = os.path.join(args.data, 'xianer.vocab')
        build_vocab(token_files, xianer_vocab_file)

    # get vocab object from vocab file previously written
    vocab = Vocab(filename=xianer_vocab_file,
                  data=[
                      config.PAD_WORD, config.UNK_WORD, config.BOS_WORD,
                      config.EOS_WORD
                  ])
    print('==> Xianer vocabulary size : %d ' % vocab.size())

    # load Xianer dataset splits
    train_file = os.path.join(args.data, 'xianer_train.pth')  # quora_train.pth
    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = QuoraDataset(train_dir, vocab, args.num_classes)
        torch.save(train_dataset, train_file)
    print('==> Size of train data   : %d ' % len(train_dataset))

    dev_file = os.path.join(args.data, 'xianer_dev.pth')
    if os.path.isfile(dev_file):
        dev_dataset = torch.load(dev_file)
    else:
        dev_dataset = QuoraDataset(dev_dir, vocab, args.num_classes)
        torch.save(dev_dataset, dev_file)
    print('==> Size of dev data     : %d ' % len(dev_dataset))

    test_file = os.path.join(args.data, 'xianer_test.pth')
    if os.path.isfile(test_file):
        test_dataset = torch.load(test_file)
    else:
        test_dataset = QuoraDataset(test_dir, vocab, args.num_classes)
        torch.save(test_dataset, test_file)
    print('==> Size of test data    : %d ' % len(test_dataset))

    # initialize model, criterion/loss_function, optimizer
    model = SimilarityTreeLSTM(
        args.cuda,
        vocab,  # vocab.size()
        args.input_dim,
        args.mem_dim,
        args.hidden_dim,
        args.num_classes,
        args.sparse)
    # criterion = nn.KLDivLoss()
    criterion = nn.CrossEntropyLoss()  # nn.MSELoss()
    if args.cuda:
        model.cuda(), criterion.cuda()
    if args.optim == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.lr,
                               weight_decay=args.wd)
    elif args.optim == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=args.lr,
                                  weight_decay=args.wd)
    elif args.optim == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.wd)
    metrics = Metrics(args.num_classes)

    # for words common to dataset vocab and GLOVE, use GLOVE vectors
    # for other words in dataset vocab, use random normal vectors
    emb_file = os.path.join(args.data, 'xianer_embed.pth')
    if os.path.isfile(emb_file):
        emb = torch.load(emb_file)
    else:  # load glove embeddings and vocab
        glove_vocab, glove_emb = load_word_vectors(word_vectors_path)
        print('==> GLOVE vocabulary size: %d ' % glove_vocab.size())
        emb = torch.Tensor(vocab.size(),
                           glove_emb.size(1)).normal_(-0.05, 0.05)
        # zero out the embeddings for padding and other special words if they are absent in vocab
        for idx, item in enumerate([
                config.PAD_WORD, config.UNK_WORD, config.BOS_WORD,
                config.EOS_WORD
        ]):
            emb[idx].zero_()
        for word in vocab.labelToIdx.keys():
            if glove_vocab.getIndex(word):
                emb[vocab.getIndex(word)] = glove_emb[glove_vocab.getIndex(
                    word)]
        torch.save(emb, emb_file)
    # plug these into embedding matrix inside model
    if args.cuda:
        emb = emb.cuda()
    model.childsumtreelstm.emb.state_dict()['weight'].copy_(emb)

    # create trainer object for training and testing
    trainer = Trainer(args, model, criterion, optimizer)
    best = -float('inf')
    for epoch in range(args.epochs):
        _ = trainer.train(train_dataset)

        train_loss, train_pred, train_score = trainer.test(train_dataset,
                                                           plot_flag=False)
        train_pearson = metrics.pearson(train_pred, train_dataset.labels)
        train_mse = metrics.mse(train_pred, train_dataset.labels)
        train_accu = metrics.accuracy(train_pred, train_dataset.labels)
        train_f1 = metrics.f1(train_pred, train_dataset.labels)  # GEOFF
        print('==> Train Loss: {}\tPearson: {}\tMSE: {}\tAccu: {}\tF1: {}'.
              format(train_loss, train_pearson, train_mse, train_accu,
                     train_f1))

        dev_loss, dev_pred, dev_score = trainer.test(dev_dataset,
                                                     plot_flag=False)
        dev_pearson = metrics.pearson(dev_pred, dev_dataset.labels)
        dev_mse = metrics.mse(dev_pred, dev_dataset.labels)
        dev_accu = metrics.accuracy(dev_pred, dev_dataset.labels)
        dev_f1 = metrics.f1(dev_pred, dev_dataset.labels)
        print(
            '==> Dev Loss: {}\tPearson: {}\tMSE: {}\tAccu: {}\tF1: {}'.format(
                dev_loss, dev_pearson, dev_mse, dev_accu, dev_f1))

        test_loss, test_pred, test_score = trainer.test(test_dataset,
                                                        plot_flag=False)
        test_pearson = metrics.pearson(test_pred, test_dataset.labels)
        test_mse = metrics.mse(test_pred, test_dataset.labels)
        test_accu = metrics.accuracy(test_pred, test_dataset.labels)
        test_f1 = metrics.f1(test_pred, test_dataset.labels)
        print(
            '==> Test Loss: {}\tPearson: {}\tMSE: {}\tAccu: {}\tF1: {}'.format(
                test_loss, test_pearson, test_mse, test_accu, test_f1))

        if best < dev_f1:  # xianer use dev
            best = dev_f1
            checkpoint = {
                'model': trainer.model.state_dict(),
                'optim': trainer.optimizer,
                'pearson': dev_pearson,
                'mse': dev_mse,
                'f1': dev_f1,
                'accuracy': dev_accu,
                'args': args,
                'epoch': epoch
            }
            print('==> New optimum found.')
            torch.save(
                checkpoint,
                '%s.pt' % os.path.join(args.save, args.expname + '.pth'))
            remove_dirs()  # clear attention dir
            trainer.test(dev_dataset, plot_flag=True)  # plot attention
            numpy.savetxt(os.path.join(args.data, 'dev/dev.predict'),
                          dev_score.cpu().numpy())  # save predict result
            numpy.savetxt(os.path.join(args.data, 'train/train.predict'),
                          train_score.cpu().numpy())
            numpy.savetxt(os.path.join(args.data, 'test/test.predict'),
                          test_score.cpu().numpy())
Пример #33
0
def parse_conf(model, test_corp):
    args = "--mode train --embed 50 --hidden 30 --minbatch 15".split(" ")
    args += ["--model", model]
    conf = config.parse_args(raw_args = args)
    conf.corpus = test_corp
    return conf
Пример #34
0
        # 00471 20160317 002
        #
        # 01332 20150102 101 1:10
        # 01332 20150105 002 1 - 1
        # 01332 20150520 002 4 - 1
        #               16.75
        request_5519.start_time = 0
        request_5519.req_num = 1
        request_5519.restore_type = message.RESTORE_RIGHT_AFTER
        m = self.client.send_and_receive(request_5519)
        self.assertEqual(len(m.data), 1)

    def tearDown(self):
        self.client.close()

def runtest(ns):
    if ns.message.keys() == all_messages.keys():
        suite = unittest.TestLoader().loadTestsFromTestCase(TestMessage)
    else:
        suite = unittest.TestSuite(map(TestMessage,
                      ('test_message_{}'.format(x) for x in ns.message.keys())))

    unittest.TextTestRunner(verbosity=ns.verbosity).run(suite)

if __name__ == '__main__':
    import sys

    ns = config.parse_args(sys.argv[1:], receive_push=False)

    runtest(ns)
Пример #35
0
def pytest_funcarg__test_conf_v2(request):
    args = "--mode train --embed 50 --hidden 30 --minbatch 2 --model v2 --epoch 2 --minor_word 0".split(" ")
    conf = config.parse_args(raw_args = args)
    conf.corpus = corpus.open(test_file)
    return conf
Пример #36
0
#!/usr/bin/env python
import os
import logging

import config


def convert_plist_file():
    plist_file_path = os.path.realpath(args.plist_file_path)
    logger.info(plist_file_path)
    logger.debug(config.OUTPUT_DIRECTORY)


if __name__ == '__main__':
    args = config.parse_args()

    logging.basicConfig(filename=config.LOGGING_FILE, level=config.LOGGING_LEVEL)
    logger = logging.getLogger(__name__)

    convert_plist_file()
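The converter above expects the config module to expose parse_args() plus module-level constants; a minimal version of such a module might look like this (the constant values and the single positional argument are assumptions).

# Hypothetical config module for the plist converter above; values are placeholders.
import argparse
import logging

OUTPUT_DIRECTORY = 'output'
LOGGING_FILE = 'convert_plist.log'
LOGGING_LEVEL = logging.INFO

def parse_args():
    parser = argparse.ArgumentParser(description='convert a .plist file')
    parser.add_argument('plist_file_path', help='path to the .plist file')
    return parser.parse_args()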
Пример #37
0
def main():
    args = parse_args()
    print(args)

    num_classes = 7

    data_dir = args.data_dir
    train_file = os.path.join(data_dir, 'train_data.pth')
    # val_dir = args.val_data
    val_file = os.path.join(data_dir, 'val_data.pth')

    vocab_file = "../data/vocab.txt"
    vocab = Vocab(filename=vocab_file)

    if os.path.isfile(train_file):
        train_dataset = torch.load(train_file)
    else:
        train_dataset = WebKbbDataset(vocab, num_classes,
                                      os.path.join(data_dir, 'train_texts.blk'),
                                      os.path.join(data_dir, 'train_labels.blk'))
        torch.save(train_dataset, train_file)

    if os.path.isfile(val_file):
        val_dataset = torch.load(val_file)
    else:
        val_dataset = WebKbbDataset(vocab, num_classes,
                                    os.path.join(data_dir, 'val_texts.blk'),
                                    os.path.join(data_dir, 'val_labels.blk'))
        torch.save(val_dataset, val_file)

    vocab_size = vocab.size()
    in_dim = 200
    mem_dim = 200
    hidden_dim = 200
    num_classes = 7
    sparsity = True
    freeze = args.freeze_emb
    epochs = args.epochs
    lr = args.lr
    pretrain = args.pretrain

    cuda_flag = torch.cuda.is_available()

    model = DomTreeLSTM(vocab_size, in_dim, mem_dim, hidden_dim, num_classes, sparsity, freeze)
    criterion = nn.CrossEntropyLoss()

    if pretrain:
        emb_file = os.path.join('../data', 'emb.pth')
        if os.path.isfile(emb_file):
            emb = torch.load(emb_file)
            print(emb.size())
            print("Embedding weights loaded")
            model.emb.weight.data.copy_(emb)
        else:
            print("Embedding file not found")

    optimizer = optim.Adagrad(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)

    trainer = Trainer(model, criterion, optimizer, train_dataset, val_dataset, cuda_flag=cuda_flag)

    for epoch in range(epochs):
        trainer.train()
        trainer.test()
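The Adagrad call above only skips the embedding weights if something inside the model has already set requires_grad = False for them; DomTreeLSTM is not shown, so the following is just an assumed sketch of how the freeze flag is usually honoured.

# Assumed sketch of how a freeze flag is typically applied inside the model;
# vocab_size, in_dim, sparsity and freeze refer to the variables set above.
import torch.nn as nn

emb = nn.Embedding(vocab_size, in_dim, sparse=sparsity)
if freeze:
    emb.weight.requires_grad = False  # then filter(lambda p: p.requires_grad, ...) drops it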
Пример #38
0
                                               arg.augm_scal)
                coord, vector = make_input_tps_param(tps_param_dic)
                coord, vector = coord.to(device), vector.to(device)
                image_spatial_t, _ = ThinPlateSpline(original, coord, vector,
                                                     original.shape[3], device)
                image_appearance_t = K.ColorJitter(arg.brightness,
                                                   arg.contrast,
                                                   arg.saturation,
                                                   arg.hue)(original)
                image, reconstruction, mu, shape_stream_parts, heat_map = model(
                    original, image_spatial_t, image_appearance_t, coord,
                    vector)


if __name__ == '__main__':
    arg = DotMap(vars(parse_args()))
    main(arg)


class LayerNorm(nn.Module):
    def __init__(self, num_features, eps=1e-5, affine=True):
        super(LayerNorm, self).__init__()
        self.num_features = num_features
        self.affine = affine
        self.eps = eps

        if self.affine:
            self.gamma = nn.Parameter(torch.Tensor(num_features).uniform_())
            self.beta = nn.Parameter(torch.zeros(num_features))

    def forward(self, x):
Пример #39
0
        counts = np.array([ np.float32(v) for v in row.split() ])
    return counts

def log_softmax(output):
    if output.owner.op == T.nnet.softmax_op:
        x = output.owner.inputs[0]
        k = T.max(x,axis=1,keepdims=True)
        sum_x = T.log(T.sum(T.exp(x - k),axis=1,keepdims=True)) + k
        print >> sys.stderr, "Stable log softmax"
        return x - sum_x
    else:
        return T.log(output)  # fall back to a direct (less stable) log


if __name__ == "__main__":
    config.parse_args()
    
    X = T.matrix('X')
    P = Parameters()
    predict = model.build(P)
    _,outputs = predict(X)
    counts = load_counts()
    predict = theano.function(
            inputs = [X],
            outputs = log_softmax(outputs) - T.log(counts/T.sum(counts))
        )
    

    if predict is not None:
        stream = data_io.context(ark_io.parse_binary(sys.stdin),left=5,right=5)
        for name,frames in stream:
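The else branch aside, log_softmax above implements the usual max-shift trick, log softmax(x) = x - (k + log Σ exp(x - k)) with k = max(x); a quick NumPy check of that identity:

# Numeric check of the stable log-softmax identity used above.
import numpy as np

x = np.array([1000.0, 1001.0, 1002.0])  # naive exp(x) would overflow
k = x.max()
log_softmax_x = x - (k + np.log(np.exp(x - k).sum()))
print(log_softmax_x)               # ~ [-2.4076, -1.4076, -0.4076]
print(np.exp(log_softmax_x).sum()) # ~ 1.0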
Пример #40
0
import os

from dataloder import Dataset
from solver import Solver
from config import parse_args, read_conf_file

args_cmd = parse_args()
args_yml = read_conf_file(args_cmd.config)

data = Dataset(args_yml)

if __name__ == '__main__':
    module = args_cmd.module

    os.environ['CUDA_VISIBLE_DEVICES'] = args_cmd.GPU

    if module == 'test_dataset':
        data.test_dataset()
    elif module == 'create_dataset':
        data.create_dataset()
    elif module == 'train':
        solver = Solver(args_yml)
        solver.train()
    elif module == 'test_dir':
        solver = Solver(args_cmd)
        solver.test_dir()
Пример #41
0
    for i in range(len(devList)):
        strLabel = devList[i]
        fpTuple = getfpTuple(strLabel, x_day_dir)
        label2Data[i] = fpTuple

        oneData, oneLabel = getOneDevData(fpTuple, i, params)
        allData.extend(oneData)
        allLabel.extend(oneLabel)

    splitRatio = params['splitRatio']
    trainData, trainLabels, valData, valLabels, testData, testLabels = splitData(opts, splitRatio, allData, allLabel)
    return trainData, trainLabels, valData, valLabels, testData, testLabels


def test_read_one_data(opts):
    allDataSize = 1000
    splitRatio = {'train': 0.7, 'val': 0.2, 'test': 0.1}
    train_ind, val_ind, test_ind = getSplitIndex(allDataSize, splitRatio)

    # Load a dataset
    x_day_dir = opts.input
    trainData, trainLabels, valData, valLabels, testData, testLabels = getData(opts, x_day_dir)
    print(trainData.shape, valData.shape, testData.shape)
    print(trainLabels.shape)


if __name__ == "__main__":
    opts = config.parse_args(sys.argv)
    test_read_one_data(opts)
    print('all test passed!')
Пример #42
0
"""

from config import parse_args
from iterator import DiscoveryIterator, SmartIterator
from keras.optimizers import RMSprop
from models import ReferringRelationshipsModel
from utils.eval_utils import format_results_eval
from utils.visualization_utils import objdict
from utils.eval_utils import get_metrics
from utils.train_utils import get_loss_func
import json
import os

if __name__ == '__main__':
    # Parse command line arguments.
    args = parse_args(evaluation=True)
    models_dir = os.path.dirname(args.model_checkpoint)
    params = objdict(
        json.load(open(os.path.join(models_dir, "args.json"), "r")))
    params.batch_size = args.batch_size
    params.discovery = args.discovery
    params.shuffle = False

    # If the dataset does exists, alert the user.
    if not os.path.isdir(args.data_dir):
        raise ValueError('The directory %s doesn\'t exist. '
                         'Exiting evaluation!' % args.data_dir)

    # Make sure the dataset and images exist.
    for hdf5_file in [
            os.path.join(args.data_dir, 'images.hdf5'),