Example #1
    def __init__(self, input_size, kernel_size, stride):
        self.input_size = input_size
        self.kernel_size = kernel_size
        self.stride = stride
        self.indices = None
        self.X = None

        # Spatial output size for (C, H, W) input, with zero padding.
        self.out_h = get_dim(input_size[1], kernel_size[0], 0, stride)
        self.out_w = get_dim(input_size[2], kernel_size[1], 0, stride)
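The get_dim helper itself is not shown in these snippets. Here it evidently computes the standard convolution/pooling output-size formula; a minimal sketch matching the call get_dim(input_size[1], kernel_size[0], 0, stride) above (the body is an assumption, not the original code):

def get_dim(input_dim, kernel_size, padding, stride):
    # Standard output size: floor((n - k + 2p) / s) + 1.
    return (input_dim - kernel_size + 2 * padding) // stride + 1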
Example #2
    if args.train_file is None:
        raise ValueError('train_file is not specified.')

    if args.dev_file is None:
        raise ValueError('dev_file is not specified.')

    if args.rnn_type == 'lstm':
        args.rnn_layer = lasagne.layers.LSTMLayer
    elif args.rnn_type == 'gru':
        args.rnn_layer = lasagne.layers.GRULayer
    else:
        raise NotImplementedError('rnn_type = %s' % args.rnn_type)

    if args.embedding_file is not None:
        dim = utils.get_dim(args.embedding_file)
        if (args.embedding_size is not None) and (args.embedding_size != dim):
            raise ValueError('embedding_size = %d, but %s has %d dims.' %
                             (args.embedding_size, args.embedding_file, dim))
        args.embedding_size = dim
    elif args.embedding_size is None:
        raise RuntimeError('Either embedding_file or embedding_size needs to be specified.')

    if args.log_file is None:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(message)s', datefmt='%m-%d %H:%M')
    else:
        logging.basicConfig(filename=args.log_file,
                            filemode='w', level=logging.DEBUG,
                            format='%(asctime)s %(message)s', datefmt='%m-%d %H:%M')
Example #3
def qa_model(debug=False,
             test_only=False,
             prepare_model=False,
             random_seed=1013,
             train_file=None,
             dev_file=None,
             pre_trained=None,
             model_file='model.pkl.gz',
             log_file=None,
             embedding_file=None,
             max_dev=None,
             relabeling=True,
             embedding_size=None,
             hidden_size=128,
             bidir=True,
             num_layers=1,
             rnn_type='gru',
             att_func='bilinear',
             batch_size=32,
             num_epoches=100,
             eval_iter=100,
             dropout_rate=0.2,
             optimizer='sgd',
             learning_rate=0.1,
             grad_clipping=10.0):

    # namedtuple() returns a class; the attributes below are assigned on the
    # class object itself, so it is effectively used as a mutable namespace.
    args = namedtuple(
        "args",
        "debug, test_only, prepare_model, random_seed, train_file, dev_file, "
        "pre_trained, model_file, log_file, embedding_file, max_dev, "
        "relabeling, embedding_size, hidden_size, bidir, num_layers, "
        "rnn_type, att_func, batch_size, num_epoches, eval_iter, "
        "dropout_rate, optimizer, learning_rate, grad_clipping")
    args.debug = debug
    args.test_only = test_only
    args.prepare_model = prepare_model
    args.random_seed = random_seed
    args.train_file = train_file
    args.dev_file = dev_file
    args.pre_trained = pre_trained
    args.model_file = model_file
    args.log_file = log_file
    args.embedding_file = embedding_file
    args.max_dev = max_dev
    args.relabeling = relabeling
    args.embedding_size = embedding_size
    args.hidden_size = hidden_size
    args.bidir = bidir
    args.num_layers = num_layers
    args.rnn_type = rnn_type
    args.att_func = att_func
    args.batch_size = batch_size
    args.num_epoches = num_epoches
    args.eval_iter = eval_iter
    args.dropout_rate = dropout_rate
    args.optimizer = optimizer
    args.learning_rate = learning_rate
    args.grad_clipping = grad_clipping

    # args = config.get_args()
    np.random.seed(args.random_seed)
    lasagne.random.set_rng(np.random.RandomState(args.random_seed))

    if args.train_file is None:
        raise ValueError('train_file is not specified.')

    if args.dev_file is None:
        raise ValueError('dev_file is not specified.')

    if args.rnn_type == 'lstm':
        args.rnn_layer = lasagne.layers.LSTMLayer
    elif args.rnn_type == 'gru':
        args.rnn_layer = lasagne.layers.GRULayer
    else:
        raise NotImplementedError('rnn_type = %s' % args.rnn_type)

    if args.embedding_file is not None:
        dim = utils.get_dim(args.embedding_file)
        if (args.embedding_size is not None) and (args.embedding_size != dim):
            raise ValueError('embedding_size = %d, but %s has %d dims.' %
                             (args.embedding_size, args.embedding_file, dim))
        args.embedding_size = dim
    elif args.embedding_size is None:
        raise RuntimeError(
            'Either embedding_file or embedding_size needs to be specified.')

    if args.log_file is None:
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(message)s',
                            datefmt='%m-%d %H:%M')
    else:
        logging.basicConfig(filename=args.log_file,
                            filemode='w',
                            level=logging.DEBUG,
                            format='%(asctime)s %(message)s',
                            datefmt='%m-%d %H:%M')

    logging.info(' '.join(sys.argv))
    return main(args)
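Examples #2, #3 and #7 use utils.get_dim(embedding_file) to discover the dimensionality of pre-trained word vectors. That helper is not shown either; a minimal sketch, assuming a plain-text embedding format with one word followed by its vector components per line (the file format is an assumption):

def get_dim(embedding_file):
    # Vector size = token count of the first line minus the word itself.
    with open(embedding_file, 'r', encoding='utf-8') as f:
        return len(f.readline().rstrip().split()) - 1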
Example #4
layers = []
total_time = 15  # simulation time steps per input image

# Build the network: each conv/pool module goes to the GPU, paired with a
# state container covering all time steps, sized to that layer's output.
layer_1 = conv1().to('cuda')
layers.append(init_layers(total_time=total_time, C=4, H=400, W=400))
layer_2 = pool1().to('cuda')
layers.append(init_layers(total_time=total_time, C=4, H=66, W=66))
layer_3 = conv2().to('cuda')
layers.append(init_layers(total_time=total_time, C=20, H=54, W=54))
layer_4 = pool2().to('cuda')
layers.append(init_layers(total_time=total_time, C=20, H=10, W=10))
layer_5 = conv3().to('cuda')
layers.append(init_layers(total_time=total_time, C=20, H=6, W=6))
network = [layer_1, layer_2, layer_3, layer_4, layer_5]
network_len = len(network)
layer_filt_dimension = get_dim(network[0::2])  # every other module: the conv layers

thresh = torch.Tensor([.5, .5, .5])
image_names = os.listdir(train_data)
data_len = len(image_names)
thds_per_dim = 10

for j in trange(data_len):
    # DoG-filter the image, flatten it, and (presumably) encode pixel
    # intensities as spike trains over the 15 time steps before moving
    # the result to the GPU.
    image = DoG(image_name=image_names[j])
    image = np.reshape(image, (160000, ))  # 400 * 400 pixels
    image = freq(15, image, 0.05)
    image = torch.from_numpy(image).to('cuda')
    image = image.float()

    for i in range(stdp_params['max_iter']):
        for t in range(total_time):
Example #5
def load_dataset(dataset, train_size, valid_size, test_size):
    """Load the dataset passed in argument with the corresponding sizes for the training, validation and testing set."""

    if dataset == 'mnist_012':
        root = './data/mnist'
        num_classes = 3

        trans = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)])
        train_valid_set = datasets.MNIST(root=root, train=True, transform=trans)
        test_set = datasets.MNIST(root=root, train=False, transform=trans)

        train_valid_set = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2])
        test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2])

        train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set, train_size=train_size)

        train_loader = DataLoader(dataset=train_valid_set, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True)
        valid_loader = DataLoader(dataset=train_valid_set, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True)
        test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True)

    elif dataset == 'mnist_rot':
        root = './data/mnist'
        num_classes = 9

        train_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                          transforms.Resize((26,26)),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)])
        test_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                         transforms.Resize((26,26)),
                                         transforms.RandomRotation((0,360)),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)])
        train_valid_set = datasets.MNIST(root=root, train=True, transform=train_trans)
        test_set = datasets.MNIST(root=root, train=False, transform=test_trans)

        train_valid_set_bis = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2,3,4,5,6,7,8])
        test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2,3,4,5,6,7,8])

        train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set_bis, train_size=train_size)

        train_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True)
        valid_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True)
        test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True)

    elif dataset == 'mnist_trans':
        root = './data/mnist'
        num_classes = 9

        train_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                          transforms.Resize((26,26)),
                                          transforms.ToTensor(),
                                          transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)])
        test_trans = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                         transforms.Resize((26,26)),
                                         RandomTranslation(horizontal=6, vertical=6),
                                         transforms.ToTensor(),
                                         transforms.Normalize(mean=MNIST_MEAN, std=MNIST_STD)])
        train_valid_set = datasets.MNIST(root=root, train=True, transform=train_trans)
        test_set = datasets.MNIST(root=root, train=False, transform=test_trans)
        
        train_valid_set_bis = MNIST_bis(dataset=train_valid_set, size=train_size+valid_size, digits_to_keep=[0,1,2,3,4,5,6,7,8])
        test_set = MNIST_bis(dataset=test_set, size=test_size, digits_to_keep=[0,1,2,3,4,5,6,7,8])

        train_sampler, valid_sampler = train_valid_split(dataset=train_valid_set_bis, train_size=train_size)

        train_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True)
        valid_loader = DataLoader(dataset=train_valid_set_bis, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True)
        test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True, drop_last=True)

    elif dataset == 'eth80':
        root = './data/eth80'
        num_classes = 8

        trans = transforms.Compose([transforms.Grayscale(num_output_channels=1),
                                    transforms.Resize((50,50)),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=ETH80_MEAN, std=ETH80_STD)])
        complete_set = datasets.ImageFolder(root=root, transform=trans)
        class_names = complete_set.classes

        train_sampler, valid_sampler, test_sampler = train_valid_test_split(dataset=complete_set, train_size=train_size, valid_size=valid_size)
        
        train_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=train_sampler, num_workers=4, pin_memory=True, drop_last=True) 
        valid_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=valid_sampler, num_workers=4, pin_memory=True, drop_last=True) 
        test_loader = DataLoader(dataset=complete_set, batch_size=BATCH_SIZE, sampler=test_sampler, num_workers=4, pin_memory=True, drop_last=True)

    else:
        raise ValueError('Specified dataset does not exist.')

    logger.debug('Class frequency train loader: {} validation loader: {} test loader: {}'.format(
        count_class_freq(train_loader, num_classes),
        count_class_freq(valid_loader, num_classes),
        count_class_freq(test_loader, num_classes)))
    logging.info('Loaded {} dataset with the split {}-{}-{} for the [train]-[valid]-[test] setup.'.format(
        dataset, len(train_loader)*BATCH_SIZE, len(valid_loader)*BATCH_SIZE, len(test_loader)*BATCH_SIZE))


    return train_loader, valid_loader, test_loader, get_dim(train_loader)
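In this example get_dim is applied to a whole DataLoader rather than a tensor, apparently to report the per-sample input dimension. A sketch of an overload consistent with that usage (hypothetical; assumes each batch is an (images, labels) pair):

def get_dim(loader):
    # Peek at one batch and count the features of a single sample.
    images, _ = next(iter(loader))
    return images[0].numel()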
Example #6
def linear(inputs, output_size, trainable):
    in_dim = get_dim(inputs)  # size of the last (feature) axis

    # Affine layer: weights scaled by 1/sqrt(fan_in), zero-initialised bias.
    w = get_rand_variable([in_dim, output_size], 1 / np.sqrt(in_dim), trainable=trainable)
    b = get_const_variable([output_size], 0.0, trainable=trainable)
    return tf.matmul(inputs, w) + b
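get_rand_variable and get_const_variable are project-local wrappers, not TensorFlow API, and their definitions are not shown. A minimal TF1-style sketch consistent with the calls above (the names appear in the source, but the bodies and the uniform-initialisation choice are assumptions):

import tensorflow.compat.v1 as tf

def get_dim(tensor):
    # Static size of the last (feature) axis, e.g. 300 for [batch, 300].
    return tensor.get_shape().as_list()[-1]

def get_rand_variable(shape, scale, trainable=True):
    # Assumed: uniform initialisation in [-scale, scale].
    init = tf.random_uniform(shape, minval=-scale, maxval=scale)
    return tf.Variable(init, trainable=trainable)

def get_const_variable(shape, value, trainable=True):
    # Assumed: variable filled with a constant (0.0 for the bias above).
    return tf.Variable(tf.constant(value, shape=shape), trainable=trainable)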
Example #7
def init():
  path = config.data_path
  config.embedding_file = os.path.join(path, config.embedding_file)
  config.train_file = os.path.join(path, config.train_file)
  config.dev_file = os.path.join(path, config.dev_file)
  config.test_file = os.path.join(path, config.test_file)
  
  dim = utils.get_dim(config.embedding_file)
  config.embedding_size = dim

  # Config log
  if config.log_file is None:
    logging.basicConfig(level=logging.DEBUG,
                      format='%(asctime)s %(message)s', datefmt='%m-%d %H:%M')
  else:
    logging.basicConfig(filename=config.log_file,
                      filemode='w', level=logging.DEBUG,
                      format='%(asctime)s %(message)s', datefmt='%m-%d %H:%M')
  # Load data
  logging.info('-' * 50)
  logging.info('Load data files..')
  if config.debug:
    logging.info('*' * 10 + ' Train')
    train_examples = utils.load_data(config.train_file, 1000)
    logging.info('*' * 10 + ' Dev')
    dev_examples = utils.load_data(config.dev_file, 100)
  else:
    logging.info('*' * 10 + ' Train')
    train_examples = utils.load_data(config.train_file)
    logging.info('*' * 10 + ' Dev')
    dev_examples = utils.load_data(config.dev_file)

  config.num_train = len(train_examples[0])
  config.num_dev = len(dev_examples[0])

  # Build dictionary
  logging.info('-' * 50)
  logging.info('Build dictionary..')
  word_dict = utils.build_dict(train_examples[0] + train_examples[1])
  entity_markers = list(set([w for w in word_dict.keys()
                             if w.startswith('@entity')] + train_examples[2]))
  entity_markers = ['<unk_entity>'] + entity_markers
  entity_dict = {w: index for (index, w) in enumerate(entity_markers)}
  logging.info('Entity markers: %d' % len(entity_dict))
  config.num_labels = len(entity_dict)

  logging.info('-' * 50)
  logging.info('Load embedding file..')
  embeddings = utils.gen_embeddings(word_dict, config.embedding_size, config.embedding_file)
  (config.vocab_size, config.embedding_size) = embeddings.shape

  # Log parameters
  flags = config.__dict__['__flags']
  flag_str = "\n"
  for k in flags:
    flag_str += "\t%s:\t%s\n" % (k, flags[k])
  logging.info(flag_str)

  # Vectorize test data
  logging.info('-' * 50)
  logging.info('Vectorize test data..')
  # d: document, q: question, a: answer
  # l: whether the entity label occurs in the document
  dev_d, dev_q, dev_l, dev_a = utils.vectorize(dev_examples, word_dict, entity_dict)
  assert len(dev_d) == config.num_dev
  all_dev = utils.gen_examples(dev_d, dev_q, dev_l, dev_a, config.batch_size)

  if config.test_only:
      return embeddings, all_dev, None

  # Vectorize training data
  logging.info('-' * 50)
  logging.info('Vectorize training data..')
  train_d, train_q, train_l, train_a = utils.vectorize(train_examples, word_dict, entity_dict)
  assert len(train_d) == config.num_train
  all_train = utils.gen_examples(train_d, train_q, train_l, train_a, config.batch_size)

  return embeddings, all_dev, all_train
Example #8
def load_saved_dataset(name, data_path=SAVED_DATA):
    """Load the saved data."""

    train_data = torch.from_numpy(
        np.load(
            os.path.join(data_path, name, 'dataset',
                         name + '_train_signals.npy'))).float()
    valid_data = torch.from_numpy(
        np.load(
            os.path.join(data_path, name, 'dataset',
                         name + '_val_signals.npy'))).float()
    test_data = torch.from_numpy(
        np.load(
            os.path.join(data_path, name, 'dataset',
                         name + '_test_signals.npy'))).float()

    train_labels = torch.from_numpy(
        np.load(
            os.path.join(data_path, name, 'dataset',
                         name + '_train_labels.npy'))).long()
    valid_labels = torch.from_numpy(
        np.load(
            os.path.join(data_path, name, 'dataset',
                         name + '_val_labels.npy'))).long()
    test_labels = torch.from_numpy(
        np.load(
            os.path.join(data_path, name, 'dataset',
                         name + '_test_labels.npy'))).long()

    train_dataset = TensorDataset(train_data, train_labels)
    valid_dataset = TensorDataset(valid_data, valid_labels)
    test_dataset = TensorDataset(test_data, test_labels)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4,
                              drop_last=True)
    valid_loader = DataLoader(dataset=valid_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4,
                              drop_last=True)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=BATCH_SIZE,
                             shuffle=False,
                             num_workers=4,
                             drop_last=True)

    dim = int(np.sqrt(get_dim(train_data)))  # grid side; each sample has dim**2 features

    laplacian_matrix = torch.from_numpy(
        np.load(
            os.path.join(data_path, name, 'dataset',
                         name + '_laplacian.npy'))).float()
    shifted_laplacian_matrix = shift_laplacian(laplacian_matrix,
                                               dim).to(DEVICE)

    if name == 'mnist_012':
        num_classes = 3
    elif name == 'eth80':
        num_classes = 8
    else:
        num_classes = 9

    logger.info(
        'Class frequency \ntrain loader: {} \nvalidation loader: {} \ntest loader: {}'
        .format(count_class_freq(train_loader, num_classes),
                count_class_freq(valid_loader, num_classes),
                count_class_freq(test_loader, num_classes)))
    logging.info(
        'Loaded saved {} dataset with the split {}-{}-{} for the [train]-[valid]-[test] setup.'
        .format(name,
                len(train_loader) * BATCH_SIZE,
                len(valid_loader) * BATCH_SIZE,
                len(test_loader) * BATCH_SIZE))

    return train_loader, valid_loader, test_loader, dim, laplacian_matrix, shifted_laplacian_matrix
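Here get_dim receives the raw signal tensor instead, and the square root of its result recovers the grid side length used to shift the Laplacian. A tensor variant consistent with that usage (again an assumption, not the original helper):

def get_dim(data):
    # Per-sample feature count, e.g. (N, F) -> F.
    return data[0].numel()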