示例#1
0
文件: utils.py 项目: Modasshir/Mozi
def gpu_to_cpu_model(model):
    """Rebuild every shared variable of ``model`` through ``T._shared``.

    For each layer in ``model.layers``, every instance attribute and every
    entry of ``layer.params`` for which ``is_shared_var`` is true is replaced
    by a new variable built from a ``floatX`` copy of its current value,
    keeping the original name.  The model is modified in place.

    Args:
        model: object exposing ``layers``; each layer exposes ``params``.

    Returns:
        The same ``model`` object that was passed in.
    """
    for layer in model.layers:
        # Iterate over a snapshot so that rebinding attributes can never
        # disturb the iteration (``items()`` is a live view on Python 3).
        for member, value in list(layer.__dict__.items()):
            if is_shared_var(value):
                layer.__dict__[member] = T._shared(
                    np.array(value.get_value(), floatX),
                    name=value.name, borrow=False)
        # NOTE(review): attributes and ``params`` entries are rebuilt
        # independently, so an attribute and a ``params`` entry that referred
        # to the same variable end up as two distinct variables -- confirm
        # this is intended.
        # ``enumerate`` replaces the Python-2-only ``xrange`` index loop.
        for i, param in enumerate(layer.params):
            if is_shared_var(param):
                layer.params[i] = T._shared(
                    np.array(param.get_value(), floatX),
                    name=param.name, borrow=False)
    return model
示例#2
0
def gpu_to_cpu_model(model):
    """Replace the shared variables of ``model`` with ``T._shared`` copies.

    Walks ``model.layers``; any layer attribute or ``layer.params`` entry
    accepted by ``is_shared_var`` is swapped for a new variable created from
    a ``floatX`` copy of its value under the same name.  The model is edited
    in place and returned.
    """
    def rebuild(var):
        # New variable holding a floatX copy of the old value.
        return T._shared(np.array(var.get_value(), floatX),
                         name=var.name, borrow=False)

    for layer in model.layers:
        attrs = layer.__dict__
        for key, val in list(attrs.items()):
            if is_shared_var(val):
                attrs[key] = rebuild(val)
        for idx, param in enumerate(layer.params):
            if is_shared_var(param):
                layer.params[idx] = rebuild(param)
    return model
示例#3
0
    def test_gemv2(self):
        """Check ``v2 + dot(v1, m)`` on the GPU against the CPU result."""
        v1 = theano.shared(numpy.array(numpy.random.rand(5), dtype='float32'))
        v2 = tensor._shared(numpy.array(numpy.random.rand(2), dtype='float32'))
        m = theano.shared(
            numpy.array(numpy.random.rand(5, 2), dtype='float32'))

        def build_expr():
            # A fresh graph per compiled function, as in a hand-written test.
            return v2 + theano.dot(v1, m)

        def gemv_count(fn):
            nodes = fn.maker.fgraph.toposort()
            return sum(node.op is gpu_gemv_inplace for node in nodes)

        cpu_fn = theano.function([], build_expr(), mode=mode_without_gpu)
        gpu_fn = theano.function([], build_expr(), mode=mode_with_gpu)
        # Covers the case where the input is not on the gpu but the output
        # is explicitly moved there.
        gpu_fn_moved = theano.function(
            [], tcn.gpu_from_host(build_expr()), mode=mode_with_gpu)

        # Every variant must agree with the CPU reference.
        for fn in (gpu_fn, gpu_fn_moved):
            assert numpy.allclose(cpu_fn(), fn(), atol=self.atol)

        # Each GPU graph must contain exactly one in-place gpu gemv.
        for fn in (gpu_fn_moved, gpu_fn):
            assert gemv_count(fn) == 1
def arrays_to_tensors(self, arrays):
    """Convert arrays (or a ``DatasetArrays`` of arrays) to shared tensors.

    Args:
        arrays: either a ``numpy.ndarray`` or a ``DatasetArrays`` whose
            ``pos``, ``err`` and ``serr`` fields are converted recursively.

    Returns:
        ``tt._shared`` of the transposed array for an ndarray; a
        ``DatasetTensors`` built from the converted ``pos``, ``err`` and
        ``serr`` fields for a ``DatasetArrays``; ``None`` for anything else.
    """
    def convert(value):
        # Local recursion avoids depending on how this function is bound
        # (method vs. module-level) when converting the nested fields.
        if isinstance(value, np.ndarray):
            return tt._shared(value.T)
        if isinstance(value, DatasetArrays):
            return DatasetTensors(convert(value.pos),
                                  convert(value.err),
                                  convert(value.serr))
        return None

    return convert(arrays)
示例#5
0
文件: utils.py 项目: ylfzr/sbvae
def sharedX(value, name=None, borrow=True, keep_on_cpu=False):
    """Wrap ``value`` in a shared variable of dtype ``floatX``.

    The variable is created with ``T._shared`` when ``keep_on_cpu`` is true
    and with ``theano.shared`` otherwise; ``name`` and ``borrow`` are passed
    through unchanged.
    """
    make_shared = T._shared if keep_on_cpu else theano.shared
    data = theano._asarray(value, dtype=theano.config.floatX)
    return make_shared(data, name=name, borrow=borrow)
示例#6
0
 def gen_vec(n, name, device='cpu'):
     """Create a named float32 shared vector of ``n`` small uniform values.

     ``self.rng`` is re-created with seed 123 on every call.  With
     ``device == 'gpu'`` the vector is built with ``theano.shared`` and
     ``print_mem(name)`` is called; otherwise ``TT._shared`` is used.
     """
     self.rng = numpy.random.RandomState(123)
     samples = self.rng.uniform(low=-.0005, high=.0005, size=(n,))
     samples = samples.astype('float32')
     if device != 'gpu':
         return TT._shared(samples, name=name)
     shared_vec = theano.shared(samples, name=name)
     print_mem(name)
     return shared_vec
示例#7
0
def sharedX(value, name=None, borrow=True, keep_on_cpu=False):
    """Return ``value`` as a ``floatX`` shared variable.

    ``keep_on_cpu`` selects the constructor: ``T._shared`` when true,
    ``theano.shared`` when false.  ``name`` and ``borrow`` are forwarded.
    """
    options = {"name": name, "borrow": borrow}
    if keep_on_cpu:
        constructor = T._shared
    else:
        constructor = theano.shared
    return constructor(theano._asarray(value, dtype=theano.config.floatX),
                       **options)
示例#8
0
def test_local_gpu_subtensor():
    """Check which Subtensor ops end up on the GPU under ``mode_with_gpu``."""

    def compiled_ops(inputs, outputs):
        fn = theano.function(inputs, outputs, mode=mode_with_gpu)
        return [node.op for node in fn.maker.fgraph.toposort()]

    def uses_cpu_subtensor(ops):
        return any(type(op) is tensor.Subtensor for op in ops)

    def uses_gpu_subtensor(ops):
        return any(isinstance(op, GpuSubtensor) for op in ops)

    # Shared variable forced onto the CPU: the slice must stay on the CPU.
    cpu_shared = tensor._shared(np.zeros(20, "float32"))
    ops = compiled_ops([], cpu_shared[3:4])
    assert uses_cpu_subtensor(ops)
    assert not uses_gpu_subtensor(ops)

    # Graph input sliced once: the slice must stay on the CPU.
    x = tensor.fmatrix()
    ops = compiled_ops([x], x[3:4])
    assert uses_cpu_subtensor(ops)
    assert not uses_gpu_subtensor(ops)

    # Input used more than once: the slice should be on the GPU so the
    # input is not transferred several times.
    x = tensor.fmatrix()
    ops = compiled_ops([x], [x[3:4], x + 1])
    assert not uses_cpu_subtensor(ops)
    assert uses_gpu_subtensor(ops)

    # Same, with the input itself also returned as an output.
    x = tensor.fmatrix()
    ops = compiled_ops([x], [x[3:4], x + 1, x])
    assert not uses_cpu_subtensor(ops)
    assert uses_gpu_subtensor(ops)

    # Shared variable forced onto the CPU, with a computation applied to the
    # result of the slice.
    cpu_shared = tensor._shared(np.zeros(20, "float32"))
    ops = compiled_ops([], cpu_shared[3:4] + 1)
    assert uses_cpu_subtensor(ops)
    assert not uses_gpu_subtensor(ops)
    # The optimizer is not smart enough to move the Elemwise to the GPU.
    # Were it slightly smarter it could wrongly move it; were it very smart
    # it would know that it should not.
    assert any(isinstance(op, tensor.Elemwise) for op in ops)
示例#9
0
def test_local_gpu_subtensor():
    """Verify CPU/GPU placement of Subtensor for several graph shapes."""

    def subtensor_hits(inputs, outputs):
        # Returns (plain Subtensor ops, GpuSubtensor ops, all ops).
        compiled = theano.function(inputs, outputs, mode=mode_with_gpu)
        all_ops = [n.op for n in compiled.maker.fgraph.toposort()]
        plain = [op for op in all_ops if type(op) is tensor.Subtensor]
        on_gpu = [op for op in all_ops if isinstance(op, GpuSubtensor)]
        return plain, on_gpu, all_ops

    # 1. Shared variable forced on the CPU.
    vec = tensor._shared(np.zeros(20, "float32"))
    plain, on_gpu, _ = subtensor_hits([], vec[3:4])
    assert len(plain) > 0
    assert len(on_gpu) == 0

    # 2. Graph input.
    mat = tensor.fmatrix()
    plain, on_gpu, _ = subtensor_hits([mat], mat[3:4])
    assert len(plain) > 0
    assert len(on_gpu) == 0

    # 3. Input used several times: the subtensor is wanted on the GPU to
    #    avoid transferring the input more than once.
    mat = tensor.fmatrix()
    plain, on_gpu, _ = subtensor_hits([mat], [mat[3:4], mat + 1])
    assert len(plain) == 0
    assert len(on_gpu) > 0

    # 4. Input used several times and also returned as an output.
    mat = tensor.fmatrix()
    plain, on_gpu, _ = subtensor_hits([mat], [mat[3:4], mat + 1, mat])
    assert len(plain) == 0
    assert len(on_gpu) > 0

    # 5. Shared variable forced on the CPU, with a computation on the output
    #    of the subtensor.
    vec = tensor._shared(np.zeros(20, "float32"))
    plain, on_gpu, all_ops = subtensor_hits([], vec[3:4] + 1)
    assert len(plain) > 0
    assert len(on_gpu) == 0
    # The optimizer is not smart enough to move the Elemwise to the GPU; a
    # slightly smarter one could wrongly move it, a very smart one would
    # know not to.
    assert any(isinstance(op, tensor.Elemwise) for op in all_ops)
示例#10
0
 def gen_mat(nin, nout, name, device='cpu', scale=.01):
     """Create a named float32 shared ``(nin, nout)`` matrix.

     Entries are drawn uniformly from ``[-scale, scale)`` using a
     ``RandomState`` that is re-seeded with 123 on every call.  With
     ``device == 'gpu'`` the matrix is built with ``theano.shared`` and
     ``print_mem(name)`` is called; otherwise ``TT._shared`` is used.

     NOTE (original author): assumes tanh.
     """
     self.rng = numpy.random.RandomState(123)
     weights = self.rng.uniform(low=-scale, high=scale, size=(nin, nout))
     weights = weights.astype('float32')
     if device != 'gpu':
         return TT._shared(weights, name=name)
     shared_mat = theano.shared(weights, name=name)
     print_mem(name)
     return shared_mat
示例#11
0
def test_local_gpu_subtensor():
    """Check where Subtensor is placed when compiling with the GPU mode."""

    def check_placement(inputs, outputs, expect_gpu):
        """Compile, assert the Subtensor placement, and return the ops."""
        compiled = theano.function(inputs, outputs, mode=mode_with_gpu)
        ops = [node.op for node in compiled.maker.fgraph.toposort()]
        has_cpu = any(type(op) is tensor.Subtensor for op in ops)
        has_gpu = any(isinstance(op, cuda.GpuSubtensor) for op in ops)
        assert has_cpu == (not expect_gpu)
        assert has_gpu == expect_gpu
        return ops

    # Shared variable forced on the CPU.
    shared_vec = tensor._shared(numpy.zeros(20, "float32"))
    check_placement([], shared_vec[3:4], expect_gpu=False)

    # Graph input.
    inp = tensor.fmatrix()
    check_placement([inp], inp[3:4], expect_gpu=False)

    # Input used several times: the subtensor is wanted on the GPU so the
    # input is transferred only once.
    inp = tensor.fmatrix()
    check_placement([inp], [inp[3:4], inp + 1], expect_gpu=True)

    # Input used several times and also returned as an output.
    inp = tensor.fmatrix()
    check_placement([inp], [inp[3:4], inp + 1, inp], expect_gpu=True)

    # Shared variable forced on the CPU, with a computation on the output of
    # the subtensor: the slice stays on the CPU, the Elemwise runs on the GPU.
    shared_vec = tensor._shared(numpy.zeros(20, "float32"))
    ops = check_placement([], shared_vec[3:4] + 1, expect_gpu=False)
    assert any(isinstance(op, cuda.GpuElemwise) for op in ops)
示例#12
0
文件: test_opt.py 项目: ip01/Theano
def test_local_gpu_subtensor():
    """Count CPU and GPU Subtensor ops in graphs compiled for the GPU."""

    def count_subtensors(inputs, outputs):
        # Returns (number of plain Subtensor ops, number of GpuSubtensor
        # ops, the list of all ops in topological order).
        func = theano.function(inputs, outputs, mode=mode_with_gpu)
        ops = [node.op for node in func.maker.fgraph.toposort()]
        n_cpu = sum(1 for op in ops if type(op) is tensor.Subtensor)
        n_gpu = sum(1 for op in ops if isinstance(op, GpuSubtensor))
        return n_cpu, n_gpu, ops

    # Shared variable forced on the CPU.
    s = tensor._shared(numpy.zeros(20, "float32"))
    n_cpu, n_gpu, _ = count_subtensors([], s[3:4])
    assert n_cpu >= 1
    assert n_gpu == 0

    # Graph input.
    m = tensor.fmatrix()
    n_cpu, n_gpu, _ = count_subtensors([m], m[3:4])
    assert n_cpu >= 1
    assert n_gpu == 0

    # Multiple uses of the input: the subtensor should be on the GPU to
    # prevent multiple transfers.
    m = tensor.fmatrix()
    n_cpu, n_gpu, _ = count_subtensors([m], [m[3:4], m + 1])
    assert n_cpu == 0
    assert n_gpu >= 1

    # Multiple uses of the input, plus the input itself as an output.
    m = tensor.fmatrix()
    n_cpu, n_gpu, _ = count_subtensors([m], [m[3:4], m + 1, m])
    assert n_cpu == 0
    assert n_gpu >= 1

    # Shared variable forced on the CPU, with a computation applied to the
    # output of the subtensor.
    s = tensor._shared(numpy.zeros(20, "float32"))
    n_cpu, n_gpu, ops = count_subtensors([], s[3:4] + 1)
    assert n_cpu >= 1
    assert n_gpu == 0
    assert any(isinstance(op, GpuElemwise) for op in ops)
示例#13
0
def categorical_sampler(rstream, p, draw_shape, dtype="int32"):
    """Build a ``Categorical`` draw and register its state update.

    ``p`` is wrapped with ``tensor._shared`` (as ``floatX``) when it is not
    already a ``theano.Variable``.

    Raises:
        NotImplementedError: if ``p`` is not one-dimensional.
        TypeError: if ``draw_shape`` is not one-dimensional.

    Returns:
        The output variable produced by the ``Categorical`` op.
    """
    if not isinstance(p, theano.Variable):
        p = tensor._shared(numpy.asarray(p, dtype=theano.config.floatX))
    if p.ndim != 1:
        raise NotImplementedError()
    if draw_shape.ndim != 1:
        raise TypeError()

    # One non-broadcastable output dimension per entry of ``draw_shape``.
    n_out_dims = tensor.get_vector_length(draw_shape)
    out_type = tensor.TensorType(broadcastable=(False,) * n_out_dims,
                                 dtype=dtype)
    sampler = Categorical(False, out_type)

    state = rstream.new_shared_rstate()
    next_state, draws = sampler(state, p, draw_shape)
    rstream.add_default_update(draws, state, next_state)
    return draws
示例#14
0
def shared_dataset_x(data_x, borrow=True):
    """Load a dataset into a shared variable.

    ``data_x`` is converted to an array of dtype ``theano.config.floatX``
    and wrapped with ``T._shared``; ``borrow`` is forwarded unchanged.

    NOTE(review): the original documentation motivated this by letting
    Theano keep the data in GPU memory, so that minibatches are not copied
    to the GPU one at a time.  The variable is however created with
    ``T._shared`` rather than ``theano.shared`` -- confirm which placement
    is intended.
    """
    wrap = T._shared
    return wrap(np.asarray(data_x, dtype=theano.config.floatX),
                borrow=borrow)
示例#15
0
def categorical_sampler(rstream, p, draw_shape, dtype='int32'):
    """Create a categorical sample variable driven by ``rstream``.

    A ``p`` that is not a ``theano.Variable`` is first turned into a
    ``floatX`` variable through ``tensor._shared``.  ``p`` must be
    one-dimensional (``NotImplementedError`` otherwise) and so must
    ``draw_shape`` (``TypeError`` otherwise).  The new random state is
    registered on ``rstream`` as the default update of the returned output.
    """
    probs = p
    if not isinstance(probs, theano.Variable):
        probs = tensor._shared(
            numpy.asarray(probs, dtype=theano.config.floatX))

    if probs.ndim != 1:
        raise NotImplementedError()
    if draw_shape.ndim != 1:
        raise TypeError()

    # No output dimension is broadcastable.
    pattern = (False, ) * tensor.get_vector_length(draw_shape)
    op = Categorical(
        False, tensor.TensorType(broadcastable=pattern, dtype=dtype))

    old_state = rstream.new_shared_rstate()
    result = op(old_state, probs, draw_shape)
    updated_state, sample = result
    rstream.add_default_update(sample, old_state, updated_state)
    return sample
    def build_model(self):
        """Construct the pyMC model for this object and return it."""
        model = pm.Model()

        with model:
            # A single IC50 is assumed for simplicity.
            log_ic50 = pm.Normal("IC50s", 2.0)

            emin_growth = pm.Uniform("Emin_growth",
                                     lower=0.0,
                                     upper=self.Emax_growth)
            emax_death = pm.Lognormal("Emax_death", -2.0, 2.0)

            # Drug concentrations as a theano variable.
            conc = T._shared(self.drugCs)

            # Shared drug-response term: the IC50 and the hill slope are the
            # same for every condition.
            hill = pm.Lognormal("hill")
            exponent = (log_ic50 - conc) * hill
            drug_term = 1.0 / (1.0 + T.pow(10.0, exponent))

            # Growth parameter for each drug condition.
            growth_span = emin_growth - self.Emax_growth
            growth = self.Emax_growth + growth_span * drug_term

            # Net growth rate; the death rate without drug is assumed zero.
            net_rate = growth - emax_death * drug_term

            # Number of live cells over time.
            live = T.exp(net_rate * self.time)

            # Residual between the observations and the prediction divided
            # by its first entry (the control, per the original comment).
            normalized = live / live[0]
            residual = self.lObs - normalized

            pm.Normal("dataFitlnum", sd=T.std(residual), observed=residual)

        return model
示例#17
0
def theanoCore(timeV, div, deathRate, apopfrac, d):
    """Build the symbolic expressions of the core growth model.

    Returns:
        A tuple ``(lnum, eap, deadapop, deadnec)``: live cells, early
        apoptosis cells, cells dead via apoptosis, cells dead via necrosis.
    """
    # Time as a shared vector, plus a matching vector of ones used to
    # broadcast the time-constant quantities.
    t = T._shared(timeV)
    ones = T.ones_like(t)  # pylint: disable=no-member

    # Net growth rate.
    growth = T.outer(div - deathRate, ones)
    # Reused by the apoptosis terms below.
    apop_ratio = T.outer(deathRate * apopfrac, ones) / (growth + d)
    # Rate going straight to death.
    direct_death = T.outer(deathRate * (1 - apopfrac), ones)

    live = T.exp(growth * t)
    decay = T.exp(-d * t)
    grown = live - 1

    # The early-apoptosis count is 0.0 at the start.
    early_apop = apop_ratio * (live - decay)

    # Dead cells via necrosis and via apoptosis.
    dead_necrosis = direct_death * grown / growth
    dead_apoptosis = (d * apop_ratio * grown / growth
                      + apop_ratio * (decay - 1))

    return (live, early_apop, dead_apoptosis, dead_necrosis)
示例#18
0

# Script: load a pickled network, optionally rebuild its parameters with
# ``T._shared``, and pickle a new network built from those parameters.
parser = argparse.ArgumentParser()
parser.add_argument("source", type=str, help="Pickled network to steal params from.")
parser.add_argument("dest", type=str, help="File to place new network in.")
parser.add_argument(
    "--cpu", "-c", dest="cpu", action="store_const", const=True, default=False, help="Convert network to run on a CPU."
)
args = parser.parse_args()

print("loading model...")
# NOTE: unpickling can execute arbitrary code; only load trusted files.
# The ``with`` block closes the file even if loading fails.
with open(args.source, "rb") as f:
    old_network = cPickle.load(f)

params = old_network.params
if args.cpu:
    print("converting gpu parameters...")
    # Rebuild each parameter from its current value.
    new_params = []
    for param in params:
        new_params.append(T._shared(param.get_value()))
    params = new_params

new_network = network(batch_size=None, params=params)

print("saving model...")
with open(args.dest, "wb") as f:
    cPickle.dump(new_network, f, protocol=cPickle.HIGHEST_PROTOCOL)
示例#19
0
    def __init__(self, options, channel, data, model):
        """
        Parameters:
            options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the krylov
                    subspace
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lbfgsIters' -> int
                `krylovDim` -> int
            channel: jobman channel or None
            data: dictionary-like object return by numpy.load containing the
                data
            model : model
        """
        n_params = len(model.params)
        self.data = data
        xdata = theano.shared(data['train_x'], name='xdata')
        ydata = theano.shared(data['train_y'], name='ydata')
        self.xdata = xdata
        self.ydata = ydata
        shared_data = [xdata, ydata]
        self.rng = numpy.random.RandomState(options['seed'])
        n_samples = data['train_x'].shape[0]
        self.grad_batches = n_samples // options['gbs']
        self.metric_batches = n_samples // options['mbs']
        self.eval_batches = n_samples // options['ebs']

        self.verbose = options['verbose']
        rng = numpy.random.RandomState(options['seed'])
        self.rng = rng
        self.options = options
        self.channel = channel
        self.model = model
        n_dimensions = options['krylovDim']
        self.n_dimensions = n_dimensions
        if options['device'] == 'gpu':
            cfn_subspaces = \
                [theano.shared(numpy.zeros(
                                (n_dimensions,) + shp, dtype='float32'),
                               name='cfn{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in enumerate(zip(model.params_shape,
                                                      model.params))]
            old_deltas = \
                [theano.shared(numpy.zeros(shp, dtype='float32'),
                               name='delta{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in
                            enumerate(zip(model.params_shape, model.params))]
            self.gs = [
                theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                for shp in model.params_shape
            ]
        else:
            cfn_subspaces = \
                [TT._shared(numpy.zeros(
                                (n_dimensions,) + shp, dtype='float32'),
                               name='cfn{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in enumerate(zip(model.params_shape,
                                                      model.params))]
            old_deltas = \
                [TT._shared(numpy.zeros(shp, dtype='float32'),
                               name='delta{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in
                            enumerate(zip(model.params_shape, model.params))]
            self.gs = [
                TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                for shp in model.params_shape
            ]
        self.cfn_subspaces = cfn_subspaces
        self.old_deltas = old_deltas

        self.permg = self.rng.permutation(self.grad_batches)
        self.permr = self.rng.permutation(self.metric_batches)
        self.perme = self.rng.permutation(self.eval_batches)
        self.k = 0
        self.posg = 0
        self.posr = 0
        self.pose = 0

        # Step 1. Compile function for computing eucledian gradients
        print 'Constructing grad function'
        loc_inputs = [x.type(name='locx') for x in model.inputs]

        def grad_step(*args):
            idx = TT.cast(args[0], 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']]
                       for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            gs = TT.grad(nw_cost, model.params)
            nw_gs = [op + np for op, np in zip(args[1:1 + n_params], gs)]
            return [args[0] + const(1)] + \
                    nw_gs

        ig = [
            TT.unbroadcast(TT.alloc(const(0), 1, *shp), 0)
            for shp in model.params_shape
        ]
        idx0 = TT.unbroadcast(const([0]), 0)
        n_steps = options['gbs'] // options['cbs']
        rvals, updates = scan(grad_step,
                              states=[idx0] + ig,
                              n_steps=n_steps,
                              name='grad_loop',
                              mode=gpu_mode,
                              profile=options['profile'])

        nw_gs = [x[0] / const(n_steps) for x in rvals[1:1 + n_params]]
        updates.update(dict(zip(self.gs, nw_gs)))
        gdx = TT.iscalar('gdx')
        grad_inps = zip(loc_inputs, [
            x[gdx * options['gbs']:(gdx + 1) * options['gbs']]
            for x in shared_data
        ])
        print 'Compiling grad function'
        self.compute_eucledian_gradients = theano.function(
            [gdx], [],
            updates=updates,
            givens=dict(grad_inps),
            name='compute_eucledian_gradients',
            mode=gpu_mode,
            profile=options['profile'])

        # Step 2. Compile function for Computing Riemannian gradients
        if options['device'] == 'gpu':
            mode = gpu_mode

            def compute_Gv(*args):
                idx0 = const([0])
                ep = [
                    TT.alloc(const(0), 1, *shp) for shp in model.params_shape
                ]

                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_cost, nw_preactiv_out = safe_clone(
                        [model.train_cost, model.preactiv_out], replace)
                    nw_gvs = TT.Lop(
                        nw_preactiv_out, model.params,
                        TT.Rop(TT.grad(nw_cost, nw_preactiv_out), model.params,
                               args))

                    Gvs = [
                        ogv + ngv for (ogv, ngv) in zip(gv_args[1:], nw_gvs)
                    ]
                    return [gv_args[0] + const(1)] + Gvs

                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=theano.Mode(linker='cvm'),
                                      name='Gv_step',
                                      profile=options['profile'])

                final_Gvs = [x[0] / const(n_steps) for x in rvals[1:]]
                return final_Gvs, updates
        else:
            mode = cpu_mode

            def compute_Gv(*args):
                cgv = [
                    theano.shared(numpy.zeros(shp, dtype=theano.config.floatX),
                                  name='cgv%d' % idx)
                    for idx, shp in enumerate(model.params_shape)
                ]
                print_mem('allocated mem for cgv')
                idx0 = const([0])
                ep = [
                    TT.alloc(const(0), 1, *shp) for shp in model.params_shape
                ]

                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_cost, nw_preactiv_out = safe_clone(
                        [model.train_cost, model.preactiv_out], replace)
                    nw_gvs = TT.Lop(
                        nw_preactiv_out, model.params,
                        TT.Rop(TT.grad(nw_cost, nw_preactiv_out), model.params,
                               cgv))

                    Gvs = [
                        ogv + ngv for (ogv, ngv) in zip(gv_args[1:], nw_gvs)
                    ]
                    return [gv_args[0] + const(1)] + Gvs

                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=gpu_mode,
                                      name='Gv_step',
                                      profile=options['profile'])

                final_Gvs = [
                    TT.as_tensor_variable(x[0]) / const(n_steps)
                    for x in rvals[1:]
                ]
                grad_inps = zip(loc_inputs, shared_data)
                loc_fn = theano.function([],
                                         final_Gvs,
                                         updates=updates,
                                         givens=dict(grad_inps),
                                         on_unused_input='warn',
                                         mode=gpu_mode,
                                         name='loc_fn',
                                         profile=options['profile'])
                fake_op = FakeGPUShell(cgv, loc_fn, len(cgv))

                return fake_op(*args), {}

        rvals, updates = krylov_subspace(compute_Gv,
                                         self.gs,
                                         old_deltas,
                                         n_dimensions,
                                         model.params_shape,
                                         profile=options['profile'],
                                         device=options['device'])

        gdx = TT.iscalar('gdx')
        grad_inps = zip(loc_inputs, [
            x[gdx * options['mbs']:(gdx + 1) * options['mbs']]
            for x in shared_data
        ])
        updates.update(dict(zip(cfn_subspaces, rvals)))
        self.update_krylov_subspace = theano.function(
            [gdx], [],
            updates=updates,
            givens=dict(grad_inps),
            profile=options['profile'],
            on_unused_input='warn',
            name='update_krylov_subspace',
            mode=mode)

        alphas = tensor.vector('alphas')
        deltas = []
        nw_params = []
        if options['device'] == 'gpu':
            params = model.params
        else:
            params = model.cpu_params

        for param, subspace in zip(params, cfn_subspaces):
            alpha_reshuffle = [0] + ['x'] * param.ndim
            delta = (alphas.dimshuffle(*alpha_reshuffle) * \
                        subspace).sum(axis=0)
            nw_param = param + delta
            nw_params.append(nw_param)
            deltas.append(delta)

        print 'constructing evaluation function'
        ebdx = TT.iscalar('ebdx')

        updates_dict = dict(zip(model.params + old_deltas, nw_params + deltas))
        if options['device'] != 'gpu':
            updates_dict.update(dict(zip(model.cpu_params, nw_params)))

        self.update_params = theano.function([alphas],
                                             updates=updates_dict,
                                             name='update_params',
                                             allow_input_downcast=True,
                                             mode=mode,
                                             profile=options['profile'])

        n_steps = options['ebs'] // options['cbs']

        def ls_cost_step(_idx, acc):
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(
                zip(model.inputs + model.params, nw_inps + nw_params))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            return [_idx + const(1), acc + nw_cost]

        states = [
            TT.constant(numpy.float32([0])),
            TT.constant(numpy.float32([0]))
        ]
        rvals, _ = scan(ls_cost_step,
                        states=states,
                        n_steps=n_steps,
                        name='ls_cost_step',
                        mode=gpu_mode,
                        profile=options['profile'])
        fcost = rvals[1][0] / const(n_steps)

        def ls_grad_step(_idx, gws):
            idx = TT.cast(_idx, 'int32')
            nw_inps = [
                x[idx * options['cbs']:(idx + 1) * options['cbs']]
                for x in loc_inputs
            ]
            replace = dict(
                zip(model.inputs + model.params, nw_inps + nw_params))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            nw_gs = TT.grad(nw_cost, alphas)
            return _idx + numpy.float32(1), gws + nw_gs

        states = [
            TT.constant(numpy.float32([0])),
            TT.constant(numpy.zeros((1, n_dimensions), dtype='float32'))
        ]
        rvals, _ = scan(ls_grad_step,
                        states=states,
                        n_steps=n_steps,
                        name='ls_grad_step',
                        mode=gpu_mode,
                        profile=options['profile'])

        fgrad = rvals[1][0] / const(n_steps)

        grad_inps = zip(loc_inputs, [
            x[ebdx * options['ebs']:(ebdx + 1) * options['ebs']]
            for x in shared_data
        ])
        self.lbfgs_fn = theano.function(
            [alphas, ebdx],
            #theano.printing.Print('fcost')(fcost),
            fcost,
            givens=grad_inps,
            allow_input_downcast=True,
            on_unused_input='warn',
            name='lbfgs_fn',
            profile=options['profile'],
            mode=gpu_mode)
        self.lbfgs_grad = theano.function([alphas, ebdx],
                                          fgrad,
                                          givens=grad_inps,
                                          on_unused_input='warn',
                                          allow_input_downcast=True,
                                          name='lbfgs_grad',
                                          profile=options['profile'],
                                          mode=gpu_mode)

        n_steps = options['ebs'] // options['cbs']

        def ls_error(_idx, acc):
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = TT.cast(safe_clone(model.err, replace=replace),
                              'float32')
            return [_idx + const(1), acc + nw_cost]

        states = [
            TT.constant(numpy.float32([0])),
            TT.constant(numpy.float32([0]))
        ]
        rvals, _ = scan(ls_error,
                        states=states,
                        n_steps=n_steps,
                        name='ls_err_step',
                        mode=cpu_mode,
                        profile=options['profile'])
        ferr = rvals[1][0] / const(n_steps)
        self.compute_error = theano.function([],
                                             ferr,
                                             givens=dict(
                                                 zip(loc_inputs, shared_data)),
                                             name='compute_err',
                                             mode=gpu_mode,
                                             on_unused_input='warn',
                                             profile=options['profile'])
示例#20
0
                    required=True,
                    help='the path to the gpu model pickle file')
parser.add_argument('--cpu_model',
                    metavar='Path',
                    required=True,
                    help='''path to save the cpu model pickle file''')
args = parser.parse_args()

print('loading gpu mlp..')
# Pickle streams are binary data, so the input is opened with 'rb'; the
# context manager closes the handle even when unpickling raises.
# NOTE: unpickling can run code chosen by whoever wrote the file -- only
# point --gpu_model at pickles from a trusted source.
with open(args.gpu_model, 'rb') as fin:
    gpu_model = cPickle.load(fin)

# Rebuild the network layer by layer. np.array(...) copies the values of W
# and b, and each copy is wrapped in a fresh variable made by T._shared.
mlp = MLP(input_dim=gpu_model.input_dim)
for layer in gpu_model.layers:
    layerW = T._shared(np.array(layer.W.get_value(), floatX),
                       name=layer.W.name,
                       borrow=False)
    layerb = T._shared(np.array(layer.b.get_value(), floatX),
                       name=layer.b.name,
                       borrow=False)
    # The replacement layer has the same class name as the source layer and
    # is looked up in the `layers` module.
    mlp_layer = getattr(layers, layer.__class__.__name__)(dim=layer.dim,
                                                          name=layer.name,
                                                          W=layerW,
                                                          b=layerb)
    mlp.add_layer(mlp_layer)
    # Single-argument print(...) produces the same text under the print
    # statement and the print function.
    print('mlp layer %s %s' % (mlp_layer.name, mlp_layer.dim))
print('layers %s' % (mlp.layers,))

# The output handle is flushed and closed before 'Done!' is reported.
with open(args.cpu_model, 'wb') as fout:
    cPickle.dump(mlp, fout)
print('Done!')
示例#21
0
 def shared(self, x, name=None):
     """Return ``tensor._shared(x, name)``: ``x`` wrapped as a shared variable.

     ``name`` is forwarded positionally and defaults to ``None``.
     """
     shared_var = tensor._shared(x, name)
     return shared_var
示例#22
0
    def __init__(self,
                 options,
                 channel,
                 data,
                 model):
        """
        Allocate the optimizer state and compile the Theano functions it uses.

        Parameters:
            options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the krylov
                    subspace
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lbfgsIters` -> int
                    Not read by this method (presumably the L-BFGS
                    iteration budget used elsewhere -- TODO confirm)
                `krylovDim` -> int
                    Leading dimension of every subspace buffer and width
                    of the gradient accumulator for `alphas`; stored as
                    `self.n_dimensions`
                `device` -> str
                    'gpu' allocates the buffers with `theano.shared` and
                    builds the Gv product inside the graph; any other
                    value allocates with `TT._shared`, uses
                    `model.cpu_params` and routes the Gv product through
                    `FakeGPUShell`
            channel: jobman channel or None
            data: dictionary-like object returned by numpy.load containing
                the data; `train_x` and `train_y` are read here
            model : model
                This method reads `params`, `params_shape`, `inputs`,
                `train_cost`, `preactiv_out` and `err`, plus `cpu_params`
                when `options['device']` is not 'gpu'

        Compiled functions stored on the instance:
            `compute_eucledian_gradients(gdx)`, `update_krylov_subspace(gdx)`,
            `update_params(alphas)`, `lbfgs_fn(alphas, ebdx)`,
            `lbfgs_grad(alphas, ebdx)` and `compute_error()`.
        """
        n_params = len(model.params)
        self.data = data
        xdata = theano.shared(data['train_x'],
                              name='xdata')
        ydata = theano.shared(data['train_y'],
                          name='ydata')
        self.xdata = xdata
        self.ydata = ydata
        shared_data = [xdata, ydata]
        self.rng = numpy.random.RandomState(options['seed'])
        # Number of whole batches of each size that fit in the training set.
        n_samples = data['train_x'].shape[0]
        self.grad_batches = n_samples // options['gbs']
        self.metric_batches = n_samples // options['mbs']
        self.eval_batches = n_samples // options['ebs']

        self.verbose = options['verbose']
        # NOTE(review): self.rng is rebuilt here from the same seed; nothing
        # is drawn from the first generator in between.
        rng = numpy.random.RandomState(options['seed'])
        self.rng = rng
        self.options = options
        self.channel = channel
        self.model = model
        n_dimensions = options['krylovDim']
        self.n_dimensions = n_dimensions
        # Per-parameter buffers. The two branches differ only in the
        # constructor used (theano.shared vs TT._shared):
        #   cfn_subspaces -- float32 zeros of shape (n_dimensions,) + param shape
        #   old_deltas    -- float32 zeros of the parameter shape
        #   self.gs       -- floatX zeros of the parameter shape
        if options['device']=='gpu':
            cfn_subspaces = \
                [theano.shared(numpy.zeros(
                                (n_dimensions,) + shp, dtype='float32'),
                               name='cfn{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in enumerate(zip(model.params_shape,
                                                      model.params))]
            old_deltas = \
                [theano.shared(numpy.zeros(shp, dtype='float32'),
                               name='delta{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in
                            enumerate(zip(model.params_shape, model.params))]
            self.gs = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
        else:
            cfn_subspaces = \
                [TT._shared(numpy.zeros(
                                (n_dimensions,) + shp, dtype='float32'),
                               name='cfn{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in enumerate(zip(model.params_shape,
                                                      model.params))]
            old_deltas = \
                [TT._shared(numpy.zeros(shp, dtype='float32'),
                               name='delta{%s|%d}' % (str(param.name), i))
                 for i, (shp, param) in
                            enumerate(zip(model.params_shape, model.params))]
            self.gs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                   for shp in model.params_shape]
        self.cfn_subspaces = cfn_subspaces
        self.old_deltas = old_deltas

        # Random orderings of the batch indices plus counters, all starting
        # at zero (they are only initialised here, not advanced).
        self.permg = self.rng.permutation(self.grad_batches)
        self.permr = self.rng.permutation(self.metric_batches)
        self.perme = self.rng.permutation(self.eval_batches)
        self.k = 0
        self.posg = 0
        self.posr = 0
        self.pose = 0

        # Step 1. Compile function for computing Euclidean gradients
        print 'Constructing grad function'
        # Fresh symbolic inputs of the same types as model.inputs; they are
        # bound to slices of the shared data through `givens` below.
        loc_inputs = [x.type(name='locx') for x in model.inputs]
        def grad_step(*args):
            # args[0] is the running chunk index and args[1:1 + n_params]
            # are the gradient accumulators, one per parameter.
            idx = TT.cast(args[0], 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']]
                       for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            gs = TT.grad(nw_cost, model.params)
            nw_gs = [op + np for op, np in zip(args[1: 1 + n_params], gs)]
            return [args[0] + const(1)] + \
                    nw_gs

        # Initial scan states: index 0 and zero accumulators, each with a
        # leading axis of length 1 that is marked non-broadcastable.
        ig = [TT.unbroadcast(TT.alloc(const(0), 1, *shp),0)
              for shp in model.params_shape]
        idx0 = TT.unbroadcast(const([0]),0)
        n_steps = options['gbs'] // options['cbs']
        rvals, updates = scan(grad_step,
                              states=[idx0] + ig,
                              n_steps=n_steps,
                              name='grad_loop',
                              mode=gpu_mode,
                              profile=options['profile'])

        # Divide the accumulated sums by the number of chunks and write the
        # result to self.gs through the function's updates.
        nw_gs = [x[0] / const(n_steps) for x in rvals[1: 1 + n_params]]
        updates.update(dict(zip(self.gs, nw_gs)))
        gdx = TT.iscalar('gdx')
        # Bind the local inputs to rows gdx*gbs .. (gdx+1)*gbs of the data.
        grad_inps = zip(loc_inputs,
                        [x[gdx*options['gbs']:(gdx+1)*options['gbs']] for x
                         in shared_data])
        print 'Compiling grad function'
        self.compute_eucledian_gradients = theano.function(
            [gdx],
            [],
            updates=updates,
            givens=dict(grad_inps),
            name='compute_eucledian_gradients',
            mode=gpu_mode,
            profile=options['profile'])

        # Step 2. Compile function for Computing Riemannian gradients
        # compute_Gv(*args) is handed to krylov_subspace below. Both variants
        # accumulate Lop(preactiv_out, params, Rop(grad(cost, preactiv_out),
        # params, v)) over mbs // cbs chunks and divide by the chunk count.
        if options['device'] == 'gpu':
            mode=gpu_mode
            def compute_Gv(*args):
                idx0 = const([0])
                ep = [TT.alloc(const(0), 1, *shp)
                      for shp in model.params_shape]

                def Gv_step(*gv_args):
                    # gv_args[0] is the chunk index, gv_args[1:] are the
                    # accumulators; the direction is the outer `args`.
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_cost, nw_preactiv_out = safe_clone([model.train_cost,
                                                           model.preactiv_out],
                                                          replace)
                    nw_gvs = TT.Lop(nw_preactiv_out, model.params,
                                  TT.Rop(TT.grad(nw_cost, nw_preactiv_out),
                                         model.params, args))

                    Gvs = [ogv + ngv
                           for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                    return [gv_args[0] + const(1)] + Gvs
                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                # NOTE(review): this scan is given theano.Mode(linker='cvm')
                # while the other scans in this method get gpu_mode or
                # cpu_mode -- confirm this is intended.
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=theano.Mode(linker='cvm'),
                                      name='Gv_step',
                                      profile=options['profile'])

                final_Gvs = [x[0] / const(n_steps) for x in rvals[1:]]
                return final_Gvs, updates
        else:
            mode = cpu_mode
            def compute_Gv(*args):
                # The direction lives in the shared variables `cgv`; the
                # product is compiled into `loc_fn` and wrapped with
                # FakeGPUShell, which is then applied to `args`.
                cgv = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX),
                                     name ='cgv%d'%idx)
                           for idx, shp in enumerate(model.params_shape)]
                print_mem('allocated mem for cgv')
                idx0 = const([0])
                ep = [TT.alloc(const(0), 1, *shp)
                      for shp in model.params_shape]

                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_cost, nw_preactiv_out = safe_clone([model.train_cost,
                                                           model.preactiv_out],
                                                          replace)
                    nw_gvs = TT.Lop(nw_preactiv_out, model.params,
                                  TT.Rop(TT.grad(nw_cost, nw_preactiv_out),
                                         model.params, cgv))

                    Gvs = [ogv + ngv
                           for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                    return [gv_args[0] + const(1)] + Gvs
                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=gpu_mode,
                                      name='Gv_step',
                                      profile=options['profile'])

                final_Gvs = [TT.as_tensor_variable(x[0]) / const(n_steps) for x in rvals[1:]]
                # NOTE(review): loc_fn binds the local inputs to the whole
                # shared data, not to a gdx-indexed slice -- confirm.
                grad_inps = zip(loc_inputs, shared_data)
                loc_fn = theano.function([],
                                         final_Gvs,
                                         updates = updates,
                                         givens = dict(grad_inps),
                                         on_unused_input='warn',
                                         mode=gpu_mode,
                                         name='loc_fn',
                                         profile = options['profile'])
                fake_op = FakeGPUShell(cgv, loc_fn, len(cgv))

                return fake_op(*args), {}



        # krylov_subspace is defined elsewhere; its first return value is
        # zipped with cfn_subspaces below, one entry per buffer.
        rvals, updates = krylov_subspace(
            compute_Gv,
            self.gs,
            old_deltas,
            n_dimensions,
            model.params_shape,
            profile=options['profile'],
            device=options['device'])

        gdx = TT.iscalar('gdx')
        # Bind the local inputs to rows gdx*mbs .. (gdx+1)*mbs of the data.
        grad_inps = zip(loc_inputs,
                        [x[gdx*options['mbs']:(gdx+1)*options['mbs']] for x
                         in shared_data])
        updates.update(dict(zip(cfn_subspaces, rvals)))
        self.update_krylov_subspace = theano.function(
            [gdx],
            [],
            updates=updates,
            givens=dict(grad_inps),
            profile=options['profile'],
            on_unused_input='warn',
            name='update_krylov_subspace',
            mode=mode)

        # NOTE(review): this line uses the name `tensor` while the rest of
        # the method uses `TT`; both must be importable in this module.
        alphas = tensor.vector('alphas')
        deltas = []
        nw_params = []
        if options['device'] == 'gpu':
            params = model.params
        else:
            params = model.cpu_params

        # For each parameter: delta = sum_k alphas[k] * subspace[k], and the
        # candidate parameter is param + delta.
        for param, subspace in zip(params, cfn_subspaces):
            # Pad alphas with broadcastable axes to match the subspace rank.
            alpha_reshuffle = [0] + ['x'] * param.ndim
            delta = (alphas.dimshuffle(*alpha_reshuffle) * \
                        subspace).sum(axis=0)
            nw_param = param + delta
            nw_params.append(nw_param)
            deltas.append(delta)

        print 'constructing evaluation function'
        ebdx = TT.iscalar('ebdx')

        # update_params(alphas) writes the candidate parameters into
        # model.params and the deltas into old_deltas; off-gpu it also
        # refreshes model.cpu_params.
        updates_dict = dict(zip(model.params + old_deltas,
                                nw_params + deltas))
        if options['device'] != 'gpu':
            updates_dict.update(dict(zip(model.cpu_params, nw_params)))

        self.update_params = theano.function([alphas],
                                             updates = updates_dict,
                                             name='update_params',
                                             allow_input_downcast=True,
                                             mode=mode,
                                             profile=options['profile'])

        n_steps = options['ebs'] // options['cbs']
        def ls_cost_step(_idx, acc):
            # Add the training cost of chunk `_idx`, evaluated at the
            # candidate parameters, to the accumulator.
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(zip(model.inputs + model.params, nw_inps +
                               nw_params))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            return [_idx + const(1),
                    acc + nw_cost]

        states = [TT.constant(numpy.float32([0])),
                  TT.constant(numpy.float32([0]))]
        rvals, _ = scan(ls_cost_step,
                        states = states,
                        n_steps = n_steps,
                        name='ls_cost_step',
                        mode=gpu_mode,
                        profile = options['profile'])
        # Mean cost over the ebs // cbs chunks, as a function of alphas.
        fcost = rvals[1][0] / const(n_steps)

        def ls_grad_step(_idx, gws):
            # Same cost as ls_cost_step, differentiated w.r.t. alphas.
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: (idx + 1) * options['cbs']]
                       for x in loc_inputs]
            replace = dict(zip(model.inputs + model.params, nw_inps +
                               nw_params))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            nw_gs = TT.grad(nw_cost, alphas)
            return _idx + numpy.float32(1), gws + nw_gs

        states = [TT.constant(numpy.float32([0])),
                  TT.constant(numpy.zeros((1, n_dimensions),dtype='float32'))]
        rvals, _ = scan(ls_grad_step,
                        states = states,
                        n_steps = n_steps,
                        name = 'ls_grad_step',
                        mode = gpu_mode,
                        profile=options['profile'])

        # Mean gradient w.r.t. alphas over the ebs // cbs chunks.
        fgrad = rvals[1][0] / const(n_steps)

        # Bind the local inputs to rows ebdx*ebs .. (ebdx+1)*ebs of the data.
        # NOTE(review): unlike the calls above, `givens` receives the zip
        # result directly (not wrapped in dict) and the same object is passed
        # to both functions below.
        grad_inps = zip(loc_inputs,
                        [x[ebdx*options['ebs']:(ebdx+1)*options['ebs']] for x
                         in shared_data])
        self.lbfgs_fn = theano.function([alphas, ebdx],
                                   #theano.printing.Print('fcost')(fcost),
                                    fcost,
                                   givens=grad_inps,
                                   allow_input_downcast=True,
                                   on_unused_input='warn',
                                   name='lbfgs_fn',
                                   profile=options['profile'],
                                   mode=gpu_mode)
        self.lbfgs_grad = theano.function([alphas, ebdx],
                                     fgrad,
                                     givens=grad_inps,
                                     on_unused_input='warn',
                                     allow_input_downcast=True,
                                     name='lbfgs_grad',
                                     profile=options['profile'],
                                     mode=gpu_mode)

        n_steps = options['ebs'] // options['cbs']
        def ls_error(_idx, acc):
            # Add model.err of chunk `_idx` (cast to float32) to the
            # accumulator; only the inputs are replaced here, so the
            # model's current parameters are used.
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = TT.cast(safe_clone(
                model.err, replace=replace), 'float32')
            return [_idx + const(1), acc + nw_cost]

        states = [TT.constant(numpy.float32([0])),
                  TT.constant(numpy.float32([0]))]
        # NOTE(review): the scan is built with cpu_mode but the enclosing
        # function is compiled with gpu_mode -- confirm this is intended.
        rvals, _ = scan(ls_error,
                        states = states,
                        n_steps = n_steps,
                        name='ls_err_step',
                        mode=cpu_mode,
                        profile = options['profile'])
        ferr = rvals[1][0] / const(n_steps)
        # compute_error() takes no arguments: the local inputs are bound to
        # the whole shared data and the scan covers the first ebs // cbs
        # chunks of it.
        self.compute_error = theano.function([],
                           ferr,
                           givens=dict(zip(loc_inputs, shared_data)),
                           name='compute_err',
                           mode=gpu_mode,
                           on_unused_input='warn',
                           profile=options['profile'])
示例#23
0
import pynet.layer as layers

floatX = theano.config.floatX

parser = argparse.ArgumentParser(description='''Convert gpu pickle pynet model to cpu pickle pynet model''')
parser.add_argument('--gpu_model', metavar='Path', required=True, help='the path to the gpu model pickle file')
parser.add_argument('--cpu_model', metavar='Path', required=True, help='''path to save the cpu model pickle file''')
args = parser.parse_args()

print ('loading gpu autoencoder..')
# Pickle streams are binary data, so the input is opened with 'rb'; the
# context manager closes the handle even when unpickling raises.
# NOTE: unpickling can run code chosen by whoever wrote the file -- only
# point --gpu_model at pickles from a trusted source.
with open(args.gpu_model, 'rb') as fin:
    gpu_model = cPickle.load(fin)

# Encoder: copy the values of every W and b into fresh variables made by
# T._shared and rebuild each layer from the class of the same name in
# `layers`.
ae = AutoEncoder(input_dim=gpu_model.input_dim)
for layer in gpu_model.encode_layers:
    layerW = T._shared(np.array(layer.W.get_value(), floatX),
                        name=layer.W.name, borrow=False)
    layerb = T._shared(np.array(layer.b.get_value(), floatX),
                        name=layer.b.name, borrow=False)
    encode_layer = getattr(layers, layer.__class__.__name__)(dim=layer.dim, name=layer.name,
                                                            W=layerW, b=layerb)
    ae.add_encode_layer(encode_layer)
    # Single-argument print(...) produces the same text under the print
    # statement and the print function.
    print('encode layer %s %s' % (encode_layer.name, encode_layer.dim))
print('encode layers %s' % (ae.encode_layers,))

# Decoder: each decode layer is paired with the encoder layers in reverse
# order and reuses that encoder's transposed weights (W=ae_layer.W.T); only
# the bias is copied from the source model.
for ae_layer, gpu_layer in zip(reversed(ae.encode_layers), gpu_model.decode_layers):
    gpu_decode_layer_b = T._shared(np.array(gpu_layer.b.get_value(), floatX),
                        name=gpu_layer.b.name, borrow=False)
    decode_layer = getattr(layers, gpu_layer.__class__.__name__)(name=gpu_layer.name, dim=gpu_layer.dim,
                                                                W=ae_layer.W.T, b=gpu_decode_layer_b)
    ae.add_decode_layer(decode_layer)
    print('decode layer %s %s' % (decode_layer.name, decode_layer.dim))
    def shared_dataset(data_xy):
        """Wrap an ``(x, y)`` pair in shared variables.

        Both halves are converted to ``theano.config.floatX`` arrays; ``x``
        is wrapped with ``T._shared`` and ``y`` with ``theano.shared``, both
        with ``borrow=True``. Returns ``(shared_x, y cast to 'int32')``.
        """
        features, targets = data_xy
        float_dtype = theano.config.floatX
        feature_array = numpy.asarray(features, dtype=float_dtype)
        shared_x = T._shared(feature_array, borrow=True)
        target_array = numpy.asarray(targets, dtype=float_dtype)
        shared_y = theano.shared(target_array, borrow=True)
        # The targets are stored as floatX and handed out as an int32 view
        # of the graph, exactly as the cast expresses.
        return shared_x, T.cast(shared_y, 'int32')
示例#25
0
def load_label(labels):
    """Store ``labels`` in a shared variable and return it cast to 'int32'.

    The labels are first converted to a ``theano.config.floatX`` array and
    wrapped with ``T._shared(..., borrow=True)``.
    """
    label_array = np.asarray(labels, dtype=theano.config.floatX)
    shared_labels = T._shared(label_array, borrow=True)
    return T.cast(shared_labels, 'int32')
示例#26
0
 def to_err_tensor(self):
     '''Return the array from ``self.to_err_array()`` wrapped by ``tt._shared``'''
     return tt._shared(self.to_err_array())
示例#27
0
                    required=True,
                    help='the path to the gpu model pickle file')
parser.add_argument('--cpu_model',
                    metavar='Path',
                    required=True,
                    help='''path to save the cpu model pickle file''')
args = parser.parse_args()

print('loading gpu autoencoder..')
# Pickle streams are binary data, so the input is opened with 'rb'; the
# context manager closes the handle even when unpickling raises.
# NOTE: unpickling can run code chosen by whoever wrote the file -- only
# point --gpu_model at pickles from a trusted source.
with open(args.gpu_model, 'rb') as fin:
    gpu_model = cPickle.load(fin)

# Encoder: copy the values of every W and b into fresh variables made by
# T._shared and rebuild each layer from the class of the same name in
# `layers`.
ae = AutoEncoder(input_dim=gpu_model.input_dim)
for layer in gpu_model.encode_layers:
    layerW = T._shared(np.array(layer.W.get_value(), floatX),
                       name=layer.W.name,
                       borrow=False)
    layerb = T._shared(np.array(layer.b.get_value(), floatX),
                       name=layer.b.name,
                       borrow=False)
    encode_layer = getattr(layers, layer.__class__.__name__)(dim=layer.dim,
                                                             name=layer.name,
                                                             W=layerW,
                                                             b=layerb)
    ae.add_encode_layer(encode_layer)
    # Single-argument print(...) produces the same text under the print
    # statement and the print function.
    print('encode layer %s %s' % (encode_layer.name, encode_layer.dim))
print('encode layers %s' % (ae.encode_layers,))

for ae_layer, gpu_layer in zip(reversed(ae.encode_layers),
                               gpu_model.decode_layers):
    gpu_decode_layer_b = T._shared(np.array(gpu_layer.b.get_value(), floatX),
示例#28
0
import pynet.layer as layers

floatX = theano.config.floatX

parser = argparse.ArgumentParser(description='''Convert gpu pickle pynet model to cpu pickle pynet model''')
parser.add_argument('--gpu_model', metavar='Path', required=True, help='the path to the gpu model pickle file')
parser.add_argument('--cpu_model', metavar='Path', required=True, help='''path to save the cpu model pickle file''')
args = parser.parse_args()

print ('loading gpu mlp..')
# Pickle streams are binary data, so the input is opened with 'rb'. The
# context manager replaces the trailing fin.close(), which was skipped
# whenever an earlier statement raised.
# NOTE: unpickling can run code chosen by whoever wrote the file -- only
# point --gpu_model at pickles from a trusted source.
with open(args.gpu_model, 'rb') as fin:
    gpu_model = cPickle.load(fin)

# Rebuild the network layer by layer. np.array(...) copies the values of W
# and b, and each copy is wrapped in a fresh variable made by T._shared.
mlp = MLP(input_dim=gpu_model.input_dim)
for layer in gpu_model.layers:
    layerW = T._shared(np.array(layer.W.get_value(), floatX),
                        name=layer.W.name, borrow=False)
    layerb = T._shared(np.array(layer.b.get_value(), floatX),
                        name=layer.b.name, borrow=False)
    mlp_layer = getattr(layers, layer.__class__.__name__)(dim=layer.dim, name=layer.name,
                                                            W=layerW, b=layerb)
    mlp.add_layer(mlp_layer)
    # Single-argument print(...) produces the same text under the print
    # statement and the print function.
    print('mlp layer %s %s' % (mlp_layer.name, mlp_layer.dim))
print('layers %s' % (mlp.layers,))

# The output handle is flushed and closed before 'Done!' is reported, and
# also when pickling raises.
with open(args.cpu_model, 'wb') as fout:
    cPickle.dump(mlp, fout)
print ('Done!')
示例#29
0
 def shared(self, x):
     """Return ``tensor._shared(x)``: ``x`` wrapped as a shared variable."""
     shared_var = tensor._shared(x)
     return shared_var
示例#30
0
 def __call__(self, shape, name=None):
     """Return a shared variable of the given ``shape`` filled with ``self.c``.

     The values are ``np.ones(shape) * self.c``; ``name`` is passed to
     ``T._shared`` as the variable name.
     """
     constant_values = np.ones(shape) * self.c
     return T._shared(constant_values, name=name)
示例#31
0
    def __init__(self, options, channel, data, model):
        """
        Allocate the data holders, gradient buffers and batch bookkeeping,
        then hand over to the device-specific initialiser.

        Parameters:
            options: Dictionary
            Keys read directly by this method:
                `device` -> str
                    'gpu' selects `theano.shared` buffers and `init_gpu`;
                    any other value selects `TT._shared` buffers and
                    `init_cpu`
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the metric
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `seed` -> int
                    Random number generator seed
                `verbose` -> int
                    Verbosity level
            Keys that are documented for this class but only forwarded,
            inside `options`, to `init_gpu` / `init_cpu`:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `mreg` -> float
                    Regularization added to the metric
                `mrtol` -> float
                    Relative tolerance for inverting the metric
                `miters` -> int
                    Number of iterations
                `profile` -> bool
                    Flag, if profiling should be on or not
                `lr` -> float
                    Learning rate
            channel: jobman channel or None
            data: dictionary-like object return by numpy.load containing the
                data
            model : model
        """
        n_params = len(model.params)
        self.data = data
        self.model = model
        use_gpu = options['device'] == 'gpu'
        if use_gpu:
            # The whole training set goes into the shared variables.
            train_x = theano.shared(data['train_x'], name='xdata')
            print_mem('xdata')
            self.ydata = TT._shared(data['train_y'], name='ydata')
            host_side = []
        else:
            # Only the first `gbs` samples go into xdata / ydata; the full
            # arrays are kept in separate borrowed variables.
            train_x = theano.shared(data['train_x'][:options['gbs']],
                                    name='xdata')
            print_mem('xdata')
            self.ydata = TT._shared(data['train_y'][:options['gbs']],
                                    name='ydata')
            self.cxdata = TT._shared(data['train_x'],
                                     name='cpu_xdata',
                                     borrow=True)
            self.cydata = TT._shared(data['train_y'],
                                     name='cpu_ydata',
                                     borrow=True)
            host_side = [self.cxdata, TT.cast(self.cydata, 'int32')]
        self.xdata = train_x
        self.shared_data = [train_x, self.ydata]
        self.cpu_shared_data = host_side

        self.options = options

        self.rng = numpy.random.RandomState(options['seed'])
        self.n_samples = n_samples = data['train_x'].shape[0]
        # Number of whole batches of each size that fit in the training set.
        self.grad_batches = n_samples // options['gbs']
        self.metric_batches = n_samples // options['mbs']
        self.eval_batches = n_samples // options['ebs']

        self.verbose = options['verbose']
        cst = time.time()

        # One constructor per device; the three buffer families differ only
        # in their name pattern.
        allocate = theano.shared if use_gpu else TT._shared

        def zero_buffers(name_pattern):
            """One floatX zero buffer per entry of model.params_shape."""
            return [allocate(numpy.zeros(shp, dtype=theano.config.floatX),
                             name=name_pattern % idx)
                    for idx, shp in enumerate(model.params_shape)]

        # Store eucledian gradients
        self.gs = zero_buffers('g%d')
        # Store riemannian gradients
        self.rs = zero_buffers('r%d')
        # Store jacobi diagonal
        self.js = zero_buffers('j%d')

        # Random batch orderings (drawn in this order) and zeroed counters.
        self.permg = self.rng.permutation(self.grad_batches)
        self.permr = self.rng.permutation(self.metric_batches)
        self.perme = self.rng.permutation(self.eval_batches)
        self.k = 0
        self.posg = 0
        self.posr = 0
        self.pose = 0

        self.device = options['device']
        if self.device != 'gpu':
            self.init_cpu(options, channel, data, model)
        else:
            self.init_gpu(options, channel, data, model)
        self.old_norm = 1
示例#32
0
    def __init__(self, options, channel, data):
        """Create the shared parameters and the validation/test functions.

        Parameters:
            options: Dictionary
                Keys read by this method: `seed`, `cbs`, `nhid`, `seqlen`,
                `device` and `profile`.
            channel: not used by this method.
            data: dictionary-like object; the keys `train_x`, `train_y`,
                `valid_x`, `valid_y`, `test_x` and `test_y` are read. The
                size of the third axis of `train_x` / `train_y` is used as
                the input / output size.
        """
        self.rng = numpy.random.RandomState(options['seed'])
        self.srng = RandomStreams(self.rng.randint(1e5))
        self.nin = data['train_x'].shape[2]
        self.in_shape = (options['cbs'], self.nin)
        self.options = options
        # The original tested options['hid'] but evaluated options['nhid'];
        # both the test and the value now use the same key.
        # NOTE(review): eval() executes arbitrary code -- only acceptable if
        # the options come from a trusted source.
        if isinstance(options['nhid'], str):
            self.nhid = eval(options['nhid'])
        else:
            self.nhid = options['nhid']
        self.nout = data['train_y'].shape[2]

        def gen_mat(nin, nout, name, device='cpu', scale=.01):
            # NOTE : assumes tanh
            # NOTE(review): self.rng is replaced by a generator with the
            # fixed seed 123 on every call, discarding the one seeded from
            # options['seed'] -- confirm this is intended.
            self.rng = numpy.random.RandomState(123)
            vals = self.rng.uniform(size=(nin, nout), low=-scale,
                               high=scale).astype('float32')
            if device=='gpu':
                var = theano.shared(vals, name=name)
                print_mem(name)
            else:
                var = TT._shared(vals, name=name)
            return var

        def gen_vec(n, name, device='cpu'):
            # Same fixed re-seeding as in gen_mat.
            self.rng = numpy.random.RandomState(123)
            vals = self.rng.uniform(size=(n,), low=-.0005,
                                 high=.0005).astype('float32')
            if device=='gpu':
                var = theano.shared(vals, name=name)
                print_mem(name)
            else:
                var = TT._shared(vals, name=name)
            return var
        ##### PARAMS
        Wxx = gen_mat(self.nhid, self.nhid, name='Wxx', device='gpu')
        Wux = gen_mat(self.nin, self.nhid, name='Wux', device='gpu')
        Wxy = gen_mat(self.nhid, self.nout, name='Wxy', device='gpu')
        Wuy = gen_mat(self.nin, self.nout, name='Wuy', device='gpu')
        bx = gen_vec(self.nhid, name='bx', device='gpu')
        # scale=0 makes low == high == 0, i.e. h0 starts as all zeros.
        self.h0 = gen_mat(options['cbs'], self.nhid, name='h0',
                          device='gpu', scale=0)
        self.params = [Wxx, Wux, Wxy, Wuy, bx, self.h0]
        # One shape per entry of self.params, in the same order. The bias
        # shape must be a 1-tuple: `(self.nhid)` is just an int and cannot be
        # unpacked with `*shp`.
        self.params_shape = [(self.nhid, self.nhid),
                             (self.nin, self.nhid),
                             (self.nhid, self.nout),
                             (self.nin, self.nout),
                             (self.nhid,),
                             (options['cbs'], self.nhid) ]

        self.cparams =[]
        self.x = TT.tensor3('X')
        self.y = TT.tensor3('y')

        self.inputs = [self.x, self.y]

        def step(u_t, h_tm1, Wxx, Wux, Wxy, Wuy):
            # NOTE(review): bx is part of self.params but is not used here.
            h_t = TT.tanh(TT.dot(u_t, Wux) + TT.dot(h_tm1, Wxx))
            y_t = TT.dot(h_t, Wxy) + TT.dot(u_t, Wuy)
            return h_t, y_t
        # Buffer of seqlen + 1 hidden states whose first slot holds h0.
        _hid0 = TT.alloc(numpy.float32(0),
                        numpy.int32(options['seqlen']+1),
                        numpy.int32(options['cbs']),
                        numpy.int32(self.nhid))
        hid0 = TT.set_subtensor(_hid0[0], self.h0)

        [H,Y], _ = scan(step,
                        self.x,
                        [hid0, None],
                        [Wxx, Wux, Wxy, Wuy],
                        n_steps = options['seqlen'])
        # TODO : compute 3D cost ...

        if options['device'] == 'cpu/gpu':
            self.cpu_params = [
                TT._shared(x.get_value(), name=x.name) for x in self.params]
            # NOTE(review): self.err is never assigned in this method (the
            # cost above is still a TODO); this raises AttributeError unless
            # it is provided elsewhere.
            self.err = safe_clone(self.err,
                                  updates=zip(self.params, self.cpu_params))
            self.valid_xdata = TT._shared(data['valid_x'],
                                          name='valid_xdata',
                                          borrow=True)
            self.test_xdata = TT._shared(data['test_x'],
                                         name='test_xdata',
                                         borrow=True)
            mode = cpu_mode
        else:
            self.valid_xdata = theano.shared(data['valid_x'],
                                          name='valid_xdata',
                                          borrow=True)
            self.test_xdata = theano.shared(data['test_x'],
                                         name='test_xdata',
                                         borrow=True)
            mode = gpu_mode
        self.valid_ydata = TT.cast(
            TT._shared(data['valid_y'], name='valid_ydata',
                       borrow=True), 'int32')
        self.test_ydata = TT.cast(
            TT._shared(data['test_y'], name='test_ydata',
                       borrow=True), 'int32')

        givens = {}
        givens[self.x] = self.valid_xdata
        givens[self.y] = self.valid_ydata

        # NOTE(review): `ferr` is not defined in this method; both function
        # compilations below raise NameError unless it exists at module
        # level. It presumably should be the error derived from the TODO
        # cost above -- confirm.
        self.valid_eval_func = theano.function([],
                                               ferr,
                                               givens=givens,
                                               name='valid_eval_fn',
                                               profile=options['profile'],
                                               mode=mode)

        # The same dict is reused; the entries are overwritten for the test set.
        givens[self.x] = self.test_xdata
        givens[self.y] = self.test_ydata
        self.test_eval_func = theano.function([],
                                    ferr,
                                    givens=givens,
                                    name='test_fn',
                                    profile=options['profile'],
                                    mode=mode)
示例#33
0
    def __init__(self, options, channel, data, model):
        """
        Compile the functions used for natural-gradient training of `model`.

        Five compiled functions are stored on the instance:
        `compute_eucledian_gradients`, `compute_riemannian_gradients`,
        `eval_fn`, `update_params` and `compute_error`.

        Parameters:
            options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the metric
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `mreg` -> float
                    Regularization added to the metric
                `mrtol` -> float
                    Relative tolerance for inverting the metric
                `miters` -> int
                    Number of iterations
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lr` -> float
                    Learning rate
                `device` -> str
                    Compared against 'gpu' to select between the two
                    code paths below
            channel: jobman channel or None
            data: dictionary-like object return by numpy.load containing the
                data
            model : model
                The attributes `params`, `params_shape`, `inputs`,
                `train_cost`, `outs`, `outs_operator`, `cparams` and `err`
                are read.
        """
        n_params = len(model.params)
        self.data = data

        if options['device'] != 'gpu':
            # Only the first `gbs` training samples are stored.
            xdata = theano.shared(data['train_x'][:options['gbs']],
                                  name='xdata')
            ydata = TT._shared(data['train_y'][:options['gbs']], name='ydata')
            self.xdata = xdata
            self.ydata = ydata
            shared_data = [xdata, ydata]
        else:
            # The whole training set is stored; batches are sliced out
            # through `givens` when the functions are compiled.
            self.cpu_shared_data = []
            xdata = theano.shared(data['train_x'], name='xdata')
            ydata = TT._shared(data['train_y'], name='ydata')
            self.xdata = xdata
            self.ydata = ydata
            shared_data = [xdata, ydata]

        self.rng = numpy.random.RandomState(options['seed'])
        n_samples = data['train_x'].shape[0]
        self.grad_batches = n_samples // options['gbs']
        self.metric_batches = n_samples // options['mbs']
        self.eval_batches = n_samples // options['ebs']

        self.verbose = options['verbose']
        if options['device'] != 'gpu':
            # Store eucledian gradients
            self.gs = [
                TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                for shp in model.params_shape
            ]
            # Store riemannian gradients
            self.rs = [
                TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                for shp in model.params_shape
            ]
        else:
            # Store eucledian gradients
            self.gs = [
                theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                for shp in model.params_shape
            ]
            # Store riemannian gradients
            self.rs = [
                theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                for shp in model.params_shape
            ]

        # Random visiting orders and cursors for the three kinds of batches.
        self.permg = self.rng.permutation(self.grad_batches)
        self.permr = self.rng.permutation(self.metric_batches)
        self.perme = self.rng.permutation(self.eval_batches)
        self.k = 0
        self.posg = 0
        self.posr = 0
        self.pose = 0

        # Step 1. Compile function for computing eucledian gradients

        # inputs
        gbdx = TT.iscalar('grad_batch_idx')
        print('Constructing grad function')
        # NOTE(review): `srng` is not used below. The statement is kept
        # because it draws from the global numpy random state.
        srng = RandomStreams(numpy.random.randint(1e5))
        # Fresh symbolic stand-ins for the model inputs; they are bound to
        # data through `givens` when each function is compiled.
        loc_inputs = [x.type() for x in model.inputs]

        def grad_step(*args):
            # args[0] is the running chunk index, args[1:1 + n_params] the
            # gradients accumulated so far.
            idx = TT.cast(args[0], 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']]
                       for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            gs = TT.grad(nw_cost, model.params)
            nw_gs = [old_g + new_g
                     for old_g, new_g in zip(args[1:1 + n_params], gs)]
            return [args[0] + const(1)] + \
                    nw_gs

        ig = [
            TT.unbroadcast(TT.alloc(const(0), 1, *shp), 0)
            for shp in model.params_shape
        ]
        idx0 = TT.unbroadcast(const([0]), 0)
        n_steps = options['gbs'] // options['cbs']
        rvals, updates = scan(grad_step,
                              states=[idx0] + ig,
                              n_steps=n_steps,
                              name='grad_loop',
                              profile=options['profile'])

        # Average the accumulated gradients over the number of chunks.
        nw_gs = [x[0] / const(n_steps) for x in rvals[1:1 + n_params]]

        # updates
        updates.update(dict(zip(self.gs, nw_gs)))
        # givens
        if options['device'] == 'gpu':
            grad_inps = [(x,
                          y[gbdx * options['gbs']:(gbdx + 1) * options['gbs']])
                         for x, y in zip(loc_inputs, shared_data)]
        else:
            grad_inps = list(zip(loc_inputs, shared_data))

        print('Compiling grad function')
        self.compute_eucledian_gradients = theano.function(
            [gbdx], [],
            updates=updates,
            givens=dict(grad_inps),
            name='compute_eucledian_gradients',
            mode=gpu_mode,
            on_unused_input='warn',
            profile=options['profile'])

        # Step 2. Compile function for Computing Riemannian gradients
        rbdx = TT.iscalar('riemmanian_batch_idx')
        rbpos = rbdx * options['mbs']

        if options['device'] == 'gpu':
            mode = gpu_mode

            def compute_Gv(*args):
                # `args` holds one vector per model parameter; the result is
                # the metric-vector product averaged over mbs // cbs chunks.
                idx0 = const([0])
                ep = [
                    TT.alloc(const(0), 1, *shp) for shp in model.params_shape
                ]

                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_outs = safe_clone(model.outs, replace)
                    final_results = dict(
                        zip(model.params, [None] * len(model.params)))
                    for nw_out, out_operator in zip(nw_outs,
                                                    model.outs_operator):
                        # Restrict to the parameters (and matching vectors)
                        # that this output actually depends on.
                        loc_params = [
                            x for x in model.params
                            if x in theano.gof.graph.inputs([nw_out])
                        ]
                        loc_args = [
                            x for x, y in zip(args, model.params)
                            if y in theano.gof.graph.inputs([nw_out])
                        ]
                        if out_operator == 'softmax':
                            factor = const(options['cbs']) * nw_out
                        elif out_operator == 'sigmoid':
                            factor = const(
                                options['cbs']) * nw_out * (1 - nw_out)
                        else:
                            factor = const(options['cbs'])

                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                         TT.Rop(nw_out, loc_params, loc_args) /\
                                         factor)

                        for lp, lgv in zip(loc_params, loc_Gvs):
                            if final_results[lp] is None:
                                final_results[lp] = lgv
                            else:
                                final_results[lp] += lgv

                    Gvs = [
                        ogv + final_results[param]
                        for (ogv, param) in zip(gv_args[1:], model.params)
                    ]
                    # An alternative formulation used to follow this return
                    # and could never execute; it has been removed.
                    return [gv_args[0] + const(1)] + Gvs

                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=theano.Mode(linker='cvm'),
                                      name='Gv_step',
                                      profile=options['profile'])

                final_Gvs = [x[0] / const(n_steps) for x in rvals[1:]]
                return final_Gvs, updates
        else:
            mode = cpu_mode

            def compute_Gv(*args):
                # Same product as above, but compiled into its own function
                # (`loc_fn`) that reads its vectors from the shared
                # variables `cgv` and is wrapped by FakeGPUShell.
                cgv = [
                    theano.shared(numpy.zeros(shp, dtype=theano.config.floatX),
                                  name='cgv%d' % idx)
                    for idx, shp in enumerate(model.params_shape)
                ]
                print_mem('allocated mem for cgv')
                idx0 = const([0])
                ep = [
                    TT.alloc(const(0), 1, *shp) for shp in model.params_shape
                ]

                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_outs = safe_clone(model.outs, replace)
                    final_results = dict(
                        zip(model.params, [None] * len(model.params)))
                    for nw_out, out_operator in zip(nw_outs,
                                                    model.outs_operator):
                        loc_params = [
                            x for x in model.params
                            if x in theano.gof.graph.inputs([nw_out])
                        ]
                        loc_args = [
                            x for x, y in zip(cgv, model.params)
                            if y in theano.gof.graph.inputs([nw_out])
                        ]
                        if out_operator == 'softmax':
                            factor = const(options['cbs']) * nw_out
                        elif out_operator == 'sigmoid':
                            factor = const(
                                options['cbs']) * nw_out * (1 - nw_out)
                        else:
                            factor = const(options['cbs'])

                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                         TT.Rop(nw_out, loc_params, loc_args) /\
                                         factor)

                        for lp, lgv in zip(loc_params, loc_Gvs):
                            if final_results[lp] is None:
                                final_results[lp] = lgv
                            else:
                                final_results[lp] += lgv

                    Gvs = [
                        ogv + final_results[param]
                        for (ogv, param) in zip(gv_args[1:], model.params)
                    ]
                    return [gv_args[0] + const(1)] + Gvs

                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=gpu_mode,
                                      name='Gv_step',
                                      profile=options['profile'])
                final_Gvs = [
                    TT.as_tensor_variable(x[0]) / const(n_steps)
                    for x in rvals[1:]
                ]
                grad_inps = list(zip(loc_inputs, shared_data))
                loc_fn = theano.function([],
                                         final_Gvs,
                                         updates=updates,
                                         givens=dict(grad_inps),
                                         on_unused_input='warn',
                                         mode=gpu_mode,
                                         name='loc_fn',
                                         profile=options['profile'])
                fake_op = FakeGPUShell(cgv, loc_fn, len(cgv))

                return fake_op(*args), {}

        print('Constructing riemannian gradient function')
        # The gradients are normalised before being passed to minres and the
        # solution is scaled back by the same norm afterwards.
        norm_grads = TT.sqrt(sum(TT.sum(x**2) for x in self.gs))
        rvals = minres.minres(compute_Gv, [x / norm_grads for x in self.gs],
                              rtol=options['mrtol'],
                              shift=-options['mreg'],
                              maxit=options['miters'],
                              mode=mode,
                              profile=options['profile'])
        nw_rs = [x * norm_grads for x in rvals[0]]
        flag = rvals[1]
        niters = rvals[2]
        rel_residual = rvals[3]
        rel_Aresidual = rvals[4]
        Anorm = rvals[5]
        Acond = rvals[6]
        xnorm = rvals[7]
        Axnorm = rvals[8]
        updates = rvals[9]

        # Largest absolute entry over all riemannian gradients.
        norm_ord0 = TT.max(abs(nw_rs[0]))
        for r in nw_rs[1:]:
            norm_ord0 = TT.maximum(norm_ord0, TT.max(abs(r)))

        updates.update(dict(zip(self.rs, nw_rs)))
        # Only the first input is substituted here.
        grad_inps = [(x, y[rbdx * options['mbs']:(rbdx + 1) * options['mbs']])
                     for x, y in zip(loc_inputs[:1], shared_data[:1])]
        print('Compiling riemannian gradient function')
        self.compute_riemannian_gradients = theano.function(
            [rbdx], [
                flag, niters, rel_residual, rel_Aresidual, Anorm, Acond, xnorm,
                Axnorm, norm_grads, norm_ord0
            ],
            updates=updates,
            givens=dict(grad_inps),
            name='compute_riemannian_gradients',
            on_unused_input='warn',
            mode=mode,
            profile=options['profile'])

        # Step 3. Compile function for evaluating cost and updating
        # parameters
        print('constructing evaluation function')
        lr = TT.scalar('lr')
        self.lr = numpy.float32(options['lr'])
        ebdx = TT.iscalar('eval_batch_idx')
        # Candidate parameters after a step of size lr along self.rs.
        nw_ps = [p - lr * r for p, r in zip(model.params, self.rs)]

        def cost_step(_idx, acc):
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(zip(model.inputs + model.params, nw_inps + nw_ps))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            return [_idx + const(1), acc + nw_cost]

        acc0 = const([0])
        idx0 = const([0])
        n_steps = options['ebs'] // options['cbs']
        rvals, updates = scan(cost_step,
                              states=[idx0, acc0],
                              n_steps=n_steps,
                              name='cost_loop',
                              mode=gpu_mode,
                              profile=options['profile'])

        final_cost = rvals[1] / const(n_steps)
        if options['device'] == 'gpu':
            grad_inps = [(x,
                          y[ebdx * options['ebs']:(ebdx + 1) * options['ebs']])
                         for x, y in zip(loc_inputs, shared_data)]
        else:
            # Materialised as a list because it is turned into a dict twice
            # below (eval_fn and compute_error).
            grad_inps = list(zip(loc_inputs, shared_data))

        print('compling evaluation function')
        self.eval_fn = theano.function([ebdx, lr],
                                       final_cost,
                                       givens=dict(grad_inps),
                                       on_unused_input='warn',
                                       updates=updates,
                                       name='eval_fn',
                                       mode=gpu_mode,
                                       profile=options['profile'])

        update_dict = dict(zip(model.params, nw_ps))
        if options['device'] != 'gpu':
            update_dict.update(dict(zip(model.cparams, nw_ps)))
        self.update_params = theano.function([lr], [],
                                             updates=update_dict,
                                             name='update_params',
                                             on_unused_input='warn',
                                             mode=mode,
                                             profile=options['profile'])
        self.options = options
        self.old_cost = 1e6
        self.device = options['device']
        n_steps = options['ebs'] // options['cbs']

        def ls_error(_idx, acc):
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = TT.cast(safe_clone(model.err, replace=replace),
                              'float32')
            return [_idx + const(1), acc + nw_cost]

        states = [
            TT.constant(numpy.float32([0])),
            TT.constant(numpy.float32([0]))
        ]
        rvals, _ = scan(ls_error,
                        states=states,
                        n_steps=n_steps,
                        name='ls_err_step',
                        mode=cpu_mode,
                        profile=options['profile'])
        ferr = rvals[1][0] / const(n_steps)
        self.compute_error = theano.function([ebdx],
                                             ferr,
                                             givens=dict(grad_inps),
                                             name='compute_err',
                                             mode=gpu_mode,
                                             on_unused_input='warn',
                                             profile=options['profile'])
示例#34
0
def _shared(val, borrow=True):
    """Convert `val` to an array of dtype `floatX` and pass it to `T._shared`.

    `borrow` is forwarded unchanged to `T._shared`.
    """
    as_floatx = array(val, dtype=floatX)
    return T._shared(as_floatx, borrow=borrow)
示例#35
0
def main(ancestralfile, bamfile, treefile, maxgenotype=3):
    """Run one MCMC per assumed genotype count and print a summary of each.

    Parameters:
        ancestralfile: passed to `readAncestral`, which returns the known
            sites, the known nodes and the site matrix.
        bamfile: comma-separated string of BAM file names, passed to
            `parseBAMs` as a list.
        treefile: passed to `readTree` together with the known nodes.
        maxgenotype: largest number of genotypes to try; the loop runs over
            2, 3, ..., maxgenotype.

    Progress messages go to stderr and the per-run report to stdout.
    """
    bamFiles = bamfile.split(',')
    maxGenotype = maxgenotype

    knownSites, knownNodes, knownMatrix = readAncestral(ancestralfile)
    branches = readTree(treefile, knownNodes)
    knownSamples = parseBAMs(bamFiles, knownSites)

    # Keep sites whose second column is at least 0.001 and that have at
    # least one positive entry in the sample counts.
    exists = (knownSites.T[1] >= 0.001) & np.any(knownSamples > 0, 1)
    knownSites = knownSites[exists]
    knownMatrix = knownMatrix[:, exists]
    knownSamples = knownSamples[exists]
    for br in branches :
        diff_sites = knownMatrix[br[0].astype(int)] != knownMatrix[br[1].astype(int)]
        snv = knownMatrix[br[1].astype(int), diff_sites]
        presence = knownSamples[diff_sites, snv]
        # When at most 1% of the differing sites are present in the samples,
        # blank the row of br[1] with 100, re-attach the branches that start
        # at br[1] to br[0], and mark this branch (-1) for removal.
        if np.sum(presence > 0)*100 <= presence.shape[0] :
            knownMatrix[br[1].astype(int)] = 100
            branches[branches.T[0] == br[1], 0] = br[0]
            br[:2] = -1
    branches = branches[branches.T[0] > -1]
    # Keep only the sites that still show more than one value besides 100.
    exists = [len(set(np.unique(mat)) - {100}) > 1 for mat in knownMatrix.T]
    knownSites = knownSites[exists]
    knownMatrix = knownMatrix[:, exists].astype(np.int8)
    knownSamples = knownSamples[exists]
    weights = knownSites.T[1].astype(float)

    weights *= np.sum(knownSamples, 1)/4
    # NOTE(review): the in-place division requires knownSamples to have a
    # floating dtype -- confirm against parseBAMs.
    knownSamples /= np.sum(knownSamples, 1)[:, np.newaxis]

    branches2 = t._shared(branches)
    weights2 = t._shared(weights)
    knownMatrix2 = t._shared(knownMatrix)
    knownSamples2 = t._shared(knownSamples)

    for nGenotype in np.arange(2, maxGenotype + 1):
        sys.stderr.write(
            '\n----------\nRunning MCMC with assumption of {0} genotype(s) present in the sample.\n'.format(nGenotype))
        ng = np.max([1, nGenotype])
        genotypes = np.zeros([ng, knownMatrix.shape[1]], dtype = np.int8)
        genotypes2 = t._shared(genotypes)

        with pm.Model() as model:
            brs = pm.Flat('brs', shape=nGenotype if ng > 1 else ())
            props2 = pm.Dirichlet('props2', a=1./np.ones(ng, dtype=float)) \
                if ng > 1 else pm.DiscreteUniform('props2', upper=1, lower=1)

            props = pm.Deterministic('props', props2*(1-0.05*ng) + 0.05)
            sigma = pm.Gamma('sigma', alpha=0.5, beta=2)

            lk = pm.Deterministic('lk', getGenotypesAndLK(genotypes2, knownMatrix2, branches2, weights2, knownSamples2,
                                                          sigma, brs, props))

            pm.Potential('likelihood', lk)

            step_br = TreeWalker(brs, branches)
            step_others = pm.step_methods.Metropolis(vars=[sigma, props])
            trace = pm.sample(progressbar=True, draws=5000, tune=15000, step=[step_br, step_others], chains=8, cores=8,
                              compute_convergence_checks=False)

        # The loop variable is named `point` so that it does not shadow the
        # module alias `t` used for `t._shared` above.
        trace_logp = np.array([np.mean([point['likelihood'] for point in strace], 0)
                               for strace in trace._straces.values()])
        sys.stderr.write('Done.\n----------\n')
        # select traces
        trace_id = np.argmax(trace_logp.T[0])
        logp = trace_logp[trace_id]

        sys.stdout.write(
            '----------\nNo. Genotypes:\t{0}\tlogp:\t{1}\thybrid_score:\t{2}\n'.format(nGenotype, logp[0], logp[1]))
        sigma = trace.get_values('sigma', chains=trace_id)
        sigma = np.sort(sigma)
        sys.stdout.write('Sigma\tMean:\t{0:.6E}\tCI95%:\t[ {1:.6E} - {2:.6E} ]\n'.format(
            np.mean(sigma),
            sigma[int(sigma.size * 0.025)],
            sigma[int(sigma.size * 0.975)]))

        if nGenotype < 2:
            br_locs = trace.get_values('brs', chains=trace_id)[:, np.newaxis]
            props = trace.get_values('props', chains=trace_id)[:, np.newaxis]
        else:
            br_locs = trace.get_values('brs', chains=trace_id)
            props = trace.get_values('props', chains=trace_id)

        props /= np.sum(props, 1)[:, np.newaxis]
        for genotype_idx, (br_loc, prop) in enumerate(zip(br_locs.T, props.T)):
            prop = np.sort(prop)
            if nGenotype > 0:
                # The integer part of a sample selects the branch and the
                # fractional part is reported as the location.
                brs, locs = br_loc.astype(int), br_loc % 1
                brNames, brCounts = np.unique(brs, return_counts=True)
                brCounts = brCounts.astype(float) / np.sum(brCounts)
                idx = np.argsort(-brCounts)
                brNames, brCounts = brNames[idx], brCounts[idx]
                sys.stdout.write(
                    '\tGenotype {0}:\tMean proportion:\t{1:.4f}\tCI95%:\t[ {2:.4f} - {3:.4f} ]\n'.format(
                        genotype_idx + 1,
                        np.mean(prop),
                        prop[int(prop.size * 0.025)],
                        prop[int(prop.size * 0.975)]))
                for br, cnt in zip(brNames, brCounts):
                    # Report branches holding at least 1% of the samples or
                    # at least 30% of the most frequent branch's share.
                    if cnt >= 0.01 or cnt >= 0.3 * brCounts[0]:
                        lc = np.sort(locs[brs == br])
                        sys.stdout.write(
                            '\t\t{0:.2f} %\t{1} - {2}\tLocation:\t{3:.4f}\tCI95%:\t[ {4:.4f} - {5:.4f} ]\n'.format(
                                cnt * 100,
                                knownNodes[int(branches[br, 0])],
                                knownNodes[int(branches[br, 1])],
                                np.mean(lc), lc[int(lc.size * 0.025)],
                                lc[int(lc.size * 0.975)]))
    sys.stderr.write('All DONE\n')
示例#36
0
 def __call__(self, shape, name=None):
     """Draw normal samples of the given `shape` and wrap them with `T._shared`.

     The samples come from `np_rng.normal` with `self.loc` and `self.scale`;
     `name` is forwarded to `T._shared`.
     """
     samples = np_rng.normal(loc=self.loc, scale=self.scale, size=shape)
     return T._shared(samples, name=name)
示例#37
0
 def shared(self, x):
     """Return the result of `tensor._shared(x)`."""
     wrapped = tensor._shared(x)
     return wrapped
示例#38
0
文件: natSGD.py 项目: cc13ny/galatea
    def __init__(self, options, channel, data, model):
        """
        Parameters:
            options: Dictionary
            `options` is expected to contain the following keys:
                `cbs` -> int
                    Number of samples to consider at a time when computing
                    some property of the model
                `gbs` -> int
                    Number of samples over which to compute the gradients
                `mbs` -> int
                    Number of samples over which to compute the metric
                `ebs` -> int
                    Number of samples over which to evaluate the training
                    error
                `mreg` -> float
                    Regularization added to the metric
                `mrtol` -> float
                    Relative tolerance for inverting the metric
                `miters` -> int
                    Number of iterations
                `seed` -> int
                    Random number generator seed
                `profile` -> bool
                    Flag, if profiling should be on or not
                `verbose` -> int
                    Verbosity level
                `lr` -> float
                    Learning rate
            channel: jobman channel or None
            data: dictionary-like object return by numpy.load containing the
                data
            model : model
        """
        n_params = len(model.params)
        self.data = data

        if options['device'] != 'gpu':
            xdata = theano.shared(data['train_x'][:options['gbs']],
                                  name='xdata')
            ydata = TT._shared(data['train_y'][:options['gbs']],
                               name='ydata')
            self.xdata = xdata
            self.ydata = ydata
            shared_data = [xdata, ydata]
        else:
            self.cpu_shared_data = []
            xdata = theano.shared(data['train_x'], name='xdata')
            ydata = TT._shared(data['train_y'], name='ydata')
            self.xdata = xdata
            self.ydata = ydata
            shared_data = [xdata, ydata]

        self.rng = numpy.random.RandomState(options['seed'])
        n_samples = data['train_x'].shape[0]
        self.grad_batches = n_samples // options['gbs']
        self.metric_batches = n_samples // options['mbs']
        self.eval_batches = n_samples // options['ebs']

        self.verbose = options['verbose']
        if options['device'] != 'gpu':
            # Store eucledian gradients
            self.gs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                       for shp in model.params_shape]
            # Store riemannian gradients
            self.rs = [TT._shared(numpy.zeros(shp, dtype=theano.config.floatX))
                       for shp in model.params_shape]
        else:
            # Store eucledian gradients
            self.gs = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                       for shp in model.params_shape]
            # Store riemannian gradients
            self.rs = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX))
                       for shp in model.params_shape]

        self.permg = self.rng.permutation(self.grad_batches)
        self.permr = self.rng.permutation(self.metric_batches)
        self.perme = self.rng.permutation(self.eval_batches)
        self.k = 0
        self.posg = 0
        self.posr = 0
        self.pose = 0

        # Step 1. Compile function for computing eucledian gradients

        # inputs
        gbdx = TT.iscalar('grad_batch_idx')
        print 'Constructing grad function'
        srng = RandomStreams(numpy.random.randint(1e5))
        loc_inputs = [x.type() for x in model.inputs]
        def grad_step(*args):
            idx = TT.cast(args[0], 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']]
                       for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            gs = TT.grad(nw_cost, model.params)
            nw_gs = [op + np for op, np in zip(args[1: 1 + n_params], gs)]
            return [args[0] + const(1)] + \
                    nw_gs

        ig = [TT.unbroadcast(TT.alloc(const(0), 1, *shp),0)
              for shp in model.params_shape]
        idx0 = TT.unbroadcast(const([0]),0)
        n_steps = options['gbs'] // options['cbs']
        rvals, updates = scan(grad_step,
                              states=[idx0] + ig,
                              n_steps=n_steps,
                              name='grad_loop',
                              profile=options['profile'])

        nw_gs = [x[0] / const(n_steps) for x in rvals[1: 1 + n_params]]

        # updates
        updates.update(dict(zip(self.gs, nw_gs)))
        # givens
        if options['device'] == 'gpu':
            grad_inps = [(x, y[gbdx*options['gbs']:(gbdx+1)*options['gbs']])
                     for x,y in zip(loc_inputs, shared_data)]
        else:
            grad_inps = zip(loc_inputs, shared_data)

        print 'Compiling grad function'
        self.compute_eucledian_gradients = theano.function(
            [gbdx],
            [],
            updates=updates,
            givens=dict(grad_inps),
            name='compute_eucledian_gradients',
            mode=gpu_mode,
            on_unused_input='warn',
            profile=options['profile'])

        # Step 2. Compile function for Computing Riemannian gradients
        rbdx = TT.iscalar('riemmanian_batch_idx')
        rbpos = rbdx * options['mbs']

        if options['device'] == 'gpu':
            mode=gpu_mode
            def compute_Gv(*args):
                idx0 = const([0])
                ep = [TT.alloc(const(0), 1, *shp)
                      for shp in model.params_shape]

                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_outs = safe_clone(model.outs, replace)
                    final_results = dict(zip(model.params, [None] * len(model.params)))
                    for nw_out, out_operator in zip(nw_outs, model.outs_operator):
                        loc_params = [x for x in model.params
                                      if x in theano.gof.graph.inputs([nw_out])]
                        loc_args = [x for x, y in zip(args, model.params)
                                    if y in theano.gof.graph.inputs([nw_out])]
                        if out_operator == 'softmax':
                            factor = const(options['cbs']) * nw_out
                        elif out_operator == 'sigmoid':
                            factor = const(options['cbs']) * nw_out * (1 - nw_out)
                        else:
                            factor = const(options['cbs'])

                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                         TT.Rop(nw_out, loc_params, loc_args) /\
                                         factor)

                        for lp, lgv in zip(loc_params, loc_Gvs):
                            if final_results[lp] is None:
                                final_results[lp] = lgv
                            else:
                                final_results[lp] += lgv

                    Gvs = [ogv + final_results[param]
                           for (ogv, param) in zip(gv_args[1:], model.params)]
                    return [gv_args[0] + const(1)] + Gvs

                    nw_cost, nw_preactiv_out = safe_clone([model.train_cost,
                                                           model.preactiv_out],
                                                          replace)
                    nw_gvs = TT.Lop(nw_preactiv_out, model.params,
                                  TT.Rop(TT.grad(nw_cost, nw_preactiv_out),
                                         model.params, args))

                    Gvs = [ogv + ngv
                           for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                    return [gv_args[0] + const(1)] + Gvs
                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=theano.Mode(linker='cvm'),
                                      name='Gv_step',
                                      profile=options['profile'])

                final_Gvs = [x[0] / const(n_steps) for x in rvals[1:]]
                return final_Gvs, updates
        else:
            mode = cpu_mode
            def compute_Gv(*args):
                cgv = [theano.shared(numpy.zeros(shp, dtype=theano.config.floatX),
                                     name ='cgv%d'%idx)
                           for idx, shp in enumerate(model.params_shape)]
                print_mem('allocated mem for cgv')
                idx0 = const([0])
                ep = [TT.alloc(const(0), 1, *shp)
                      for shp in model.params_shape]

                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_outs = safe_clone(model.outs, replace)
                    final_results = dict(zip(model.params, [None] * len(model.params)))
                    for nw_out, out_operator in zip(nw_outs, model.outs_operator):
                        loc_params = [x for x in model.params
                                      if x in theano.gof.graph.inputs([nw_out])]
                        loc_args = [x for x, y in zip(cgv, model.params)
                                    if y in theano.gof.graph.inputs([nw_out])]
                        if out_operator == 'softmax':
                            factor = const(options['cbs']) * nw_out
                        elif out_operator == 'sigmoid':
                            factor = const(options['cbs']) * nw_out * (1 - nw_out)
                        else:
                            factor = const(options['cbs'])

                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                         TT.Rop(nw_out, loc_params, loc_args) /\
                                         factor)

                        for lp, lgv in zip(loc_params, loc_Gvs):
                            if final_results[lp] is None:
                                final_results[lp] = lgv
                            else:
                                final_results[lp] += lgv

                    Gvs = [ogv + final_results[param]
                           for (ogv, param) in zip(gv_args[1:], model.params)]
                    return [gv_args[0] + const(1)] + Gvs
                states = [idx0] + ep
                n_steps = options['mbs'] // options['cbs']
                rvals, updates = scan(Gv_step,
                                      states=states,
                                      n_steps=n_steps,
                                      mode=gpu_mode,
                                      name='Gv_step',
                                      profile=options['profile'])
                final_Gvs = [TT.as_tensor_variable(x[0]) / const(n_steps) for x in rvals[1:]]
                grad_inps = zip(loc_inputs, shared_data)
                loc_fn = theano.function([],
                                         final_Gvs,
                                         updates = updates,
                                         givens = dict(grad_inps),
                                         on_unused_input='warn',
                                         mode=gpu_mode,
                                         name='loc_fn',
                                         profile = options['profile'])
                fake_op = FakeGPUShell(cgv, loc_fn, len(cgv))

                return fake_op(*args), {}



        print 'Constructing riemannian gradient function'
        norm_grads = TT.sqrt(sum(TT.sum(x ** 2) for x in self.gs))
        rvals = minres.minres(
            compute_Gv,
            [x / norm_grads for x in self.gs],
            rtol=options['mrtol'],
            shift= -options['mreg'],
            maxit=options['miters'],
            mode=mode,
            profile=options['profile'])
        nw_rs = [x * norm_grads for x in rvals[0]]
        flag = rvals[1]
        niters = rvals[2]
        rel_residual = rvals[3]
        rel_Aresidual = rvals[4]
        Anorm = rvals[5]
        Acond = rvals[6]
        xnorm = rvals[7]
        Axnorm = rvals[8]
        updates = rvals[9]

        norm_ord0 = TT.max(abs(nw_rs[0]))
        for r in nw_rs[1:]:
            norm_ord0 = TT.maximum(norm_ord0,
                                   TT.max(abs(r)))


        updates.update(dict(zip(self.rs, nw_rs)))
        grad_inps = [(x, y[rbdx * options['mbs']:
                           (rbdx + 1) * options['mbs']])
                     for x,y in zip(loc_inputs[:1], shared_data[:1])]
        print 'Compiling riemannian gradient function'
        self.compute_riemannian_gradients = theano.function(
            [rbdx],
            [flag,
             niters,
             rel_residual,
             rel_Aresidual,
             Anorm,
             Acond,
             xnorm,
             Axnorm,
             norm_grads,
             norm_ord0],
            updates=updates,
            givens=dict(grad_inps),
            name='compute_riemannian_gradients',
            on_unused_input='warn',
            mode=mode,
            profile=options['profile'])

        # Step 3. Compile function for evaluating cost and updating
        # parameters
        print 'constructing evaluation function'
        lr = TT.scalar('lr')
        self.lr = numpy.float32(options['lr'])
        ebdx = TT.iscalar('eval_batch_idx')
        nw_ps = [p - lr * r for p, r in zip(model.params, self.rs)]

        def cost_step(_idx, acc):
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(zip(model.inputs + model.params, nw_inps + nw_ps))
            nw_cost = safe_clone(model.train_cost, replace=replace)
            return [_idx + const(1),
                    acc + nw_cost]

        acc0 = const([0])
        idx0 = const([0])
        n_steps = options['ebs'] // options['cbs']
        rvals, updates = scan(cost_step,
                              states=[idx0, acc0],
                              n_steps=n_steps,
                              name='cost_loop',
                              mode=gpu_mode,
                              profile=options['profile'])

        final_cost = rvals[1] / const(n_steps)
        if options['device'] == 'gpu':
            grad_inps = [(x, y[ebdx * options['ebs']:
                           (ebdx + 1) * options['ebs']])
                     for x,y in zip(loc_inputs, shared_data)]
        else:
            grad_inps = zip(loc_inputs, shared_data)

        print 'compling evaluation function'
        self.eval_fn = theano.function(
            [ebdx, lr],
            final_cost,
            givens=dict(grad_inps),
            on_unused_input='warn',
            updates = updates,
            name='eval_fn',
            mode=gpu_mode,
            profile=options['profile'])

        update_dict = dict(zip(model.params, nw_ps))
        if options['device'] != 'gpu':
            update_dict.update(dict(zip(model.cparams, nw_ps)))
        self.update_params = theano.function(
            [lr],
            [],
            updates=update_dict,
            name='update_params',
            on_unused_input='warn',
            mode=mode,
            profile=options['profile'])
        self.options = options
        self.old_cost = 1e6
        self.device = options['device']
        n_steps = options['ebs'] // options['cbs']
        def ls_error(_idx, acc):
            idx = TT.cast(_idx, 'int32')
            nw_inps = [x[idx * options['cbs']: \
                         (idx + 1) * options['cbs']] for x in loc_inputs]
            replace = dict(zip(model.inputs, nw_inps))
            nw_cost = TT.cast(safe_clone(
                model.err, replace=replace), 'float32')
            return [_idx + const(1), acc + nw_cost]

        states = [TT.constant(numpy.float32([0])),
                  TT.constant(numpy.float32([0]))]
        rvals, _ = scan(ls_error,
                        states = states,
                        n_steps = n_steps,
                        name='ls_err_step',
                        mode=cpu_mode,
                        profile = options['profile'])
        ferr = rvals[1][0] / const(n_steps)
        self.compute_error = theano.function([ebdx],
                           ferr,
                           givens=dict(grad_inps),
                           name='compute_err',
                           mode=gpu_mode,
                           on_unused_input='warn',
                           profile=options['profile'])
示例#39
0
def load_data(data):
    """Wrap ``data`` in a shared variable created by ``T._shared``.

    ``data`` is first converted with ``np.asarray`` to the dtype named by
    ``theano.config.floatX``; the resulting array is handed to ``T._shared``
    with ``borrow=True``.
    """
    values = np.asarray(data, dtype=theano.config.floatX)
    return T._shared(values, borrow=True)
示例#40
0
                    help="Pickled network to steal params from.")
parser.add_argument("dest", type=str, help="File to place new network in.")
parser.add_argument("--cpu",
                    "-c",
                    dest="cpu",
                    action='store_const',
                    const=True,
                    default=False,
                    help="Convert network to run on a CPU.")
args = parser.parse_args()

# Load the pickled source network.  The context manager closes the handle
# even when unpickling fails.
# NOTE: unpickling executes arbitrary code contained in the file -- only run
# this script on network files from a trusted source.
print("loading model...")
with open(args.source, 'rb') as source_file:
    old_network = cPickle.load(source_file)

params = old_network.params
if args.cpu:
    # Re-wrap the current value of every parameter with T._shared so the new
    # network is built from freshly created shared variables.
    print("converting gpu parameters...")
    params = [T._shared(param.get_value()) for param in params]

# Build a new network around the (possibly converted) parameters.
new_network = network(batch_size=None, params=params)

print("saving model...")
with open(args.dest, 'wb') as dest_file:
    cPickle.dump(new_network, dest_file, protocol=cPickle.HIGHEST_PROTOCOL)
示例#41
0
		query_model = f(query_model)
		result = np.argsort(-np.dot(query_model, doc_model.T), axis = 1)
		query_docs_ranking = {}
		''' speedup '''
		for q_idx in range(len(query_list)):
			docs_ranking = []
			for doc_idx in result[q_idx]:
				docs_ranking.append(doc_list[doc_idx])
			query_docs_ranking[query_list[q_idx]] = docs_ranking
		
		''' query 
		for query_key, query_vec in  zip(query_list, query_model):
			print len(query_docs_ranking.keys())
			query_result = np.argsort(-(query_vec * doc_model).sum(axis = 1))
			docs_ranking = []
			for doc_`idx in query_result:
				docs_ranking.append(doc_list[doc_idx])
				query_docs_ranking[query_key] = docs_ranking
			
		mAP = eval.mean_average_precision(query_docs_ranking)	
		print mAP, qry_lambda, rel_qry_lambda
		'''
		mAP = qry_eval.mean_average_precision(query_docs_ranking)	
		mAP_list.append(mAP)
	return max(mAP_list)

if __name__ == "__main__":
	# The same count truncates the pickled relevance model and is passed to
	# calculate() as its second argument, so it is defined once here.
	# NOTE(review): presumably the number of queries -- confirm against calculate().
	num_entries = 720
	# The handle is named `pkl_file` rather than `file` so that the Python 2
	# builtin `file` is not shadowed at module scope.
	# NOTE: unpickling executes arbitrary code; only load trusted files.
	with open("relevance_model_RM.pkl", "rb") as pkl_file:
		rel_query_model = Pickle.load(pkl_file)[:num_entries]
	theano_rel_query_model = _shared(rel_query_model)
	calculate(theano_rel_query_model, num_entries)
示例#42
0
def n_star_inference(n_stars,
                     iteration,
                     elem_err=False,
                     n_init=20000,
                     n_samples=1000,
                     max_stars=100):
    """Run PyMC3 inference on one batch of mock stars and return the samples.

    The batch is made of the stars with indices ``iteration * n_stars`` up to
    (excluding) ``(iteration + 1) * n_stars`` taken from the first
    ``max_stars`` entries of the ``.npz`` dataset at ``mock_data_file``.
    Observed abundances are standardized with ``output_mean``/``output_std``
    and compared, through a Normal likelihood, with the output of a network
    with one tanh hidden layer (weights ``w_array_0``, ``b_array_0``,
    ``w_array_1``, ``b_array_1``) whose inputs are the shared ``Std-Lambda``
    parameters, the per-star ``Std-Local`` parameters and the squared
    standardized time.

    Besides its arguments the function reads these module-level names:
    ``mock_data_file``, ``input_mean``, ``input_std``, ``output_mean``,
    ``output_std``, ``std_Lambda_prior_mean``, ``std_Lambda_prior_width``,
    ``std_Theta_prior_mean``, ``std_Theta_prior_width``, ``std_log_SFR_crit``,
    ``std_min_time``, ``std_max_time``, the four network arrays named above,
    and the sampler settings ``chains``, ``cores`` and ``tune``.

    Args:
        n_stars: Number of stars in the batch.
        iteration: Zero-based index of the batch inside the pool of
            ``max_stars`` stars.
        elem_err: If true, add a HalfCauchy ``Std-Element-Error`` per element,
            combined in quadrature with the observational errors, and include
            its samples in the return value.
        n_init: Forwarded to ``pm.sample`` as ``n_init``.
        n_samples: Forwarded to ``pm.sample`` as ``draws``.
        max_stars: Size of the pool of stars that batches are cut from.

    Returns:
        ``(Lambda, Thetas, Times, end_time, predictions)``, or
        ``(Lambda, Thetas, Times, end_time, Errs, predictions)`` when
        ``elem_err`` is true.  ``end_time`` is the wall-clock duration of the
        ``pm.sample`` call in seconds; the other entries are the trace values
        of the ``Lambda``, ``Thetas``, ``Times``, ``Element-Error`` and
        ``Predicted-Abundances`` deterministics (``Lambda`` and ``Errs`` with
        their singleton second axis removed).

    Raises:
        ValueError: If ``n_stars`` is smaller than 1.
        IndexError: If the requested batch does not lie completely inside the
            pool of ``max_stars`` stars.
    """
    # Fail early, with the exception types the computation below would hit
    # anyway, instead of deep inside the prior construction.
    if n_stars < 1:
        raise ValueError("n_stars must be at least 1, got %r" % (n_stars,))

    # Indices of the stars that make up this batch.
    these_stars = np.arange(max_stars)[iteration * n_stars:(iteration + 1) *
                                       n_stars]
    if len(these_stars) != n_stars:
        raise IndexError(
            "batch %r of %r stars does not fit inside the first %r stars" %
            (iteration, n_stars, max_stars))

    # Read everything needed from the mock dataset, then release the file.
    with np.load(mock_data_file) as mock_data:
        mu_times = mock_data.f.obs_time[these_stars]  # time of birth
        sigma_times = mock_data.f.obs_time_err[these_stars]  # error on age
        all_els = mock_data.f.elements
        # Chemical element abundances and their errors for the batch.
        full_abundances = mock_data.f.abundances[these_stars]
        full_errors = mock_data.f.abundance_errs[these_stars]

    # Pick the dataset columns that belong to the modelled elements.
    els = ['C', 'Fe', 'He', 'Mg', 'N', 'Ne', 'O', 'Si']  # TNG elements
    n_els = len(els)
    available = [str(name) for name in all_els]
    el_indices = np.zeros(n_els, dtype=int)
    for e, el in enumerate(els):
        try:
            el_indices[e] = available.index(el)
        except ValueError:
            # Best effort: report the problem and keep column 0 for this
            # element rather than aborting.
            print("Failed to find element %s" % el)
    obs_abundances = full_abundances[:, el_indices]
    obs_errors = full_errors[:, el_indices]

    # Standardize observations and their errors.
    norm_data = (obs_abundances - output_mean) / output_std
    norm_sd = obs_errors / output_std

    # Standardize the observed times; the time is the last network input.
    std_times_mean = (mu_times - input_mean[-1]) / input_std[-1]
    std_times_width = sigma_times / input_std[-1]

    # Bounds that keep the local variables inside the training parameter
    # space of the network.
    lowBound = tt._shared(np.asarray([-5, std_log_SFR_crit, -5, std_min_time]))
    upBound = tt._shared(np.asarray([5, 5, 5, std_max_time]))

    # Per-star prior means and widths: the Theta prior repeated on every row,
    # followed by that star's standardized time.
    loc_mean = np.hstack([
        np.asarray(std_Theta_prior_mean).reshape(1, -1) *
        np.ones([n_stars, 1]),
        std_times_mean.reshape(-1, 1)
    ])
    loc_std = np.hstack([
        np.asarray(std_Theta_prior_width).reshape(1, -1) *
        np.ones([n_stars, 1]),
        std_times_width.reshape(-1, 1)
    ])

    # Network weights as shared variables; biases are repeated on every row.
    w0 = tt._shared(w_array_0)
    b0 = tt._shared(b_array_0)
    w1 = tt._shared(w_array_1)
    b1 = tt._shared(b_array_1)
    ones_tensor = tt.ones([n_stars, 1])
    b0_all = ma.matrix_dot(ones_tensor, b0)
    b1_all = ma.matrix_dot(ones_tensor, b1)

    simple_model = pm.Model()

    with simple_model:
        # Priors: one shared row of Lambda parameters and one row of local
        # parameters per star.
        Lambda = pm.Normal('Std-Lambda',
                           mu=std_Lambda_prior_mean,
                           sd=std_Lambda_prior_width,
                           shape=(1, len(std_Lambda_prior_mean)))

        Locals = pm.Normal(
            'Std-Local',
            mu=loc_mean,
            sd=loc_std,
            shape=loc_mean.shape,
            transform=pm.distributions.transforms.Interval(lowBound, upBound),
        )
        TimeSq = tt.reshape(Locals[:, -1]**2., (n_stars, 1))

        # De-standardized parameters, registered so they appear in the trace.
        pm.Deterministic('Lambda', Lambda * input_std[:2] + input_mean[:2])
        pm.Deterministic('Thetas',
                         Locals[:, :3] * input_std[2:5] + input_mean[2:5])
        pm.Deterministic('Times',
                         Locals[:, -1] * input_std[-1] + input_mean[-1])

        # Network: repeat Lambda for every star, append the local parameters
        # and the squared time, then apply the tanh hidden layer.
        Lambda_all = ma.matrix_dot(ones_tensor, Lambda)
        InputVariables = ma.concatenate([Lambda_all, Locals, TimeSq], axis=1)

        layer1 = ma.matrix_dot(InputVariables, w0) + b0_all
        output = ma.matrix_dot(ma.tanh(layer1), w1) + b1_all

        if elem_err:
            # Extra per-element error, combined in quadrature with the
            # observational error.  Everything here is in units of output_std.
            element_error = pm.HalfCauchy('Std-Element-Error',
                                          beta=0.01 / output_std,
                                          shape=(1, n_els))
            pm.Deterministic('Element-Error', element_error * output_std)
            stacked_error = ma.matrix_dot(ones_tensor, element_error)
            tot_error = ma.sqrt(stacked_error**2. + norm_sd**2.)
        else:
            tot_error = norm_sd  # already standardized

        pm.Deterministic("Predicted-Abundances",
                         output * output_std + output_mean)

        # Likelihood over the flattened (star, element) grid.
        pm.Normal('likelihood',
                  mu=output.ravel(),
                  sd=tot_error.ravel(),
                  observed=norm_data.ravel())

    # Sample and time the run.
    init_time = ttime.time()
    with simple_model:
        samples = pm.sample(draws=n_samples,
                            chains=chains,
                            cores=cores,
                            tune=tune,
                            nuts_kwargs={'target_accept': 0.9},
                            init='advi+adapt_diag',
                            n_init=n_init)
    end_time = ttime.time() - init_time

    print("Finished after %.2f seconds" % end_time)

    # Pull the de-standardized quantities out of the trace.  The explicit
    # slices also check the expected number of axes of each array.
    Lambda = samples.get_values('Lambda')[:, 0, :]
    Thetas = samples.get_values('Thetas')[:, :, :]
    Times = samples.get_values('Times')[:, :]
    predictions = samples.get_values('Predicted-Abundances')[:, :, :]

    if elem_err:
        Errs = samples.get_values('Element-Error')[:, 0, :]
        return Lambda, Thetas, Times, end_time, Errs, predictions
    return Lambda, Thetas, Times, end_time, predictions