Example #1
def reduce(
    fn,
    sequences,
    outputs_info,
    non_sequences=None,
    go_backwards=False,
    mode=None,
    name=None,
):
    """
    Similar behaviour to Python's ``reduce``.

    Parameters
    ----------
    fn
        The function that ``reduce`` applies at each iteration step
        (see ``scan``  for more info).
    sequences
        List of sequences over which ``reduce`` iterates
        (see ``scan`` for more info).
    outputs_info
        List of dictionaries describing the outputs of
        reduce (see ``scan`` for more info).
    non_sequences
        List of arguments passed to ``fn``. ``reduce`` will
        not iterate over these arguments (see ``scan`` for
        more info).
    go_backwards : bool
        Decides the direction of iteration. True means that sequences are parsed
        from the end towards the beginning, while False is the other way around.
    mode
        See ``scan``.
    name
        See ``scan``.

    """
    rval = scan(
        fn=fn,
        sequences=sequences,
        outputs_info=outputs_info,
        non_sequences=non_sequences,
        go_backwards=go_backwards,
        truncate_gradient=-1,
        mode=mode,
        name=name,
    )
    if isinstance(rval[0], (list, tuple)):
        return [x[-1] for x in rval[0]], rval[1]
    else:
        return rval[0][-1], rval[1]
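
For reference, a minimal usage sketch of this wrapper (assuming it is the one Theano exposes as ``theano.reduce``): summing the elements of a vector and keeping only the final total, which is exactly the last scan output that ``reduce`` returns.

import numpy as np
import theano
import theano.tensor as tt

# Sum the elements of a vector with reduce: fn receives the current element
# and the previous accumulator; only the final accumulator is returned.
v = tt.vector('v')
acc0 = tt.as_tensor_variable(np.asarray(0.0, dtype=theano.config.floatX))
total, updates = theano.reduce(fn=lambda x, acc: acc + x,
                               sequences=v,
                               outputs_info=acc0)
f = theano.function([v], total, updates=updates)
print(f(np.arange(5, dtype=theano.config.floatX)))  # -> 10.0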
Example #2
def linear_cg(compute_Gv,
              bs,
              rtol=1e-6,
              maxit=1000,
              damp=0,
              floatX=None,
              profile=0):
    """
    assume all are lists all the time
    Reference:
        http://en.wikipedia.org/wiki/Conjugate_gradient_method
    """
    n_params = len(bs)

    def loop(rsold, *args):
        ps = args[:n_params]
        rs = args[n_params:2 * n_params]
        xs = args[2 * n_params:]
        _Aps = compute_Gv(*ps)[0]
        Aps = [x + damp * y for x, y in zip(_Aps, ps)]
        alpha = rsold / sum((x * y).sum() for x, y in zip(Aps, ps))
        xs = [x + alpha * p for x, p in zip(xs, ps)]
        rs = [r - alpha * Ap for r, Ap in zip(rs, Aps)]
        rsnew = sum((r * r).sum() for r in rs)
        ps = [r + rsnew / rsold * p for r, p in zip(rs, ps)]
        return [rsnew] + ps + rs + xs, \
               theano.scan_module.until(abs(rsnew) < rtol)

    r0s = bs
    _p0s = [tensor.unbroadcast(tensor.shape_padleft(x), 0) for x in r0s]
    _r0s = [tensor.unbroadcast(tensor.shape_padleft(x), 0) for x in r0s]
    _x0s = [
        tensor.unbroadcast(tensor.shape_padleft(tensor.zeros_like(x)), 0)
        for x in bs
    ]
    _rsold = sum((r * r).sum() for r in r0s)
    #_rsold = tensor.unbroadcast(tensor.shape_padleft(rsold),0)
    outs, updates = scan(loop,
                         outputs_info=[_rsold] + _p0s + _r0s + _x0s,
                         n_steps=maxit,
                         mode=theano.Mode(linker='cvm'),
                         name='linear_conjugate_gradient',
                         profile=profile)
    fxs = outs[1 + 2 * n_params:]
    #return [x[0] for x in fxs]
    # 5vision hacks
    x = theano.gradient.disconnected_grad(fxs[0][-1].flatten())
    residual = outs[0][-1]
    return [x, residual]
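
The loop body above is the textbook conjugate-gradient recurrence from the referenced Wikipedia page, just written over lists of tensors. As a point of comparison, here is a small self-contained NumPy sketch of the same updates for a single right-hand side (illustrative only; the function name and test matrix are made up):

import numpy as np

def linear_cg_numpy(Av, b, rtol=1e-6, maxit=1000):
    """Plain-NumPy mirror of the scan loop: Av(p) returns the product A @ p."""
    x = np.zeros_like(b)
    r = b.copy()                      # residual of the initial guess x = 0
    p = r.copy()                      # first search direction
    rsold = r @ r
    for _ in range(maxit):
        Ap = Av(p)
        alpha = rsold / (p @ Ap)      # step length along p
        x = x + alpha * p
        r = r - alpha * Ap
        rsnew = r @ r
        if abs(rsnew) < rtol:         # same stopping rule as the `until`
            break
        p = r + (rsnew / rsold) * p   # next conjugate direction
        rsold = rsnew
    return x, rsnew

A = np.array([[4.0, 1.0], [1.0, 3.0]])
x, res = linear_cg_numpy(lambda v: A @ v, np.array([1.0, 2.0]))
# x is close to np.linalg.solve(A, [1.0, 2.0]) ~= [0.0909, 0.6364]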
Example #3
def linear_cg_precond(compute_Gv,
                      bs,
                      Msz,
                      rtol=1e-16,
                      maxit=100000,
                      floatX=None):
    """
    assume all are lists all the time
    Reference:
        http://en.wikipedia.org/wiki/Conjugate_gradient_method
    """
    n_params = len(bs)

    def loop(rsold, *args):
        ps = args[:n_params]
        rs = args[n_params:2 * n_params]
        xs = args[2 * n_params:]
        Aps = compute_Gv(*ps)
        alpha = rsold / sum((x * y).sum() for x, y in zip(Aps, ps))
        xs = [x + alpha * p for x, p in zip(xs, ps)]
        rs = [r - alpha * Ap for r, Ap in zip(rs, Aps)]
        zs = [r / z for r, z in zip(rs, Msz)]
        rsnew = sum((r * z).sum() for r, z in zip(rs, zs))
        ps = [z + rsnew / rsold * p for z, p in zip(zs, ps)]
        return [rsnew] + ps + rs + xs, \
               theano.scan_module.until(abs(rsnew) < rtol)

    r0s = bs
    _p0s = [
        tensor.unbroadcast(tensor.shape_padleft(x / z), 0)
        for x, z in zip(r0s, Msz)
    ]
    _r0s = [tensor.unbroadcast(tensor.shape_padleft(x), 0) for x in r0s]
    _x0s = [
        tensor.unbroadcast(tensor.shape_padleft(tensor.zeros_like(x)), 0)
        for x in bs
    ]
    rsold = sum((r * r / z).sum() for r, z in zip(r0s, Msz))
    _rsold = tensor.unbroadcast(tensor.shape_padleft(rsold), 0)
    outs, updates = scan(loop,
                         states=[_rsold] + _p0s + _r0s + _x0s,
                         n_steps=maxit,
                         mode=theano.Mode(linker='c|py'),
                         name='linear_conjugate_gradient',
                         profile=0)
    fxs = outs[1 + 2 * n_params:]
    return [x[0] for x in fxs]
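
The only difference from the previous solver is the element-wise division by ``Msz``: residuals are rescaled by a diagonal preconditioner (Jacobi-style when ``Msz`` holds the diagonal of A), and ``rsnew`` becomes the preconditioned inner product of r and z. A NumPy sketch of the same variant, under the same illustrative assumptions as before:

import numpy as np

def linear_cg_precond_numpy(Av, b, M, rtol=1e-16, maxit=100000):
    """Diagonally preconditioned CG: M is the preconditioner's diagonal,
    so solving M z = r is the element-wise division z = r / M."""
    x = np.zeros_like(b)
    r = b.copy()
    z = r / M
    p = z.copy()
    rsold = r @ z                     # preconditioned inner product
    for _ in range(maxit):
        Ap = Av(p)
        alpha = rsold / (p @ Ap)
        x = x + alpha * p
        r = r - alpha * Ap
        z = r / M
        rsnew = r @ z
        if abs(rsnew) < rtol:
            break
        p = z + (rsnew / rsold) * p   # direction built from z, not r
        rsold = rsnew
    return x

A = np.array([[4.0, 1.0], [1.0, 3.0]])
x = linear_cg_precond_numpy(lambda v: A @ v, np.array([1.0, 2.0]), M=np.diag(A))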
Example #4
def reduce(fn,
           sequences,
           outputs_info,
           non_sequences=None,
           go_backwards=False,
           mode=None,
           name=None):
    """
    Similar behaviour to Python's ``reduce``.

    Parameters
    ----------
    fn
        The function that ``reduce`` applies at each iteration step
        (see ``scan``  for more info).
    sequences
        List of sequences over which ``reduce`` iterates
        (see ``scan`` for more info).
    outputs_info
        List of dictionaries describing the outputs of
        reduce (see ``scan`` for more info).
    non_sequences
        List of arguments passed to ``fn``. ``reduce`` will
        not iterate over these arguments (see ``scan`` for
        more info).
    go_backwards : bool
        Decides the direction of iteration. True means that sequences are parsed
        from the end towards the beginning, while False is the other way around.
    mode
        See ``scan``.
    name
        See ``scan``.

    """
    rval = scan(fn=fn,
                sequences=sequences,
                outputs_info=outputs_info,
                non_sequences=non_sequences,
                go_backwards=go_backwards,
                truncate_gradient=-1,
                mode=mode,
                name=name)
    if isinstance(rval[0], (list, tuple)):
        return [x[-1] for x in rval[0]], rval[1]
    else:
        return rval[0][-1], rval[1]
Example #5
def map(
    fn,
    sequences,
    non_sequences=None,
    truncate_gradient=-1,
    go_backwards=False,
    mode=None,
    name=None,
):
    """
    Similar behaviour to Python's ``map``.

    Parameters
    ----------
    fn
        The function that ``map`` applies at each iteration step
        (see ``scan`` for more info).
    sequences
        List of sequences over which ``map`` iterates
        (see ``scan`` for more info).
    non_sequences
        List of arguments passed to ``fn``. ``map`` will not iterate over
        these arguments (see ``scan`` for more info).
    truncate_gradient
        See ``scan``.
    go_backwards : bool
        Decides the direction of iteration. True means that sequences are parsed
        from the end towards the beginning, while False is the other way around.
    mode
        See ``scan``.
    name
        See ``scan``.

    """
    return scan(
        fn=fn,
        sequences=sequences,
        outputs_info=[],
        non_sequences=non_sequences,
        truncate_gradient=truncate_gradient,
        go_backwards=go_backwards,
        mode=mode,
        name=name,
    )
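
A minimal usage sketch of this wrapper (assuming it is the one Theano exposes as ``theano.map``): applying an element-wise operation to every entry of a vector.

import numpy as np
import theano
import theano.tensor as tt

# Square every element of a vector: map is scan with no recurrent state,
# so fn only sees the current sequence element.
v = tt.vector('v')
squares, updates = theano.map(fn=lambda x: x ** 2, sequences=v)
f = theano.function([v], squares, updates=updates)
print(f(np.arange(4, dtype=theano.config.floatX)))  # -> [0. 1. 4. 9.]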
Example #6
def map(fn,
        sequences,
        non_sequences=None,
        truncate_gradient=-1,
        go_backwards=False,
        mode=None,
        name=None):
    """
    Similar behaviour to Python's ``map``.

    Parameters
    ----------
    fn
        The function that ``map`` applies at each iteration step
        (see ``scan`` for more info).
    sequences
        List of sequences over which ``map`` iterates 
        (see ``scan`` for more info).
    non_sequences
        List of arguments passed to ``fn``. ``map`` will not iterate over
        these arguments (see ``scan`` for more info).
    truncate_gradient
        See ``scan``.
    go_backwards : bool
        Decides the direction of iteration. True means that sequences are parsed
        from the end towards the beginning, while False is the other way around.
    mode
        See ``scan``.
    name
        See ``scan``.

    """
    return scan(fn=fn,
                sequences=sequences,
                outputs_info=[],
                non_sequences=non_sequences,
                truncate_gradient=truncate_gradient,
                go_backwards=go_backwards,
                mode=mode,
                name=name)
Example #7
def jobman(state, channel):
    # load dataset
    rng = numpy.random.RandomState(state['seed'])

    # declare the dimensionalities of the input and output
    if state['chunks'] == 'words':
        state['n_in'] = 10000
        state['n_out'] = 10000
    else:
        state['n_in'] = 50
        state['n_out'] = 50
    train_data, valid_data, test_data = get_text_data(state)

    ## BEGIN Tutorial
    ### Define Theano Input Variables
    x = TT.lvector('x')
    y = TT.lvector('y')
    h0 = theano.shared(numpy.zeros((eval(state['nhids'])[-1],), dtype='float32'))

    ### Neural Implementation of the Operators: \oplus
    #### Word Embedding
    emb_words = MultiLayer(
        rng,
        n_in=state['n_in'],
        n_hids=eval(state['inp_nhids']),
        activation=eval(state['inp_activ']),
        init_fn='sample_weights_classic',
        weight_noise=state['weight_noise'],
        rank_n_approx = state['rank_n_approx'],
        scale=state['inp_scale'],
        sparsity=state['inp_sparse'],
        learn_bias = True,
        bias_scale=eval(state['inp_bias']),
        name='emb_words')

    #### Deep Transition Recurrent Layer
    rec = eval(state['rec_layer'])(
            rng,
            eval(state['nhids']),
            activation = eval(state['rec_activ']),
            #activation = 'TT.nnet.sigmoid',
            bias_scale = eval(state['rec_bias']),
            scale=eval(state['rec_scale']),
            sparsity=eval(state['rec_sparse']),
            init_fn=eval(state['rec_init']),
            weight_noise=state['weight_noise'],
            name='rec')

    #### Stitching them together
    ##### (1) Get the embedding of a word
    x_emb = emb_words(x, no_noise_bias=state['no_noise_bias'])
    ##### (2) Embedding + Hidden State via DT Recurrent Layer
    reset = TT.scalar('reset')
    rec_layer = rec(x_emb, n_steps=x.shape[0],
                    init_state=h0*reset,
                    no_noise_bias=state['no_noise_bias'],
                    truncate_gradient=state['truncate_gradient'],
                    batch_size=1)

    ## BEGIN Exercise: DOT-RNN
    ### Neural Implementation of the Operators: \lhd

    #### Exercise (1)
    #### TODO: Define a layer from the hidden state to the intermediate layer

    #### Exercise (1)
    #### TODO: Define a layer from the input to the intermediate Layer

    #### Hidden State: Combine emb_state and emb_words_out
    #### Exercise (1)
    #### TODO: Define an activation layer

    #### Exercise (2)
    #### TODO: Define a dropout layer

    #### Softmax Layer
    output_layer = SoftmaxLayer(
        rng,
        eval(state['dout_nhid']),
        state['n_out'],
        scale=state['out_scale'],
        bias_scale=state['out_bias_scale'],
        init_fn="sample_weights_classic",
        weight_noise=state['weight_noise'],
        sparsity=state['out_sparse'],
        sum_over_time=True,
        name='out')

    ### Few Optional Things
    #### Direct shortcut from x to y
    if state['shortcut_inpout']:
        shortcut = MultiLayer(
            rng,
            n_in=state['n_in'],
            n_hids=eval(state['inpout_nhids']),
            activations=eval(state['inpout_activ']),
            init_fn='sample_weights_classic',
            weight_noise = state['weight_noise'],
            scale=eval(state['inpout_scale']),
            sparsity=eval(state['inpout_sparse']),
            learn_bias=eval(state['inpout_learn_bias']),
            bias_scale=eval(state['inpout_bias']),
            name='shortcut')

    #### Learning rate scheduling (1/(1+n/beta))
    state['clr'] = state['lr']
    def update_lr(obj, cost):
        stp = obj.step
        if isinstance(obj.state['lr_start'], int) and stp > obj.state['lr_start']:
            time = float(stp - obj.state['lr_start'])
            new_lr = obj.state['clr']/(1+time/obj.state['lr_beta'])
            obj.lr = new_lr
    if state['lr_adapt']:
        rec.add_schedule(update_lr)

    ### Neural Implementations of the Language Model
    #### Training
    if state['shortcut_inpout']:
        additional_inputs = [rec_layer, shortcut(x)]
    else:
        additional_inputs = [rec_layer]

    ##### Exercise (1): Compute the output intermediate layer
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer

    train_model = output_layer(outhid,
                               no_noise_bias=state['no_noise_bias'],
                               additional_inputs=additional_inputs).train(target=y,
            scale=numpy.float32(1./state['seqlen']))

    nw_h0 = rec_layer.out[rec_layer.out.shape[0]-1]
    if state['carry_h0']:
        train_model.updates += [(h0, nw_h0)]

    #### Validation
    h0val = theano.shared(numpy.zeros((eval(state['nhids'])[-1],), dtype='float32'))
    rec_layer = rec(emb_words(x, use_noise=False),
                    n_steps = x.shape[0],
                    batch_size=1,
                    init_state=h0val*reset,
                    use_noise=False)
    nw_h0 = rec_layer.out[rec_layer.out.shape[0]-1]

    ##### Exercise (1):
    ##### TODO: Compute the output intermediate layer

    ##### Exercise (2): Apply Dropout
    ##### TODO: Apply the dropout layer without noise

    if state['shortcut_inpout']:
        additional_inputs=[rec_layer, shortcut(x, use_noise=False)]
    else:
        additional_inputs=[rec_layer]
    valid_model = output_layer(outhid,
            additional_inputs=additional_inputs,
            use_noise=False).validate(target=y, sum_over_time=True)

    valid_updates = []
    if state['carry_h0']:
        valid_updates = [(h0val, nw_h0)]

    valid_fn = theano.function([x,y, reset], valid_model.out,
          name='valid_fn', updates=valid_updates)

    #### Sampling
    ##### single-step sampling
    def sample_fn(word_tm1, h_tm1):
        x_emb = emb_words(word_tm1, use_noise = False, one_step=True)
        h0 = rec(x_emb, state_before=h_tm1, one_step=True, use_noise=False)[-1]
        outhid = outhid_dropout(outhid_activ(emb_state(h0, use_noise=False, one_step=True) +
            emb_words_out(word_tm1, use_noise=False, one_step=True), one_step=True), 
            use_noise=False, one_step=True)
        word = output_layer.get_sample(state_below=outhid, additional_inputs=[h0], temp=1.)
        return word, h0

    ##### scan for iterating the single-step sampling multiple times
    [samples, summaries], updates = scan(sample_fn,
                      states = [
                          TT.alloc(numpy.int64(0), state['sample_steps']),
                          TT.alloc(numpy.float32(0), 1, eval(state['nhids'])[-1])],
                      n_steps= state['sample_steps'],
                      name='sampler_scan')

    ##### build a Theano function for sampling
    sample_fn = theano.function([], [samples],
        updates=updates, profile=False, name='sample_fn')

    ##### Load a dictionary
    dictionary = numpy.load(state['dictionary'])
    if state['chunks'] == 'chars':
        dictionary = dictionary['unique_chars']
    else:
        dictionary = dictionary['unique_words']
    def hook_fn():
        sample = sample_fn()[0]
        print 'Sample:',
        if state['chunks'] == 'chars':
            print "".join(dictionary[sample])
        else:
            for si in sample:
                print dictionary[si],
            print

    ### Build and Train a Model
    #### Define a model
    model = LM_Model(
        cost_layer = train_model,
        weight_noise_amount=state['weight_noise_amount'],
        valid_fn = valid_fn,
        clean_before_noise_fn = False,
        noise_fn = None,
        rng = rng)

    if state['reload']:
        model.load(state['prefix']+'model.npz')

    #### Define a trainer
    ##### Training algorithm (SGD)
    if state['moment'] < 0:
        algo = SGD(model, state, train_data)
    else:
        algo = SGD_m(model, state, train_data)
    ##### Main loop of the trainer
    main = MainLoop(train_data,
                    valid_data,
                    test_data,
                    model,
                    algo,
                    state,
                    channel,
                    train_cost = False,
                    hooks = hook_fn,
                    validate_postprocess =  eval(state['validate_postprocess']))
    ## Run!
    main.main()
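
The TODO exercises above ask for the DOT-RNN's "deep output" (the \lhd operator): project the current hidden state and the current word embedding into an intermediate layer, apply a nonlinearity (plus optional dropout for Exercise 2), and feed the result to the softmax. Below is a minimal pure-Theano sketch of that wiring, independent of the GroundHog layer classes used in the script; all sizes and weight names are illustrative, not the tutorial's official solution.

import numpy
import theano
import theano.tensor as TT

rng = numpy.random.RandomState(123)
n_hid, n_emb, n_dout, n_out = 100, 100, 200, 50      # illustrative sizes

def init_w(n_in, n_units, name):
    vals = rng.uniform(-0.01, 0.01, (n_in, n_units))
    return theano.shared(vals.astype(theano.config.floatX), name=name)

W_hd = init_w(n_hid, n_dout, 'W_hd')    # hidden state -> intermediate layer
W_xd = init_w(n_emb, n_dout, 'W_xd')    # word embedding -> intermediate layer
W_dy = init_w(n_dout, n_out, 'W_dy')    # intermediate layer -> output logits

h = TT.matrix('h')          # (n_steps, n_hid) recurrent hidden states
x_emb = TT.matrix('x_emb')  # (n_steps, n_emb) word embeddings

# Deep output: combine state and input, then a tanh intermediate layer;
# a dropout mask applied to `outhid` would implement Exercise (2).
outhid = TT.tanh(TT.dot(h, W_hd) + TT.dot(x_emb, W_xd))
p_y = TT.nnet.softmax(TT.dot(outhid, W_dy))   # per-step next-word distribution
f = theano.function([h, x_emb], p_y)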