Example #1
    stk = comstart[0] = comstok.w2i['<s>']  # id of the <s> start-of-sequence token; every prediction is seeded with it
    outfn = outdir+"/predictions/predict-{}-cfw.txt".format(outfile.split('.')[0])
    outf = open(outfn, 'w')
    print("writing to file: " + outfn)
    batch_sets = [allfids[i:i+batchsize] for i in range(0, len(allfids), batchsize)]
 
    prep("computing predictions...\n")
    for c, fid_set in enumerate(batch_sets):
        st = timer()
        
        for fid in fid_set:
            # save the reference comment for this function id, then overwrite it
            # with the start-token sequence so decoding begins from <s>
            refcoms[fid] = seqdata['c'+testval][fid]
            seqdata['c'+testval][fid] = comstart  # np.asarray([stk])
            
        bg = batch_gen(seqdata, testval, config, training=False)
        batch = bg.make_batch(fid_set)

        if config['batch_maker'] == 'datsonly':
            batch_results = gendescr_2inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
        elif config['batch_maker'] == 'ast':
            batch_results = gendescr_3inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
        elif config['batch_maker'] == 'ast_threed':
            batch_results = gendescr_4inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
        elif config['batch_maker'] == 'threed':
            batch_results = gendescr_threed(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
        elif config['batch_maker'] == 'graphast':
            batch_results = gendescr_graphast(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
        elif config['batch_maker'] == 'graphast_threed':
            batch_results = gendescr_5inp(model, batch, comstok, comlen, batchsize, config, refcoms, c, strat='greedy')
        elif config['batch_maker'] == 'pathast_threed':
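
The fragment above dispatches to a model-specific decoder based on config['batch_maker'] (it is cut off after the 'pathast_threed' branch). A minimal sketch of the same dispatch written as a lookup table; the gendescr_* names and argument order are copied from the branches shown, so treat the exact signatures as assumptions:

# Hypothetical restructuring of the elif chain above as a dispatch table.
# The 'pathast_threed' branch is cut off in the fragment, so it is omitted here.
GENDESCR_BY_MAKER = {
    'datsonly': gendescr_2inp,
    'ast': gendescr_3inp,
    'ast_threed': gendescr_4inp,
    'threed': gendescr_threed,
    'graphast': gendescr_graphast,
    'graphast_threed': gendescr_5inp,
}

def run_batch(model, batch, comstok, comlen, batchsize, config, refcoms, c):
    # look up the decoder for this batch maker and run greedy decoding
    gendescr = GENDESCR_BY_MAKER[config['batch_maker']]
    return gendescr(model, batch, comstok, comlen, batchsize, config,
                    refcoms, c, strat='greedy')
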
Example #2
    config['tdatlen'] = len(list(seqdata['dttrain'].values())[0])
    config['sdatlen'] = seqdata['config']['sdatlen']
    config['comlen'] = len(list(seqdata['ctrain'].values())[0])
    config['smllen'] = len(list(seqdata['strain'].values())[0])

    config['multigpu'] = multigpu
    config['batch_size'] = batch_size

    prep('creating model... ')
    config, model = create_model(modeltype, config)
    drop()

    print(model.summary())

    gen = batch_gen(seqdata, 'train', modeltype, config)
    checkpoint = ModelCheckpoint(outdir + '/models/' + modeltype +
                                 '_E{epoch:02d}_' + str(timestart) + '.h5')
    savehist = HistoryCallback()
    savehist.setCatchExit(outdir, modeltype, timestart, config)

    valgen = batch_gen(seqdata, 'val', modeltype, config)

    callbacks = [checkpoint, savehist]

    try:
        history = model.fit_generator(gen,
                                      steps_per_epoch=steps,
                                      epochs=epochs,
                                      verbose=1,
                                      max_queue_size=3,
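
Model.fit_generator is deprecated in TensorFlow 2.x Keras, where Model.fit accepts Python generators and keras.utils.Sequence objects directly. A minimal equivalent of the (truncated) call above under that assumption; passing callbacks=callbacks is an inference from the callbacks list defined earlier in the fragment:

# Equivalent call on TF 2.x Keras, where fit() accepts generators/Sequences directly.
history = model.fit(gen,
                    steps_per_epoch=steps,
                    epochs=epochs,
                    verbose=1,
                    max_queue_size=3,
                    callbacks=callbacks)  # callbacks (and presumably valgen) are passed in the cut-off part of the original call
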
Example #3
    # cmd = "grep {} /nfs/projects/attn-to-fc/data/standard/output/sdats.test".format(fid)
    # print('sdats for {}'.format(fid))
    # os.system(cmd)
    # print()
    #print(modeltypewsdats, configwsdats['batch_maker'])
    #print(modeltypewosdats, configwosdats['batch_maker'])
    #sys.exit()

    prep("computing predictions...\n")
    for c, fid_set in enumerate(batch_sets):
        st = timer()

        for fid in fid_set:
            # overwrite each test comment with the start-token sequence before decoding
            seqdata['ctest'][fid] = comstart  # np.asarray([stk])

        bg = batch_gen(seqdata, 'test', configwsdats, training=False)
        batch = bg.make_batch(fid_set)

        if configwsdats['batch_maker'] == 'datsonly':
            batch_resultswsdats = gendescr_2inp(modelwsdats, batch, comstok,
                                                comlen, batchsize,
                                                configwsdats, strat, beamwidth,
                                                outfilewsdats, stopword)
        elif configwsdats['batch_maker'] == 'ast':
            batch_resultswsdats = gendescr_3inp(modelwsdats, batch, comstok,
                                                comlen, batchsize,
                                                configwsdats, strat, beamwidth,
                                                outfilewsdats, stopword,
                                                outdir)
        elif configwsdats['batch_maker'] == 'ast_threed':
            batch_resultswsdats = gendescr_4inp(modelwsdats, batch, comstok,
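
Example #3 iterates over batch_sets without showing how it is built; in Examples #1 and #5 the same variable is a plain list-comprehension chunking of allfids. A small, self-contained illustration of that chunking with made-up ids:

allfids = [101, 102, 103, 104, 105]
batchsize = 2
batch_sets = [allfids[i:i+batchsize] for i in range(0, len(allfids), batchsize)]
print(batch_sets)  # [[101, 102], [103, 104], [105]] -- the last batch may be short
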
Example #4
        config['tdatlen'] = len(list(seqdata['dttrain'].values())[0])
        config['sdatlen'] = seqdata['config']['sdatlen']
        config['smllen'] = len(list(seqdata['strain'].values())[0])
    except KeyError:
        pass  # some configurations do not have all data, which is fine

    config['multigpu'] = multigpu
    config['batch_size'] = batch_size

    prep('creating model... ')
    config, model = create_model(modeltype, config)
    drop()

    print(model.summary())

    gen = batch_gen(seqdata, 'train', config)
    #checkpoint = ModelCheckpoint(outdir+'/'+modeltype+'_E{epoch:02d}_TA{acc:.2f}_VA{val_acc:.2f}_VB{val_bleu:}.h5', monitor='val_loss')
    checkpoint = ModelCheckpoint(outdir + '/models/' + modeltype +
                                 '_E{epoch:02d}_' + str(timestart) + '.h5')
    savehist = HistoryCallback()
    savehist.setCatchExit(outdir, modeltype, timestart, config)

    valgen = batch_gen(seqdata, 'val', config)

    # If you want it to calculate BLEU Score after each epoch use callback_valgen and test_cb
    #####
    #callback_valgen = batch_gen_train_bleu(seqdata, comvocabsize, 'val', modeltype, batch_size=batch_size)
    #test_cb = mycallback(callback_valgen, steps)
    #####
    callbacks = [checkpoint, savehist]
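
The ModelCheckpoint path in Examples #2, #4, and #6 relies on Keras filepath templating: named fields such as {epoch:02d} are filled in when a checkpoint is written. A short illustration of the resulting filename; the outdir, modeltype, and timestart values here are placeholders, not taken from the fragments:

outdir, modeltype, timestart = 'out', 'mymodel', 1588893000  # illustrative values only
filepath = outdir + '/models/' + modeltype + '_E{epoch:02d}_' + str(timestart) + '.h5'
print(filepath.format(epoch=3))  # out/models/mymodel_E03_1588893000.h5
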
Example #5
    prep('loading model... ')
    model = keras.models.load_model(modelfile, custom_objects={"tf":tf, "keras":keras, "OurCustomGraphLayer":OurCustomGraphLayer, "SeqSelfAttention":SeqSelfAttention})
    print(model.summary())
    drop()

    batch_sets = [allfids[i:i+batchsize] for i in range(0, len(allfids), batchsize)]
    refs = list()
    preds = list()

    predf = open('{}/predictions/{}_{}_{}.tsv'.format(outdir, modeltype, mid, timestart), 'w')
 
    prep("computing predictions...\n")
    for c, fid_set in enumerate(batch_sets):
        st = timer()
        
        bg = batch_gen(seqdata, testval, config, training=False, firstwords=firstwords)
        batch = bg.make_batch(fid_set)

        if config['batch_maker'] == 'fw_datsonly':
            batch_results = gendescr_datsonly(model, batch, batchsize, config)
        elif config['batch_maker'] == 'fw_datsfc':
            batch_results = gendescr_datsfc(model, batch, batchsize, config)
        elif config['batch_maker'] == 'fw_datspc':
            batch_results = gendescr_datspc(model, batch, batchsize, config)
        elif config['batch_maker'] == 'fw_datsast':
            batch_results = gendescr_datsast(model, batch, batchsize, config)
        elif config['batch_maker'] == 'fw_datsastfc':
            batch_results = gendescr_datsastfc(model, batch, batchsize, config)
        elif config['batch_maker'] == 'fw_datsastpc':
            batch_results = gendescr_datsastpc(model, batch, batchsize, config)
        elif config['batch_maker'] == 'fw_graphast':
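
Each prediction loop in these fragments starts with st = timer(); timer is presumably timeit.default_timer imported under a short alias. A minimal sketch of that timing pattern around one batch; the per-batch report line is an assumption, since the end of each loop is cut off:

from timeit import default_timer as timer

st = timer()
# ... run the gendescr_* call for this batch ...
et = timer()
print('batch {} took {:.2f}s'.format(c, et - st))  # hypothetical report; c is the batch index from enumerate()
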
Example #6
    config['multigpu'] = multigpu
    config['batch_size'] = batch_size

    prep('creating model... ')
    config, model = create_model(modeltype, config)
    drop()

    print(model.summary())
    fn = outdir + '/histories/' + modeltype + '_conf_' + datrim + str(
        timestart) + '.pkl'  # start by saving the resolved config
    confoutfd = open(fn, 'wb')
    pickle.dump(config, confoutfd)
    confoutfd.close()  # close so the config is flushed to disk before training starts
    print('saved config to: ' + fn)

    if fwfile is not None:
        gen = batch_gen(seqdata, 'train', config, firstwords=firstwords)
    else:
        gen = batch_gen(seqdata, 'train', config)
    checkpoint = ModelCheckpoint(outdir + '/models/' + modeltype +
                                 '_E{epoch:02d}_' + datrim + str(timestart) +
                                 '.h5')
    if fwfile is not None:
        valgen = batch_gen(seqdata, 'val', config, firstwords=firstwords)
    else:
        valgen = batch_gen(seqdata, 'val', config)
    callbacks = [checkpoint]

    model.fit_generator(gen,
                        steps_per_epoch=steps,
                        epochs=epochs,
                        verbose=1,
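
Example #6 pickles the resolved config next to the training histories so a later run can recover the exact settings. A minimal sketch of reading such a file back; fn stands for the '<outdir>/histories/<modeltype>_conf_...pkl' path printed by the fragment above:

import pickle

with open(fn, 'rb') as confinfd:
    config = pickle.load(confinfd)
print(config['batch_size'], config['multigpu'])  # keys set by the training fragments above
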