def main():
    """Compute per-word omission scores on the COCO val split under four
    models (visual, textual, plain LM, vector-sum) and write one CSV row
    per (sentence, position, word).

    Relies on module-level globals: root, model_im_path, model_lm_path,
    model_sum_path, data_path, plus helpers dump_activations and omission.
    """
    logging.getLogger().setLevel('INFO')
    logging.info("Loading data")
    prov = dp.getDataProvider(dataset='coco', root=root, audio_kind=None)
    sent = list(prov.iterSentences(split='val'))
    sent_id = [sent_i['sentid'] for sent_i in sent]
    sent_tok = [sent_i['tokens'] for sent_i in sent]
    logging.info("Loading imaginet model")
    model_im = D.load(model_im_path)
    logging.info("Loading plain LM model")
    model_lm = imaginet.task.load(model_lm_path)
    logging.info("Loading vectorsum model")
    model_sum = imaginet.task.load(model_sum_path)
    dump_activations(sent_tok,
                     [("visual", model_im, model_im.visual),
                      ("textual", model_im, model_im.lm),
                      ("lm", model_lm, model_lm.task),
                      ("sum", model_sum, model_sum.task)])
    # FIX: the original opened the CSV without ever closing it, so buffered
    # rows could be lost; a context manager flushes and closes deterministically.
    with open(data_path + '/omission_coco_val.csv', "w") as out:
        writer = csv.writer(out)
        writer.writerow(["sentid", "position", "word",
                         "omission_v", "omission_t",
                         "omission_lm", "omission_sum"])
        for sid, toks in zip(sent_id, sent_tok):
            logging.info("Processing: {}".format(sid))
            O_v = omission(model_im, toks, task=model_im.visual)
            O_t = omission(model_im, toks, task=model_im.lm)
            O_lm = omission(model_lm, toks, task=model_lm.task)
            O_sum = omission(model_sum, toks, task=model_sum.task)
            for j, word in enumerate(toks):
                writer.writerow([sid, j, word,
                                 O_v[j], O_t[j], O_lm[j], O_sum[j]])
def train(dataset='coco', datapath='.', model_path='.', tokenize=phonemes,
          max_norm=None, min_df=10, scale=True, epochs=1, batch_size=64,
          shuffle=True, size_embed=128, size_hidden=512, depth=2,
          validate_period=64*1000, limit=None, seed=None):
    """Train a VisualModel on the given dataset and checkpoint to model_path."""
    # sys.setrecursionlimit(50000) # needed for pickling models
    if seed is not None:
        # Seed both RNGs so a run is reproducible end to end.
        random.seed(seed)
        numpy.random.seed(seed)
    provider = dp.getDataProvider(dataset, root=datapath)
    batches = SimpleData(provider, tokenize=tokenize, min_df=min_df,
                         scale=scale, batch_size=batch_size,
                         shuffle=shuffle, limit=limit)
    # Network hyper-parameters; target size 4096 matches the image features.
    net_config = {'size_embed': size_embed,
                  'size': size_hidden,
                  'depth': depth,
                  'size_target': 4096,
                  'max_norm': max_norm}
    net = visual.VisualModel({'scaler': batches.scaler,
                              'batcher': batches.batcher},
                             net_config)
    do_training(net, batches, epochs, validate_period, model_path)
def main():
    """Compute mutual information between quantized RNN state dimensions and
    n-gram contexts (surface words vs. dependency labels) on COCO val, and
    dump both the MI scores and top-activating examples to CSV.
    """
    logging.getLogger().setLevel('INFO')
    logging.info("Loading input data")
    root = '/home/gchrupal/reimaginet/'
    prov = dp.getDataProvider(dataset='coco', root=root, audio_kind=None)
    # Sentence ids of the val split, in provider order.
    ids = np.array([senti['sentid'] for senti in prov.iterSentences(split='val')])
    data = pd.read_csv("/home/gchrupal/cl-resubmit/data/depparse_coco_val.csv")
    # Map sentid -> per-sentence dataframe of parsed tokens.
    sent_data = dict(senti for senti in data.groupby(['sentid']))
    with open("/home/gchrupal/cl-resubmit/data/mutual_examples.csv", "w") as out_ex:
        writer_ex = csv.writer(out_ex)
        writer_ex.writerow(["pathway", "condition", "order", "dimension", "index","history","history_w"])
        with open("/home/gchrupal/cl-resubmit/data/mutual.csv", "w") as out:
            writer = csv.writer(out)
            writer.writerow(["pathway", "condition", "order", "dimension", "mi"])
            for net in ["visual", "textual", "lm", "sum"]:
                states = np.load("/home/gchrupal/cl-resubmit/data/states_{}.npy".format(net), encoding='bytes')
                # Stack all timesteps (dropping each sentence's final state)
                # and quantize; S is indexed as (dimension, timestep).
                S = quantize(np.vstack([state[:-1,:] for state in states]).T)
                for typ in ['word', 'dep']:
                    for order in [1,2,3]:
                        logging.info("Computing scores for {}:{}:{}".format(net, typ, order))
                        # Word n-grams (for readable example output) and the
                        # condition's n-grams, aligned per timestep.
                        context_w = [item for i in ids for item in ngram(list(sent_data[i]['word']), order) ]
                        context = [item for i in ids for item in ngram(list(sent_data[i][typ]), order) ]
                        mi = []
                        # NOTE(review): 1024 presumably equals the hidden-state
                        # size — confirm against the trained model configs.
                        for i in range(1024):
                            mi.append(mutual_info_score(S[i], context))
                            writer.writerow([net, typ, order, i, mi[-1]])
                        logging.info("Finding examples for {}:{}:{}".format(net, typ, order))
                        # Three dimensions with the highest MI for this condition.
                        top3 = np.array(mi).argsort()[-3:]
                        for dim in top3:
                            # Ten timesteps where the quantized dim is largest.
                            for index in S[dim].argsort()[-10:]:
                                writer_ex.writerow([net, typ, order, dim, index, context[index], context_w[index]])
def __init__(self, model):
    """Cache scaled COCO val image features and their COCO ids.

    model -- a trained model exposing .scaler (feature scaler).
    NOTE(review): `root` is a free name, presumably a module-level global
    pointing at the reimaginet data root — confirm.
    """
    self.model = model
    prov = dp.getDataProvider(dataset='coco', root=root, audio_kind=None)
    images = list(prov.iterImages(split='val'))
    self.img_fs = model.scaler.transform(
        numpy.array([img['feat'] for img in images], dtype='float32'))
    # FIX: the original used json.load(open(...)), leaking the file handle;
    # a context manager closes it deterministically.
    with open("/home/gchrupal/reimaginet/data/coco/dataset.json") as f:
        data = json.load(f)
    # Internal imgid -> public COCO id.
    COCOID = {img['imgid']: img['cocoid'] for img in data['images']}
    self.IDS = [COCOID[img['imgid']] for img in images]
def evaluate(dataset='coco', datapath='.', model_path='model.zip',
             batch_size=128, tokenize=phonemes):
    """Evaluate a visual model by sentence-to-image retrieval on the val split.

    Returns ranking(...) results (recall at 1, 5, 10; no self-exclusion).
    """
    model = visual.load(path=model_path)
    # FIX: dropped the unused locals `task` (= model.Visual — an attribute
    # that is never read), `batcher` and `mapper` from the original.
    prov = dp.getDataProvider(dataset, root=datapath)
    # Iterate the split once and reuse the list for tokenization.
    sents = list(prov.iterSentences(split='val'))
    sents_tok = [tokenize(sent) for sent in sents]
    predictions = visual.predict_img(model, sents_tok, batch_size=batch_size)
    images = list(prov.iterImages(split='val'))
    img_fs = list(model.scaler.transform([image['feat'] for image in images]))
    # correct_img[i, j] is True iff sentence i describes image j.
    correct_img = numpy.array(
        [[sents[i]['imgid'] == images[j]['imgid'] for j in range(len(images))]
         for i in range(len(sents))])
    return ranking(img_fs, predictions, correct_img, ns=(1, 5, 10),
                   exclude_self=False)
def evaluate(dataset='coco', datapath='.', model_path='model.zip',
             batch_size=128, tokenize=phonemes):
    """Evaluate a visual model by sentence-to-image retrieval on the val split.

    Returns ranking(...) results (recall at 1, 5, 10; no self-exclusion).
    """
    model = visual.load(path=model_path)
    # FIX: removed the original's unused locals `task` (= model.Visual, an
    # attribute read that is never used), `batcher` and `mapper`.
    prov = dp.getDataProvider(dataset, root=datapath)
    # One pass over the split; the same sentence list feeds tokenization
    # and the correctness matrix below.
    sents = list(prov.iterSentences(split='val'))
    sents_tok = [tokenize(sent) for sent in sents]
    predictions = visual.predict_img(model, sents_tok, batch_size=batch_size)
    images = list(prov.iterImages(split='val'))
    img_fs = list(model.scaler.transform([image['feat'] for image in images]))
    # correct_img[i, j] marks whether sentence i belongs to image j.
    correct_img = numpy.array(
        [[sents[i]['imgid'] == images[j]['imgid'] for j in range(len(images))]
         for i in range(len(sents))])
    return ranking(img_fs, predictions, correct_img, ns=(1, 5, 10),
                   exclude_self=False)
def train(dataset='coco', datapath='.', model_path='.', tokenize=phonemes,
          max_norm=None, min_df=10, scale=True, epochs=1, batch_size=64,
          shuffle=True, size_embed=128, size_hidden=512, depth=2,
          validate_period=64 * 1000, limit=None, seed=None):
    """Train a VisualModel on `dataset` and write checkpoints to `model_path`.

    tokenize -- sentence -> token-sequence function (default: phonemes).
    validate_period -- number of examples between validation runs.
    """
    # sys.setrecursionlimit(50000) # needed for pickling models
    if seed is not None:
        # Seed both stdlib and numpy RNGs for reproducibility.
        random.seed(seed)
        numpy.random.seed(seed)
    prov = dp.getDataProvider(dataset, root=datapath)
    data = SimpleData(prov, tokenize=tokenize, min_df=min_df, scale=scale,
                      batch_size=batch_size, shuffle=shuffle, limit=limit)
    # size_target=4096 matches the image feature dimensionality.
    config = dict(size_embed=size_embed, size=size_hidden, depth=depth,
                  size_target=4096, max_norm=max_norm)
    model = visual.VisualModel(dict(scaler=data.scaler, batcher=data.batcher),
                               config)
    do_training(model, data, epochs, validate_period, model_path)
# Build an inverted index word -> list of sentence ids for one COCO split
# (given as argv[1]) and pickle it to sentidlex<split>.p.
from imaginet.simple_data import words
from imaginet.data_provider import getDataProvider
import cPickle as pickle
from collections import Counter, defaultdict
import sys

split = sys.argv[1]
prov = getDataProvider('coco', root='../reimaginet')
sents = list(prov.iterSentences(split=split))
# FIX: replaced the manual "if key in dict ... else ..." append pattern
# with defaultdict(list).
indexlexicon = defaultdict(list)
for s in sents:
    for word in words(s):
        indexlexicon[word].append(s['sentid'])
with open("sentidlex{}.p".format(split), "wb") as outfile:
    # Convert back to a plain dict so unpickling needs no factory.
    pickle.dump(dict(indexlexicon), outfile)
# Training script: audio-visual RHN model on Flickr8k speech features.
import imaginet.simple_data as sd
import imaginet.experiment as E
import imaginet.data_provider as dp
import imaginet.defn.audiovis_rhn as D

dataset = 'flickr8k'
batch_size = 32
epochs=25
# MFCC features of human speech; no text tokenization involved.
prov = dp.getDataProvider(dataset, root='/home/gchrupala/repos/reimaginet/',
                          audio_kind='human.max1K.accel3.ord.mfcc')
data = sd.SimpleData(prov, min_df=10, scale=False,
                     batch_size=batch_size, shuffle=True)
# Network hyper-parameters; size_target=4096 presumably matches the image
# feature dimensionality — TODO confirm.
model_config = dict(size=1024, depth=4, recur_depth=2, max_norm=2.0,
                    residual=True, drop_i=0.25, drop_s=0.1, lr=0.0002,
                    size_vocab=37, size_target=4096, filter_length=6,
                    filter_size=64, stride=2, contrastive=True,
                    margin_size=0.2, fixed=True, init_img='xavier',
                    size_attn=128)
run_config = dict(seed=51, task=D.Visual, epochs=epochs, validate_period=400)

def audio(sent):
    # Tokenizer used at evaluation: the sentence's audio features.
    return sent['audio']

eval_config = dict(tokenize=audio, split='val', task=D.Visual,
                   batch_size=batch_size, epochs=epochs,
                   encode_sentences=D.encode_sentences)

E.run_train(data, prov, model_config, run_config, eval_config)
#E.run_eval(prov, eval_config, encode_sentences=D.encode_sentences)
# Training script: word-based visual RHN model on Flickr8k (no audio).
import imaginet.simple_data as sd
import imaginet.experiment as E
import imaginet.data_provider as dp
import imaginet.defn.visual2_rhn as D

dataset = 'flickr8k'
batch_size = 32
epochs=20
prov = dp.getDataProvider(dataset, root='/home/gchrupala/repos/reimaginet/',
                          audio_kind=None)
# Word tokenization; val_vocab=True includes validation words in the vocab.
data = sd.SimpleData(prov, min_df=1, scale=False, batch_size=batch_size,
                     shuffle=True, tokenize=sd.words, val_vocab=True)
# Network hyper-parameters; size_target=4096 presumably matches the image
# feature dimensionality — TODO confirm.
model_config = dict(size_embed=300, size=1024, depth=1, recur_depth=1,
                    max_norm=2.0, residual=True, drop_i=0.0, drop_s=0.0,
                    lr=0.001, size_target=4096, contrastive=True,
                    margin_size=0.2, fixed=True, init_img='xavier')
run_config = dict(seed=61, task=D.Visual, epochs=epochs, validate_period=400)

def audio(sent):
    # NOTE(review): unused here — eval_config tokenizes with sd.words;
    # likely left over from the audio variant of this script.
    return sent['audio']

eval_config = dict(tokenize=sd.words, split='val', task=D.Visual,
                   batch_size=batch_size, epochs=epochs,
                   encode_sentences=D.encode_sentences)

E.run_train(data, prov, model_config, run_config, eval_config)
#E.run_eval(prov, eval_config, encode_sentences=D.encode_sentences)