# --- Module-level experiment configuration --------------------------------
# NOTE(review): this section duplicates much of get_data() below and uses
# names not defined in this chunk (loader, args, tr_split, pickle) --
# presumably bound earlier in the file; confirm before running standalone.
weights_folder_path = 'weights/'
train_patience = 5
max_segment_len = 20  # the max length of a segment in the dataset is 114
min_word_frequency = 1
embedding_vecor_length = 300  # (sic) name kept -- referenced elsewhere
use_pretrained_word_embedding = True
use_pretrained_single_model = True
use_cartesian_fusion = True
end_to_end = True
feature_selection = True
a, b = 4.0, 8.0
# NOTE(review): 'Cartesion' is a typo for 'Cartesian', but the string may be
# matched elsewhere (run names / weight paths) -- left unchanged on purpose.
fusion_method = 'Cartesion' if use_cartesian_fusion else 'Concat'

#word2ix = loader.load_word2ix()
# One-element list so it can feed keras Embedding(weights=...) directly.
word_embedding = [loader.load_word_embedding()] if use_pretrained_word_embedding else None
train, valid, test = loader.load_word_level_features(max_segment_len, tr_split)

feature_str = ''
if args.feature_selection:
    # FIX: open the pickle in binary mode -- required under Python 3 and
    # also correct under Python 2 (text mode can corrupt pickle data).
    with open('/media/bighdd5/Paul/mosi/fs_mask.pkl', 'rb') as f:
        [covarep_ix, facet_ix] = pickle.load(f)
    # Keep only the pre-selected acoustic (covarep) / visual (facet) dims.
    facet_train = train['facet'][:, :, facet_ix]
    facet_valid = valid['facet'][:, :, facet_ix]
    facet_test = test['facet'][:, :, facet_ix]
    covarep_train = train['covarep'][:, :, covarep_ix]
    covarep_valid = valid['covarep'][:, :, covarep_ix]
    covarep_test = test['covarep'][:, :, covarep_ix]
    # Tag encodes the per-modality feature widths, e.g. '_t300_c34_f43'.
    feature_str = ('_t' + str(embedding_vecor_length) +
                   '_c' + str(covarep_test.shape[2]) +
                   '_f' + str(facet_test.shape[2]))
def get_data(args, config):
    """Load, preprocess and fuse the word-level multimodal features.

    Parameters
    ----------
    args : namespace with a boolean ``feature_selection`` attribute.
    config : dict with key ``'seqlength'`` (max segment length to pad/crop to).

    Returns
    -------
    (X_train, y_train, X_valid, y_valid, X_test, y_test) where each ``X_*``
    is an (n, seq, d) array concatenating the frozen glove text embeddings
    with the covarep (acoustic) and facet (visual) streams along axis 2.
    """
    tr_split = 2.0 / 3    # fixed. 62 training & validation, 31 test
    val_split = 0.1514    # fixed. 52 training 10 validation (kept for reference)
    use_pretrained_word_embedding = True  # fixed. use glove 300d
    embedding_vecor_length = 300          # fixed. use glove 300d
    # fixed for MOSI: the max length of a segment in the MOSI dataset is 114
    max_segment_len = config['seqlength']
    end_to_end = True  # fixed (kept for reference)

    word2ix = loader.load_word2ix()
    # One-element list so it can feed keras Embedding(weights=...) directly.
    # NOTE: word_embedding[0] below assumes this flag is True (it is fixed).
    word_embedding = [loader.load_word_embedding()] if use_pretrained_word_embedding else None
    train, valid, test = loader.load_word_level_features(max_segment_len, tr_split)

    # FIX: .items() instead of py2-only .iteritems(); dropped the unused
    # 'inv_map' alias. ix2word is kept only for the size diagnostic below.
    ix2word = {v: k for k, v in word2ix.items()}
    print(len(word2ix))
    print(len(ix2word))
    print(word_embedding[0].shape)

    feature_str = ''
    if args.feature_selection:
        # FIX: binary mode is required for pickle under Python 3 and is
        # also correct under Python 2.
        with open('/media/bighdd5/Paul/mosi/fs_mask.pkl', 'rb') as f:
            [covarep_ix, facet_ix] = pickle.load(f)
        facet_train = train['facet'][:, :, facet_ix]
        facet_valid = valid['facet'][:, :, facet_ix]
        facet_test = test['facet'][:, :, facet_ix]
        covarep_train = train['covarep'][:, :, covarep_ix]
        covarep_valid = valid['covarep'][:, :, covarep_ix]
        covarep_test = test['covarep'][:, :, covarep_ix]
        feature_str = ('_t' + str(embedding_vecor_length) +
                       '_c' + str(covarep_test.shape[2]) +
                       '_f' + str(facet_test.shape[2]))
    else:
        # No mask: keep every facet dim, keep covarep columns 1..34
        # (column 0 is dropped).
        facet_train = train['facet']
        facet_valid = valid['facet']
        facet_test = test['facet']
        covarep_train = train['covarep'][:, :, 1:35]
        covarep_valid = valid['covarep'][:, :, 1:35]
        covarep_test = test['covarep'][:, :, 1:35]

    text_train = train['text']
    text_valid = valid['text']
    text_test = test['text']
    y_train = train['label']
    y_valid = valid['label']
    y_test = test['label']
    # (removed: unused lengths_train/valid/test and a dead, commented-out
    # h5py debug dump of lengths_test)

    # Normalise facet by the per-dimension max |value| over the training
    # set; all-zero dims get divisor 1 to avoid division by zero.
    # covarep is deliberately left unnormalised (disabled in the original).
    facet_train_max = np.max(np.max(np.abs(facet_train), axis=0), axis=0)
    facet_train_max[facet_train_max == 0] = 1
    facet_train = facet_train / facet_train_max
    facet_valid = facet_valid / facet_train_max
    facet_test = facet_test / facet_train_max

    # Frozen embedding lookup model: maps int word indices -> glove vectors,
    # so the fused inputs can be precomputed once, outside training.
    text_input = Input(shape=(max_segment_len,), dtype='int32',
                       name='text_input')
    text_eb_layer = Embedding(word_embedding[0].shape[0],
                              embedding_vecor_length,
                              input_length=max_segment_len,
                              weights=word_embedding,
                              name='text_eb_layer',
                              trainable=False)(text_input)
    model = Model(text_input, text_eb_layer)

    def _fuse(text, covarep, facet):
        # Embed the text indices and concatenate the three modalities
        # feature-wise: (n, seq, 300) ++ (n, seq, c) ++ (n, seq, f).
        emb = model.predict(text)
        print(emb.shape)      # n x seq x 300
        print(covarep.shape)  # n x seq x 5/34
        print(facet.shape)    # n x seq x 20/43
        return np.concatenate((emb, covarep, facet), axis=2)

    X_train = _fuse(text_train, covarep_train, facet_train)
    X_valid = _fuse(text_valid, covarep_valid, facet_valid)
    X_test = _fuse(text_test, covarep_test, facet_test)

    return X_train, y_train, X_valid, y_valid, X_test, y_test