Example No. 1
def build_model(input_video, input_stories, input_question, input_answer,
			v2i, w2v_model, pca_mat=None, d_w2v=300, d_lproj=300,
			answer_index=None, lr=0.01, question_guided=False):


	with tf.variable_scope('video_subtitle_hierarchical_frame_clip') as scope:
		

		T_B, T_w2v, T_mask, pca_mat_ = ModelUtil.setWord2VecModelConfiguration(v2i,w2v_model,d_w2v,d_lproj)
		# encode question
		embedded_question_words, mask_q = ModelUtil.getEmbeddingWithWord2Vec(input_question, T_w2v, T_mask)
		embedded_question = HSEModelUtil.getAverageRepresentation(embedded_question_words,T_B,d_lproj)

		# encode stories
		embedded_stories_words, mask_s = ModelUtil.getEmbeddingWithWord2Vec(input_stories, T_w2v, T_mask)
		# embedded_stories = ModelUtil.getMemoryNetworks(embedded_stories_words, embedded_question, d_lproj, T_B=T_B, return_sequences=True)
		embeded_stories = HSEModelUtil.getAverageRepresentation(embedded_stories_words, T_B, d_lproj)
		# encode video
		# embedded_video = HHSEModelUtil.getVideoDualSemanticEmbedding(input_video, T_w2v, embedded_stories, T_B, pca_mat=pca_mat) # batch x timesteps x d_w2v
		embedded_video = HSEModelUtil.getVideoDualSemanticEmbeddingWithQuestionAttention(input_video, T_w2v, embeded_stories, embedded_question, T_B, pca_mat=pca_mat) # batch x timesteps x d_w2v


		# encode answers
		embedded_answer_words, mask_a = ModelUtil.getEmbeddingWithWord2Vec(input_answer, T_w2v, T_mask)
		embedded_answer = HSEModelUtil.getAverageRepresentation(embedded_answer_words,T_B,d_lproj)

		# get video loss
		video_loss,video_scores = ModelUtil.getClassifierLoss(embedded_video, embedded_question, embedded_answer, answer_index=answer_index)

		# train module
		loss = tf.reduce_mean(video_loss)
		# acc_value = tf.metrics.accuracy(y, embedded_question)
		optimizer = tf.train.GradientDescentOptimizer(lr)
		train = optimizer.minimize(loss)
		return train,loss,video_scores
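
For orientation, the snippet below is a minimal, self-contained sketch of the train/loss pattern these build_model functions return (a GradientDescentOptimizer minimize op plus a scalar loss), assuming a TensorFlow 1.x environment; the toy linear model and random data are illustrative assumptions, not part of the original example.

# Minimal sketch of the train/loss pattern used above (assumes TensorFlow 1.x).
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 3), name='x')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
w = tf.Variable(tf.zeros([3, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))          # scalar loss
train = tf.train.GradientDescentOptimizer(0.01).minimize(loss)  # train op

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    data_x = np.random.random((64, 3)).astype(np.float32)
    data_y = np.random.random((64, 1)).astype(np.float32)
    for _ in range(100):
        _, l = sess.run([train, loss], feed_dict={x: data_x, y: data_y})
    print(l)
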
Example No. 2
def build_model(input_stories,
                input_question,
                size_voc,
                word_embedding_size,
                sentence_embedding_size,
                input_answer,
                common_space_dim,
                answer_index=None,
                lr=0.01,
                isTest=False):

    with tf.variable_scope('share_embedding_matrix') as scope:

        # encode question
        embeded_question_words, mask_q = ModelUtil.getEmbedding(
            input_question, size_voc, word_embedding_size)
        embeded_question = ModelUtil.getQuestionEncoder(
            embeded_question_words, sentence_embedding_size, mask_q)

        scope.reuse_variables()
        # encode stories
        embeded_stories_words, mask_s = ModelUtil.getAnswerEmbedding(
            input_stories, size_voc, word_embedding_size)
        embeded_stories = ModelUtil.getMemoryNetworks(embeded_stories_words,
                                                      embeded_question, mask_s)

        # encode answers
        embeded_answer_words, mask_a = ModelUtil.getAnswerEmbedding(
            input_answer, size_voc, word_embedding_size)
        embeded_answer = ModelUtil.getAnswerEncoder(embeded_answer_words,
                                                    sentence_embedding_size,
                                                    mask_a)

        # T_s, T_q, T_a = ModelUtil.getMultiModel(embeded_stories, embeded_question, embeded_answer, common_space_dim)

        if not isTest:
            # loss = ModelUtil.getTripletLoss(T_s, T_q, T_a, y)
            loss, scores = ModelUtil.getRankingLoss(embeded_stories,
                                                    embeded_question,
                                                    embeded_answer,
                                                    answer_index=answer_index,
                                                    isTest=isTest)

            # train module
            loss = tf.reduce_mean(loss)
            # acc_value = tf.metrics.accuracy(y, embeded_question)
            optimizer = tf.train.GradientDescentOptimizer(lr)
            train = optimizer.minimize(loss)
            return train, loss, scores
        else:
            scores = ModelUtil.getRankingLoss(embeded_stories,
                                              embeded_question,
                                              embeded_answer,
                                              answer_index=answer_index,
                                              isTest=isTest)
            return scores
Example No. 3
def CaculateErrorRate(session, dataList, labels):
    data_size = dataList.shape[0]
    errorCount = 0
    for step in xrange(int(data_size / BATCH_SIZE)):
        offset = (step * BATCH_SIZE)
        batch_data = dataList[offset:(offset + BATCH_SIZE), :, :, :]
        batch_labels = labels[offset:(offset + BATCH_SIZE)]
        feed_dict = {validation_data_node: batch_data,
                     validation_labels_node: batch_labels}
        validation_prediction_result = session.run(validation_prediction, feed_dict=feed_dict)
        errorCount += ModelUtil.error_count(validation_prediction_result, batch_labels)
    return errorCount * 100.0 / data_size
def build_model_with_linearProj(input_stories, input_question, input_answer, v2i, w2v, 
			answer_index=None, lr=0.01,
			d_w2v=300, d_lproj=300,
			isTest=False):


	with tf.variable_scope('share_embedding_matrix') as scope:
		
		T_B, T_w2v, T_mask, pca_mat = ModelUtil.setWord2VecModelConfiguration(v2i,w2v,d_w2v,d_lproj)
		# encode question
		embeded_question_words, mask_q = ModelUtil.getEmbeddingWithWord2Vec(input_question, T_w2v, T_mask)
		embeded_question = ModelUtil.getAverageRepresentation(embeded_question_words,T_B,d_lproj)

		scope.reuse_variables()
		# encode stories
		embeded_stories_words, mask_s = ModelUtil.getEmbeddingWithWord2Vec(input_stories, T_w2v, T_mask)
		embeded_stories = ModelUtil.getMemoryNetworks(embeded_stories_words, embeded_question, d_lproj, T_B=T_B)

		# encode answers
		embeded_answer_words, mask_a = ModelUtil.getEmbeddingWithWord2Vec(input_answer, T_w2v, T_mask)
		embeded_answer = ModelUtil.getAverageRepresentation(embeded_answer_words,T_B,d_lproj)

		# T_s, T_q, T_a = ModelUtil.getMultiModel(embeded_stories, embeded_question, embeded_answer, common_space_dim)
		

		if not isTest:
			# loss = ModelUtil.getTripletLoss(T_s, T_q, T_a, y)
			# loss,scores = ModelUtil.getRankingLoss(embeded_stories, embeded_question, embeded_answer, answer_index=answer_index,isTest=isTest)
			loss,scores = ModelUtil.getClassifierLoss(embeded_stories, embeded_question, embeded_answer, answer_index=answer_index,isTest=isTest)
			

			
			# train module
			loss = tf.reduce_mean(loss)
			# acc_value = tf.metrics.accuracy(y, embeded_question)
			optimizer = tf.train.GradientDescentOptimizer(lr)
			train = optimizer.minimize(loss)
			return train,loss,scores
		else:
			scores = ModelUtil.getRankingLoss(embeded_stories, embeded_question, embeded_answer, answer_index=answer_index,isTest=isTest)
			return scores
Example No. 5
def CaculateErrorRate(session, dataList, labels):
    data_size = dataList.shape[0]
    errorCount = 0
    for step in xrange(int(data_size / 100)):
        offset = (step * 100)
        batch_data = dataList[offset:(offset + 100)]
        batch_text_data_vector = TextVectorUtil.BuildText2DimArray(batch_data, tokenDict)
        batch_labels = labels[offset:(offset + 100)]
        feed_dict = {X: batch_text_data_vector, Y: batch_labels, p_keep_input: 1.0, p_keep_hidden: 1.0}
        # Run the graph and fetch some of the nodes.
        #print batch_data.shape
        #print batch_labels.shape
        #print train_labels
        validation_prediction_result = session.run(validation_x, feed_dict=feed_dict)
        errorCount += ModelUtil.error_count(validation_prediction_result, batch_labels)
    return errorCount * 100.0 / data_size
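
ModelUtil.error_count and ModelUtil.error_rate are project helpers that are not shown in these examples. The stand-ins below are hypothetical, assuming softmax prediction rows are compared against one-hot label rows; the names and semantics are assumptions, not the original implementation.

# Hypothetical stand-ins for ModelUtil.error_count / error_rate (assumption:
# `predictions` are softmax rows, `labels` are one-hot rows).
import numpy as np

def error_count(predictions, labels):
    # Number of rows where the arg-max class differs from the true class.
    return np.sum(np.argmax(predictions, axis=1) != np.argmax(labels, axis=1))

def error_rate(predictions, labels):
    # Mispredicted rows as a percentage of the batch.
    return 100.0 * error_count(predictions, labels) / predictions.shape[0]

# Example: one of two rows is wrong -> 50.0
preds = np.array([[0.9, 0.1], [0.2, 0.8]])
truth = np.array([[1.0, 0.0], [1.0, 0.0]])
print(error_rate(preds, truth))
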
Example No. 6
def CaculateErrorRate(session, dataList, labels):
    data_size = dataList.shape[0]
    errorCount = 0
    for step in xrange(int(data_size / BATCH_SIZE)):
        offset = (step * BATCH_SIZE)
        batch_data_image = dataList[offset:(offset + BATCH_SIZE), :, :, :]
        batch_labels = labels[offset:(offset + BATCH_SIZE)]

        batch_text_data = train_tokens_list[offset:(offset + BATCH_SIZE)]
        batch_text_data_vector = TextVectorUtil.BuildText2DimArray(batch_text_data, tokenDict)
        # Use the passed-in session; the original snippet referenced an outer `s`.
        feature_values = session.run(model_1_features, feed_dict={train_data_node_model_1: batch_data_image})
        batch_data = numpy.append(feature_values, batch_text_data_vector, 1)

        feed_dict = {validation_data_node: batch_data,
                     validation_labels_node: batch_labels}
        validation_prediction_result = session.run(validation_prediction, feed_dict=feed_dict)
        errorCount += ModelUtil.error_count(validation_prediction_result, batch_labels)
    return errorCount * 100.0 / data_size
Example No. 7
def build_model(input_video,
                input_question,
                input_answer,
                v2i,
                w2v_model,
                pca_mat=None,
                d_w2v=300,
                d_lproj=300,
                answer_index=None,
                lr=0.01):

    with tf.variable_scope('share_embedding_matrix') as scope:

        T_B, T_w2v, T_mask, pca_mat_ = ModelUtil.setWord2VecModelConfiguration(
            v2i, w2v_model, d_w2v, d_lproj)
        # encode question
        embeded_question_words, mask_q = ModelUtil.getEmbeddingWithWord2Vec(
            input_question, T_w2v, T_mask)
        embeded_question = ModelUtil.getAverageRepresentation(
            embeded_question_words, T_B, d_lproj)

        embeded_video = ModelUtil.getVideoSemanticEmbedding(
            input_video, T_w2v, T_B,
            pca_mat=pca_mat)  # batch x timesteps x d_w2v

        embeded_answer_words, mask_a = ModelUtil.getEmbeddingWithWord2Vec(
            input_answer, T_w2v, T_mask)
        embeded_answer = ModelUtil.getAverageRepresentation(
            embeded_answer_words, T_B, d_lproj)

        loss, scores = ModelUtil.getClassifierLoss(embeded_video,
                                                   embeded_question,
                                                   embeded_answer,
                                                   answer_index=answer_index)

        # train module
        loss = tf.reduce_mean(loss)
        # acc_value = tf.metrics.accuracy(y, embeded_question)
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)
        return train, loss, scores
Example No. 8
def main():

    size_voc = 10

    video_feature_dims = 100
    timesteps_v = 10  # sequence length for video
    timesteps_q = 11  # sequence length for question
    timesteps_a = 12  # sequence length for answer
    numberOfChoices = 2  # for input choices, one for correct, one for wrong answer

    word_embedding_size = 10
    sentence_embedding_size = 20
    visual_embedding_dims = 25

    common_space_dim = 30

    print('test..')
    with tf.variable_scope('share_embedding_matrix') as scope:
        input_video = tf.placeholder(tf.float32,
                                     shape=(None, timesteps_v,
                                            video_feature_dims),
                                     name='input_video')
        input_question = tf.placeholder(tf.int32,
                                        shape=(None, timesteps_q),
                                        name='input_question')
        input_answer = tf.placeholder(tf.int32,
                                      shape=(None, numberOfChoices,
                                             timesteps_a),
                                      name='input_answer')

        y = tf.placeholder(tf.float32, shape=(None, numberOfChoices))

        embeded_video = ModelUtil.getVideoEncoder(input_video,
                                                  visual_embedding_dims)

        embeded_question_words, mask_q = ModelUtil.getEmbedding(
            input_question, size_voc, word_embedding_size)
        embeded_question = ModelUtil.getQuestionEncoder(
            embeded_question_words, sentence_embedding_size, mask_q)

        scope.reuse_variables()
        embeded_answer_words, mask_a = ModelUtil.getAnswerEmbedding(
            input_answer, size_voc, word_embedding_size)
        embeded_answer = ModelUtil.getAnswerEncoder(embeded_answer_words,
                                                    sentence_embedding_size,
                                                    mask_a)

        T_v, T_q, T_a = ModelUtil.getMultiModel(embeded_video,
                                                embeded_question,
                                                embeded_answer,
                                                common_space_dim)
        loss = ModelUtil.getTripletLoss(T_v, T_q, T_a, y)

        # train module
        loss = tf.reduce_mean(loss)
        # acc_value = tf.metrics.accuracy(y, embeded_question)
        optimizer = tf.train.GradientDescentOptimizer(0.01)
        train = optimizer.minimize(loss)

    # runtime environment
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    with sess.as_default():
        for i in range(10000):
            batch_size = 64
            data_v = np.random.random(
                (batch_size, timesteps_v, video_feature_dims))
            data_q = np.random.randint(0,
                                       10,
                                       size=(batch_size, timesteps_q),
                                       dtype='int32')
            data_a = np.random.randint(0,
                                       10,
                                       size=(batch_size, numberOfChoices,
                                             timesteps_a),
                                       dtype='int32')

            data_y = np.zeros((batch_size, numberOfChoices), dtype='float32')
            data_y[:, 1] = 1.0
            _, l = sess.run(
                [train, loss],
                feed_dict={
                    input_video: data_v,
                    input_question: data_q,
                    input_answer: data_a,
                    y: data_y
                })
            print(l)
Example No. 9
def build_model(input_video,
                input_stories,
                input_question,
                input_answer,
                v2i,
                w2v_model,
                pca_mat=None,
                d_w2v=300,
                d_lproj=300,
                answer_index=None,
                lr=0.01,
                isTest=False,
                question_guided=False):

    with tf.variable_scope('video_subtitle_hierarchical_frame') as scope:

        T_B, T_w2v, T_mask, pca_mat_ = ModelUtil.setWord2VecModelConfiguration(
            v2i, w2v_model, d_w2v, d_lproj)
        # encode question
        embedded_question_words, mask_q = ModelUtil.getEmbeddingWithWord2Vec(
            input_question, T_w2v, T_mask)
        embedded_question = HSEModelUtil.getAverageRepresentation(
            embedded_question_words, T_B, d_lproj)

        # encode stories
        # embedded_stories_words, mask_s = ModelUtil.getEmbeddingWithWord2Vec(input_stories, T_w2v, T_mask)
        # embedded_stories = ModelUtil.getMemoryNetworks(embedded_stories_words, embedded_question, d_lproj, T_B=T_B, return_sequences=True)

        # encode video
        # embedded_video = HSEModelUtil.getVideoDualSemanticEmbedding(input_video, T_w2v, embedded_stories, T_B, pca_mat=pca_mat) # batch x timesteps x d_w2v
        # print('pca_mat:',pca_mat)
        # embedded_video = HSEModelUtil.getVideoSemanticEmbedding(input_video, T_w2v, T_B, pca_mat=pca_mat)

        seqvlad = SeqVladModel.SeqVladWithReduAttentionModel(input_video,
                                                             d_w2v=d_w2v,
                                                             reduction_dim=512,
                                                             centers_num=64,
                                                             filter_size=3)
        vlad_feature = seqvlad.build_model()

        # encode answers
        embedded_answer_words, mask_a = ModelUtil.getEmbeddingWithWord2Vec(
            input_answer, T_w2v, T_mask)
        embedded_answer = HSEModelUtil.getAverageRepresentation(
            embedded_answer_words, T_B, d_lproj)

        video_loss, video_scores = ModelUtil.getClassifierLoss(
            vlad_feature,
            embedded_question,
            embedded_answer,
            answer_index=answer_index)

        if isTest:
            # get video loss
            video_scores = ModelUtil.getClassifierLoss(
                vlad_feature,
                embedded_question,
                embedded_answer,
                answer_index=answer_index,
                isTest=isTest)

            return video_scores
        else:
            # train module
            loss = tf.reduce_mean(video_loss)
            optimizer = tf.train.GradientDescentOptimizer(lr)
            train = optimizer.minimize(loss)
            return train, loss, video_scores
Example No. 10
                       X.shape[1])
    #Y = tsne.FitNumpy(X)   # simple way, but slow for large file.
    return np.fromiter(Y, float).reshape(X.shape[0], -1)


def ReduceByPca(X, pcaNumber=50):
    pca = TsneDx.FastPca()
    X = X.astype(np.float32)
    X1 = pca.DoPcaBuffer(X.__array_interface__['data'][0], X.shape[0],
                         X.shape[1], pcaNumber)
    return np.fromiter(X1, float).reshape(X.shape[0], -1)


#=================================

log = ModelUtil.Logger()

print('Loading data from VisuMap...')
X = log.LoadTable(dsName='+')
if len(X) == 0: X = log.LoadTable(dsName='@')
print('Loaded table ', X.shape)

pcaNr = -100
if pcaNr > 0:
    print('Doing PCA-Reduction on table ', X.shape)
    X = ReduceByPca(X, pcaNumber=pcaNr)
    print('Data reduced to: ', X.shape)

print('Fitting table ', X.shape)
t0 = time.time()
Y = DoTsneMap(X, perplexityRatio=0.025, maxEpochs=100, outDim=2, metricType=0)
Example No. 11
import numpy as np
import ModelUtil as mu
import tensorflow as tf
import tensorflow.compat.v1 as tf1

co = mu.CmdOptions()
md = mu.ModelBuilder(job=co.job)
#D = np.load('sphere.npy')
D = np.load('sphere2.npy')
N, yDim = D.shape[0], D.shape[1]
md.r0, md.decay, md.batchSize, layers, L, repDim = 0.001, 0.99, 100 * co.jj, 3 * [24], 200, 8

md.InitModel(0, yDim)
R = tf.Variable(np.random.uniform(0, 0.1, [L, repDim]).astype(np.float32))
md.inputHod = tf1.placeholder(tf.int32, shape=[None], name='InputHolder')
md.top = tf.gather(R, md.inputHod)
md.AddLayers(layers)
md.AddLayers(yDim, activation=tf.nn.sigmoid)
md.AddScalingTo(D)

batchDist = tf.reduce_sum(tf.square(md.top - md.Label()), axis=1)
md.cost = batchDist[tf.argmin(batchDist)]
md.SetAdamOptimizer(co.epochs, N)
md.InitAllVariables()

for md.lastEpoch in range(1, co.epochs + 1):
    md.lastError = 0.0
    for row in range(N):
        inTensor = np.random.randint(0, L, size=(md.batchSize))
Example No. 12
def train():
    tokenDict = TextVectorUtil.GetAllTokenDict('../../data/all_trainning_tokens.csv')
    all_labels = DataUtil.LoadAllLabels('../../category_name_id_map.csv')
    train_data,train_labels = DataUtil.LoadTextTokenList('../../data/trainning_data.csv')
    
    validation_data,validation_labels = DataUtil.LoadTextTokenList('../../data/validation_data.csv')
    
    train_size = train_data.shape[0]
    validation_size = validation_data.shape[0]
    
    
    train_labels = [item[0] for item in train_labels]
    # Convert labels to softmax matrix
    label_list = np.ndarray(shape=[train_size], dtype=np.float32)
    for index in range(train_size):
        label_list[index] = np.where(all_labels == train_labels[index])[0][0]
    
    train_labels = (np.arange(len(all_labels)) == label_list[:, None]).astype(np.float32)
    
    validation_labels = [item[0] for item in validation_labels]
    # Convert labels to softmax matrix
    label_list = np.ndarray(shape=[validation_size], dtype=np.float32)
    for index in range(validation_size):
        label_list[index] = np.where(all_labels == validation_labels[index])[0][0]
    
    validation_labels = (np.arange(len(all_labels)) == label_list[:, None]).astype(np.float32)

    w_h = init_weights([len(tokenDict), 400])
    w_h2 = init_weights([400, 400])
    w_o = init_weights([400, len(all_labels)])

    X = tf.placeholder("float", [None, len(tokenDict)])
    Y = tf.placeholder("float", [None, len(all_labels)])
    
    p_keep_input = tf.placeholder("float")
    p_keep_hidden = tf.placeholder("float")
    
    py_x = model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden,True)
    validation_x = model(X, w_h, w_h2, w_o, p_keep_input, p_keep_hidden,False)
    
    def CaculateErrorRate(session,dataList,labels):
        data_size = dataList.shape[0]
        errorCount = 0
        for step in xrange(int(data_size / 100)):
            offset = (step * 100)
            batch_data = dataList[offset:(offset + 100)]
            batch_text_data_vector = TextVectorUtil.BuildText2DimArray(batch_data,tokenDict)
            batch_labels = labels[offset:(offset + 100)]
            feed_dict={X: batch_text_data_vector, Y: batch_labels,p_keep_input: 1.0,p_keep_hidden: 1.0}
            # Run the graph and fetch some of the nodes.
            #print batch_data.shape
            #print batch_labels.shape
            #print train_labels
            validation_prediction_result = session.run(validation_x,feed_dict=feed_dict)
            errorCount += ModelUtil.error_count(validation_prediction_result,batch_labels)
        return errorCount * 100.0 / data_size
    
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(py_x, Y))
    train_op = tf.train.RMSPropOptimizer(0.01, 0.95).minimize(cost)
    #predict_op = tf.argmax(py_x, 1)
    predict_op = tf.nn.softmax(py_x)
    
    sess = tf.Session()
    init = tf.initialize_all_variables()
    sess.run(init)
    
    batch_size = 100
    for x in range(10):
        for i in range(train_size / batch_size):
            offset = (i * batch_size) % (train_size - batch_size)
            current_batch_data = train_data[offset:(offset + batch_size)]
            batch_text_data_vector = TextVectorUtil.BuildText2DimArray(current_batch_data,tokenDict)
            batch_labels = train_labels[offset:(offset + batch_size)]
            #print batch_text_data_vector
            loss, prediction,_ = sess.run([cost,predict_op,train_op], feed_dict={X: batch_text_data_vector, 
                                                            Y: batch_labels,
                                                            p_keep_input: 1.0,
                                                            p_keep_hidden: 1.0})
            #print prediction     
            #print batch_labels
            print 'Loss, %.3f' % loss
            print 'predict %.3f' % ModelUtil.error_rate(prediction,batch_labels)
            
            if (x * (train_size / batch_size) + i) % 200 == 0 and (x * (train_size / batch_size) + i) != 0:
                print 'validation error:%.4f' % CaculateErrorRate(sess, validation_data, validation_labels)
    sess.close()
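
TextVectorUtil.BuildText2DimArray is another project helper that is not shown in these examples. The sketch below is a hypothetical stand-in, assuming it turns a batch of token lists into a dense bag-of-words matrix over tokenDict (token -> column index); the function name and behaviour are assumptions, not the original code.

# Hypothetical stand-in for TextVectorUtil.BuildText2DimArray (assumption:
# dense bag-of-words rows over a token->index dictionary).
import numpy as np

def build_text_2dim_array(token_lists, token_dict):
    out = np.zeros((len(token_lists), len(token_dict)), dtype=np.float32)
    for row, tokens in enumerate(token_lists):
        for tok in tokens:
            col = token_dict.get(tok)
            if col is not None:
                out[row, col] += 1.0
    return out

# Example: two "documents" over a three-token vocabulary.
print(build_text_2dim_array([['red', 'shoe'], ['blue']],
                            {'red': 0, 'blue': 1, 'shoe': 2}))
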
Example No. 13
def build_model(input_video,
                input_stories,
                input_question,
                input_answer,
                v2i,
                w2v_model,
                pca_mat=None,
                d_w2v=300,
                d_lproj=300,
                answer_index=None,
                lr=0.01,
                question_guided=False):

    with tf.variable_scope('share_embedding_matrix') as scope:

        T_B, T_w2v, T_mask, pca_mat = ModelUtil.setWord2VecModelConfiguration(
            v2i, w2v_model, d_w2v, d_lproj)
        # encode question
        embeded_question_words, mask_q = ModelUtil.getEmbeddingWithWord2Vec(
            input_question, T_w2v, T_mask)
        embeded_question = ModelUtil.getAverageRepresentation(
            embeded_question_words, T_B, d_lproj)

        # encode video
        if question_guided:
            embeded_video = SEModelUtil.getVideoQuestionGuidedSemanticEmbedding(
                input_video, embeded_question, T_w2v, T_B, pca_mat=pca_mat)
        else:
            embeded_video = ModelUtil.getVideoSemanticEmbedding(
                input_video, T_w2v, T_B,
                pca_mat=pca_mat)  # batch x timesteps x d_w2v

        # encode stories
        embeded_stories_words, mask_s = ModelUtil.getEmbeddingWithWord2Vec(
            input_stories, T_w2v, T_mask)
        embeded_stories = ModelUtil.getMemoryNetworks(embeded_stories_words,
                                                      embeded_question,
                                                      d_lproj,
                                                      T_B=T_B)

        # encode answers
        embeded_answer_words, mask_a = ModelUtil.getEmbeddingWithWord2Vec(
            input_answer, T_w2v, T_mask)
        embeded_answer = ModelUtil.getAverageRepresentation(
            embeded_answer_words, T_B, d_lproj)

        # get video loss
        video_loss, video_scores = ModelUtil.getClassifierLoss(
            embeded_video,
            embeded_question,
            embeded_answer,
            answer_index=answer_index)

        # get subtitle loss
        subtitle_loss, subtitle_scores = ModelUtil.getClassifierLoss(
            embeded_stories,
            embeded_question,
            embeded_answer,
            answer_index=answer_index)

        # late fusion
        loss = 1.0 * (video_loss + subtitle_loss) / 2

        scores = 1.0 * (video_scores + subtitle_scores) / 2

        # train module
        loss = tf.reduce_mean(loss)
        # acc_value = tf.metrics.accuracy(y, embeded_question)
        optimizer = tf.train.GradientDescentOptimizer(lr)
        train = optimizer.minimize(loss)
        return train, loss, scores
Example No. 14
import hdbscan
import ModelUtil as mu
import numpy as np

vm = mu.Logger()
pos = vm.LoadTable('$')
minCluster = 10
minSamples = 10
print('HDBSCAN Clustering!   Loaded table: ', pos.shape)
cluster = hdbscan.HDBSCAN(min_cluster_size=minCluster, min_samples=minSamples)
labels = cluster.fit_predict(pos)
vm.UpdateLabels(labels)

noises = np.count_nonzero(labels == -1)
clusters = len(np.unique(labels))
vm.RunScript("vv.Title='Clusters: %d; Noise: %d, minSz/Sp: %d/%d'" %
             (clusters, noises, minCluster, minSamples))
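
The same clustering call can be exercised without the VisuMap Logger. The sketch below runs HDBSCAN on synthetic data with the same min_cluster_size/min_samples settings; the synthetic blobs are an illustrative assumption, and the hdbscan package must be installed.

# Self-contained HDBSCAN sketch on synthetic 2-D data (illustrative only).
import numpy as np
import hdbscan

rng = np.random.RandomState(0)
# Three well-separated 2-D blobs of 50 points each.
pos = np.vstack([rng.normal(loc, 0.2, size=(50, 2)) for loc in (0.0, 3.0, 6.0)])
labels = hdbscan.HDBSCAN(min_cluster_size=10, min_samples=10).fit_predict(pos)
print('clusters:', len(np.unique(labels)), 'noise points:', np.count_nonzero(labels == -1))
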
Example No. 15
def main_1(argv=None):  # pylint: disable=unused-argument
    
    
    train_data, train_tokens_list,train_labels = DataUtil.LoadCategoryData('../data/trainning_data.csv','../'+NAME_ID_MAPPING_NAME,'../data/100_100',imageInfo)
    validation_data, validation_tokens_list,validation_labels = DataUtil.LoadCategoryData('../data/validation_data.csv','../'+NAME_ID_MAPPING_NAME,'../data/100_100',imageInfo)
    test_data, test_tokens_list,test_labels = DataUtil.LoadCategoryData('../data/test_data.csv','../'+NAME_ID_MAPPING_NAME,'../data/100_100',imageInfo)

    print train_labels
    validation_size = validation_data.shape[0]
    test_size = test_data.shape[0]
    train_size = train_data.shape[0]

    print "train_labels",train_labels.shape
    
    tokenDict = TextVectorUtil.GetAllTokenDict('../data/all_trainning_tokens.csv')
    
    tokenCount = len(tokenDict)
    
    labelCount = train_labels.shape[1]
    
    num_epochs = NUM_EPOCHS
   
    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(tf.float32, shape=[None, imageInfo['WIDTH'], imageInfo['HEIGHT'], imageInfo['CHANNELS']])
    
    train_text_node = tf.placeholder(tf.float32,shape=(BATCH_SIZE, tokenCount))
    
    train_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, labelCount))

    validation_data_node = tf.placeholder(tf.float32,shape=(BATCH_SIZE, imageInfo['WIDTH'], imageInfo['HEIGHT'], imageInfo['CHANNELS']))
    validation_text_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, tokenCount))
        
    validation_labels_node = tf.placeholder(tf.float32, shape=(BATCH_SIZE, labelCount))
    
    test_data_node = tf.placeholder(tf.float32,shape=(BATCH_SIZE, imageInfo['WIDTH'], imageInfo['HEIGHT'], imageInfo['CHANNELS']))
    test_text_node = tf.placeholder(tf.float32,shape=(BATCH_SIZE, tokenCount))
        
    check_data_node = tf.placeholder(tf.float32, shape=(1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS), name='check_data_node')
    check_text_node = tf.placeholder(tf.float32,shape=(1, tokenCount))
    
    # The variables below hold all the trainable weights. They are passed an
    # initial value which will be assigned when we call:
    # {tf.initialize_all_variables().run()}
    conv1_weights = tf.Variable(
        tf.truncated_normal([5, 5, imageInfo['CHANNELS'], 32],  # 5x5 filter, depth 32.
                            stddev=0.1,
                            seed=SEED), name='conv1_weights')
    conv1_biases = tf.Variable(tf.zeros([32]), name='conv1_biases')
	
    conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 32, 64],
                            stddev=0.1,
                            seed=SEED), name='conv2_weights')
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]), name='conv2_biases')
    
    conv3_weights = tf.Variable(
        tf.truncated_normal([5, 5, 64, 128],
                            stddev=0.1,
                            seed=SEED), name='conv3_weights') 
    conv3_biases = tf.Variable(tf.constant(0.1, shape=[128]), name='conv3_biases')
    
    fc1_weights = tf.Variable(  # fully connected, depth 1024.
        tf.truncated_normal([int(imageInfo['WIDTH'] / 8) * int(imageInfo['HEIGHT'] / 8) * 128 + tokenCount, 800],
                            stddev=0.1,
                            seed=SEED), name='fc1_weights')
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[800]), name='fc1_biases')
    fc2_weights = tf.Variable(
        tf.truncated_normal([800, 800],
                            stddev=0.1,
                            seed=SEED), name='fc2_weights')
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[800]), name='fc2_biases')
    
    fc3_weights = tf.Variable(
        tf.truncated_normal([800, labelCount],
                            stddev=0.1,
                            seed=SEED), name='fc3_weights')
    fc3_biases = tf.Variable(tf.constant(0.1, shape=[labelCount]), name='fc3_biases')
    
    # Var list to save
    #varlist = [conv1_weights,conv1_biases,conv2_weights,conv2_biases,fc1_weights,fc1_biases,fc2_weights,fc2_biases]

    # We will replicate the model structure for the training subgraph, as well
    # as the evaluation subgraphs, while sharing the trainable parameters.
    def model(data,text_data, train=False):
        """The Model definition."""
        # 2D convolution, with 'SAME' padding (i.e. the output feature map has
        # the same size as the input). Note that {strides} is a 4D array whose
        # shape matches the data layout: [image index, y, x, depth].
        conv = tf.nn.conv2d(data,
                            conv1_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        # Bias and rectified linear non-linearity.
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
        # Max pooling. The kernel size spec {ksize} also follows the layout of
        # the data. Here we have a pooling window of 2, and a stride of 2.
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')
        conv = tf.nn.conv2d(pool,
                            conv2_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')
        print pool.get_shape().as_list()
        conv = tf.nn.conv2d(pool,
                            conv3_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv3_biases))
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='VALID')
                                                            
        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool.get_shape().as_list()
        print pool_shape
        print fc1_weights.get_shape().as_list()
        reshape = tf.reshape(
            pool,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
        #Add text vector into account before fully connected layer
        
        reshape = tf.concat(1,[reshape,text_data])
        
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden1 = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        '''
        if train:
            hidden1 = tf.nn.dropout(hidden1, 0.5, seed=SEED)
        '''
        hidden2 = tf.nn.relu(tf.matmul(hidden1, fc2_weights) + fc2_biases)
        '''
        if train:
            hidden2 = tf.nn.dropout(hidden2, 0.5, seed=SEED)
        '''
        return tf.matmul(hidden2, fc3_weights) + fc3_biases      
    def FreezeGraph(sess):
        checkpoint_prefix = os.path.join(MODEL_FOLDER, "saved_checkpoint")
        checkpoint_state_name = "checkpoint_state"
        input_graph_name = "input_graph.pb"
        output_graph_name = "output_graph.pb"

        # Save a checkpoint and write out the graph definition so the two can
        # later be merged into a single frozen graph.
        saver = tf.train.Saver()
        saver.save(sess, checkpoint_prefix, global_step=0,
                        latest_filename=checkpoint_state_name)
        tf.train.write_graph(sess.graph.as_graph_def(), MODEL_FOLDER,input_graph_name)

        # We save out the graph to disk, and then call the const conversion
        # routine.
        input_graph_path = os.path.join(MODEL_FOLDER, input_graph_name)
        input_saver_def_path = ""
        input_binary = False
        input_checkpoint_path = checkpoint_prefix + "-0"
        output_node_names = "check_data_node,check_prediction"
        restore_op_name = "save/restore_all"
        filename_tensor_name = "save/Const:0"
        output_graph_path = os.path.join(MODEL_FOLDER, output_graph_name)
        clear_devices = False

        freeze_graph(input_graph_path, input_saver_def_path,
                                input_binary, input_checkpoint_path,
                                output_node_names, restore_op_name,
                                filename_tensor_name, output_graph_path,
                                clear_devices)
    # Training computation: logits + cross-entropy loss.
    logits = model(train_data_node,train_text_node, True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
        logits, train_labels_node))

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    #loss += 5e-8 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.003,                # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,          # Decay step.
        0.95,                # Decay rate.
        staircase=True)
    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           0.9).minimize(loss,
                                                         global_step=batch)

    # Predictions for the minibatch, validation set and test set.
    train_prediction = tf.nn.softmax(logits)
    # We'll compute them only once in a while by calling their {eval()} method.
    validation_prediction = tf.nn.softmax(model(validation_data_node,validation_text_node))
    test_prediction = tf.nn.softmax(model(test_data_node,test_text_node))
    
    check_prediction = tf.nn.softmax(model(check_data_node,check_text_node), name="check_prediction")
    # Create a local session to run this computation.
    saver = tf.train.Saver()
    #Save the graph model
    #tf.train.export_meta_graph(filename='./models/producttype/graph.save', as_text=True)
    with tf.Session() as s:
    
        ckpt = tf.train.get_checkpoint_state(os.path.join(MODEL_FOLDER,'with_text'))
        tf.initialize_all_variables().run()
        if ckpt and ckpt.model_checkpoint_path:
            print "find the checkpoing file"
            saver.restore(s, ckpt.model_checkpoint_path)
        else:
            # Run all the initializers to prepare the trainable parameters.
            tf.initialize_all_variables().run()
        #Save the graph model
        tf.train.write_graph(s.graph_def, '', os.path.join(MODEL_FOLDER,'with_text/graph.pb'), as_text=False)

        print 'Initialized!'
        # Loop through training steps.
        for step in xrange(int(num_epochs * train_size / BATCH_SIZE)):
            # Compute the offset of the current minibatch in the data.
            # Note that we could use better randomization across epochs.
            offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
            batch_data = train_data[offset:(offset + BATCH_SIZE), :, :, :]
            batch_text_data = train_tokens_list[offset:(offset + BATCH_SIZE)]
            batch_text_data_vector = TextVectorUtil.BuildText2DimArray(batch_text_data,tokenDict)
            batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            #print batch_data.shape
            feed_dict = {train_data_node: batch_data,
                         train_text_node: batch_text_data_vector,
                         train_labels_node: batch_labels}
            # Run the graph and fetch some of the nodes.
            #print batch_data.shape
            #print batch_labels.shape
            #print train_labels
            _, l, lr, predictions = s.run(
                [optimizer, loss, learning_rate, train_prediction],
                feed_dict=feed_dict)

            if step % 1 == 0:
                #print s.run(conv1_weights);
                #print s.run(conv2_weights);
                
                #saver.save(s,save_path='../models/producttype/train_result')
                
                print 'Epoch %.2f' % (float(step) * BATCH_SIZE / train_size)
                print 'Minibatch loss: %.3f, learning rate: %.6f' % (l, lr)
                print 'Minibatch error: %.1f%%' % ModelUtil.error_rate(predictions,batch_labels)
            if step % 100 == 0 and step != 0 :                                
                print 'Validation error: %.1f%%' % CaculateErrorRate(s,validation_data,validation_tokens_list,validation_labels)
                sys.stdout.flush()
                
        FreezeGraph(s)
        #saver.save(s,save_path='../models/producttype/train_result')
        # Finally print the result!
        test_error = CaculateErrorRate(s,test_data,test_tokens_list,test_labels)
        print 'Test error: %.1f%%' % test_error
        if FLAGS.self_test:
            print 'test_error', test_error
            assert test_error == 0.0, 'expected 0.0 test_error, got %.2f' % (test_error,)
Example No. 16
def trainModel_2():
    train_data, train_tokens_list,train_labels = DataUtil.LoadCategoryData('../../data/trainning_data.csv','../../'+NAME_ID_MAPPING_NAME,'../../data/100_100',imageInfo)
    validation_data, validation_tokens_list,validation_labels = DataUtil.LoadCategoryData('../../data/validation_data.csv','../../'+NAME_ID_MAPPING_NAME,'../../data/100_100',imageInfo)
    test_data, test_tokens_list,test_labels = DataUtil.LoadCategoryData('../../data/test_data.csv','../../'+NAME_ID_MAPPING_NAME,'../../data/100_100',imageInfo)
    
    validation_size = validation_data.shape[0]
    test_size = test_data.shape[0]
    train_size = train_data.shape[0]
    labelCount = train_labels.shape[1]
    input_d = int(imageInfo['WIDTH'] / 8) * int(imageInfo['HEIGHT'] / 8) * 64 + tokenCount
    
    train_data_node = tf.placeholder(tf.float32, shape=[None, input_d])
    train_labels_node = tf.placeholder(tf.float32, shape=(None, NUM_LABELS))

    validation_data_node = tf.placeholder(tf.float32, shape=[None, input_d])
    validation_labels_node = tf.placeholder(tf.float32,  shape=[None, NUM_LABELS])
    
    logits = model_2(train_data_node,True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, train_labels_node))

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    #loss += 5e-8 * regularizers
    
    train_data_node_model_1 = tf.placeholder(tf.float32, shape=[BATCH_SIZE, imageInfo['WIDTH'], imageInfo['HEIGHT'], imageInfo['CHANNELS']])
    model_1_features = model_1(train_data_node_model_1,True)
    
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,                # Base learning rate.
        batch_2 * BATCH_SIZE,  # Current index into the dataset.
        train_size,          # Decay step.
        0.95,                # Decay rate.
        staircase=True)
    # Use simple momentum for the optimization.
    #optimizer = tf.train.MomentumOptimizer(learning_rate,0.9).minimize(loss,global_step=batch_2)
    optimizer = tf.train.RMSPropOptimizer(0.01, 0.95).minimize(loss,global_step=batch_2)
    # Predictions for the minibatch, validation set and test set.
    train_prediction = tf.nn.softmax(logits)
    validation_prediction = tf.nn.softmax(model_2(validation_data_node))
    
    
    def CaculateErrorRate(session,dataList, labels):
        data_size = dataList.shape[0]
        errorCount = 0
        for step in xrange(int(data_size / BATCH_SIZE)):
            offset = (step * BATCH_SIZE)
            batch_data_image = dataList[offset:(offset + BATCH_SIZE), :, :, :]
            batch_labels = labels[offset:(offset + BATCH_SIZE)]
            
            batch_text_data = train_tokens_list[offset:(offset + BATCH_SIZE)]
            batch_text_data_vector = TextVectorUtil.BuildText2DimArray(batch_text_data,tokenDict)
            feature_values = s.run(model_1_features,feed_dict={train_data_node_model_1:batch_data_image})
            batch_data = numpy.append(feature_values,batch_text_data_vector,1)
            
            feed_dict = {validation_data_node: batch_data,
                         validation_labels_node: batch_labels}
            validation_prediction_result = session.run(validation_prediction,feed_dict=feed_dict)
            errorCount += ModelUtil.error_count(validation_prediction_result,batch_labels)
        return errorCount * 100.0 / data_size
    restorer = tf.train.Saver(store_list)
    saver = tf.train.Saver(store_list_2)
    with tf.Session() as s: 
        tf.initialize_all_variables().run()
        restorer.restore(s,save_path='./train_result')
        #saver.save(s,save_path='./train_result')
        for step in xrange(int(NUM_EPOCHS * train_size / BATCH_SIZE)):
            # Compute the offset of the current minibatch in the data.
            # Note that we could use better randomization across epochs.
            offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
            batch_data_image = train_data[offset:(offset + BATCH_SIZE), :, :, :]
            batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
            batch_text_data = train_tokens_list[offset:(offset + BATCH_SIZE)]
            batch_text_data_vector = TextVectorUtil.BuildText2DimArray(batch_text_data,tokenDict)
            
            feature_values = s.run(model_1_features,feed_dict={train_data_node_model_1:batch_data_image})
            batch_data = numpy.append(feature_values,batch_text_data_vector,1)
            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            #print batch_data.shape
            #print 'max value:',numpy.max(feature_values)
            #print 'min value:',numpy.min(feature_values)
            feed_dict = {train_data_node: batch_data,train_labels_node: batch_labels}
            # Run the graph and fetch some of the nodes.
            _, l, lr, predictions = s.run([optimizer, loss, learning_rate, train_prediction],feed_dict=feed_dict)
            #saver.save(s,save_path='./train_result')
            if step % 1 == 0:
                print 'Epoch %.2f' % (float(step) * BATCH_SIZE / train_size)
                print 'Minibatch loss: %.3f, learning rate: %.6f' % (l, lr)
                print 'Minibatch error: %.1f%%' % ModelUtil.error_rate(predictions,batch_labels)
                sys.stdout.flush()
            if step % 100 == 0 and step != 0 :                                
                print 'Validation error: %.1f%%' % CaculateErrorRate(s,validation_data,validation_labels)
                sys.stdout.flush()
                    
        # Finally print the result!
        test_error = CaculateErrorRate(s,test_data,test_labels)
        print 'Test error: %.1f%%' % test_error