/
usage_model_2.py
149 lines (124 loc) · 4.86 KB
/
usage_model_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
import os,random
import numpy as np
from keras.utils import np_utils
import keras.models as models
from keras.layers import Input,merge
from keras.layers.core import Reshape,Dense,Dropout,Activation,Flatten
from keras.layers.advanced_activations import LeakyReLU
from keras.activations import *
from keras.layers.wrappers import TimeDistributed
from keras.layers.noise import GaussianNoise
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D, UpSampling2D
from keras.layers.recurrent import LSTM
from keras.regularizers import *
from keras.layers.normalization import *
from keras.optimizers import *
import matplotlib.pyplot as plt
import seaborn as sns
import random, sys, keras
from keras.models import Model,load_model
from keras.utils import np_utils
from tqdm import tqdm
from preprocessing import *
from model2_pipeline import *
from pprint import pprint as pr
import pickle
CONTEXT_WINDOW_SIZE = 3
VECTOR_DIMENSION=100
MAX_LENGTH=100
def extract_key_words(sentence):
words = preprocess_on_review_content(sentence)
keys=["不错"]
class_number=find_class_number()
while len(keys)<=2:
new_keyword = get_new_keyword(words,keys)
keys.append( new_keyword )
return keys
def get_new_keyword(words,keys):
# construct words and keys as input vectos
DEFAULT_MISSING_INDEX=str(len(word_map)+1)
vectors[DEFAULT_MISSING_INDEX]=np.zeros(VECTOR_DIMENSION,dtype=float)
X = list()
Y = list()
vectors_of_words = [ vectors[word_map[it]] for it in words ]
x1 = np.zeros( (len(vectors_of_words),VECTOR_DIMENSION), dtype=float)
for count,word in enumerate(vectors_of_words):
x1[count]+=np.asarray(word,dtype=float)
for count,target_word in enumerate(keys):
x2_temp = [ np.array(vectors[word_mapping(it,word_map)], dtype=float) for it in keys[count:0:-1] ][:CONTEXT_WINDOW_SIZE]
x2 = np.zeros( (CONTEXT_WINDOW_SIZE,VECTOR_DIMENSION),dtype=float)
for c,it in enumerate(x2_temp):
x2[c]+=x2_temp[c]
target_index=int(word_mapping(target_word,word_map))
X.append((x2,x1 ))
if(len(keys)==0):
x2 = np.zeros( (CONTEXT_WINDOW_SIZE,VECTOR_DIMENSION),dtype=float)
X.append( (x2,x1))
X=np.array(X)
X_o,X_s = construct_input_data(X)
result = model.predict({'state_context_input': X_s, 'observation_context_input': X_o},batch_size=32,verbose=1)
# print(result)
t=result.argmax(1)
s = index_to_word[str(t[0])]
return s
def get_index(x,x_s):
for c,it in enumerate(x_s):
if x==it:
return c
else:
continue
return 0
def conditiaonal_probability(target_words,sentence,keywords,model):
sentence = preprocess_on_review_content("".join(sentence))
word_map,vectors=load_meta_model()
DEFAULT_MISSING_INDEX=str(len(word_map)+1)
vectors[DEFAULT_MISSING_INDEX]=np.zeros(VECTOR_DIMENSION,dtype=float)
X = list()
vectors_of_words = [vectors[word_map[it]] for it in sentence ]
x1 = np.zeros( (len(vectors_of_words),VECTOR_DIMENSION), dtype=float)
for count,word in enumerate(vectors_of_words):
x1[count]+=np.asarray(word,dtype=float)
x2_temp = [ np.array(vectors[word_mapping(it,word_map)], dtype=float) for it in keywords[count:0:-1] ][:CONTEXT_WINDOW_SIZE]
x2 = np.zeros( (CONTEXT_WINDOW_SIZE,VECTOR_DIMENSION),dtype=float)
for c,it in enumerate(x2_temp):
x2[c]+=x2_temp[c]
for target_word in target_words:
try:
x3 = np.array( vectors[word_mapping(target_word,word_map)] )
x3 = np.array(x3)
c= get_index(target_word,sentence)
x1_before,x1_current,x1_after = padding_zeros(x1,index=c,window_size=5,zero_shape=(100))
X.append(np.vstack((x2,x1_before,x1_current,x1_after,x3)))
except:
print(c)
print(x2.shape)
print(x1_after.shape)
print(x1_current.shape)
print(x1_before.shape)
print(x3.shape)
X=np.array(X)
# X,Y=construct_train_data_raw([sentence],[keys])
X_o,X_s,X_current = construct_input_data(X)
predictions=model.predict({'state_context_input': X_s, 'observation_context_input': X_o,'current_input':X_current},
batch_size=32,verbose=2)
return predictions
if __name__ == "__main__":
word_map,vectors=load_meta_model()
index_to_word= { item:key for key,item in word_map.items()}
records=load_labeled_data("./data/tmp.json")
model=load_model("./model/dis_enc_model.model")
for i in range(10):
record=records[i]
sentence = record["content"]
print("raw sentence is ")
print("".join(sentence))
real_keys=record["key"]
print("real keywords as")
print(real_keys)
words = sentence
pro=conditiaonal_probability(words,sentence,[],model)
pr(pro)
quit()