def test_f(args, y, output): correct = [0] * args.seq_length total = [0] * args.seq_length if args.label_type == 'one_hot': y_decode = one_hot_decode(y) output_decode = one_hot_decode(output) elif args.label_type == 'five_hot': y_decode = five_hot_decode(y) output_decode = five_hot_decode(output) for i in range(np.shape(y)[0]): y_i = y_decode[i] output_i = output_decode[i] # print(y_i) # print(output_i) class_count = {} for j in range(args.seq_length): if y_i[j] not in class_count: class_count[y_i[j]] = 0 class_count[y_i[j]] += 1 total[class_count[y_i[j]]] += 1 if y_i[j] == output_i[j]: correct[class_count[y_i[j]]] += 1 # set_trace() # return [float(correct[i]) / total[i] if total[i] > 0. else 0. for i in range(1, int(args.seq_length/args.n_classes))] return [ float(correct[i]) / total[i] if total[i] > 0. else 0. for i in range(1, 11) ], total[1:11]
def test_f(args, y, output): correct = [0] * args.seq_length #correctly predicted total = [0] * args.seq_length #total predicted if args.label_type == 'one_hot': y_decode = one_hot_decode(y) #getting the index of the arg max output_decode = one_hot_decode(output) #getting the index of argmax elif args.label_type == 'five_hot': y_decode = five_hot_decode(y) output_decode = five_hot_decode(output) for i in range( np.shape(y)[0] ): #this is iterating through the each predicted example in the batch y_i = y_decode[i] #one y_i have 50 elementns print("Printing the correct classes of the sequence", y_i) output_i = output_decode[i] # print(y_i) # print(output_i) class_count = {} for j in range(args.seq_length): #now for each time step we iterate. print(j) if y_i[j] not in class_count: #get the first class in the starting time step. Check whether the sequence saw it before class_count[y_i[j]] = 0 #start for the that class with sero class_count[y_i[ j]] += 1 #add one for the class #each time when this sees a class it will up the counts print("Printing the class counts", class_count) print( "printing the class cout of the current correct-class in the sequence", class_count[y_i[j]]) total[class_count[y_i[j]]] += 1 print(total) if y_i[j] == output_i[ j]: #if corerctly predicted the current time step one correct[class_count[y_i[ j]]] += 1 #This is to basically find how many times networks see a class and how many times network correctly predicted a class. print("Printing the correctness thing", correct) #basically here we calculate end of each time step how many times I have seen this examples and how many times my network predicted correctly. #here total is a [0,8,2,3,......49] of there 8 in second position is in the batch the network has seen same class for twice while and . return [ float(correct[i]) / total[i] if total[i] > 0. else 0. for i in range(1, 11) ] # accuracy is get by how many time steps in a back has seen sa
def main(path_test, path_model, path_result):
    """Run the trained attention seq2seq model on a test set and save output.

    Args:
        path_test: CSV file with one MR (meaning representation) per row;
            the first row is skipped and the single column is named "mr".
        path_model: path to saved Keras weights (loaded via load_weights).
        path_result: output text file, one predicted reference per line.

    NOTE(review): relies on module-level helpers (extract_feature,
    delexicalize_tokenize_mr, encode, one_hot_encode, one_hot_decode,
    decode, relexicalize_ref) and on pickled vocabularies under models/
    relative to the working directory.
    """
    # ------------------------------
    # ---- LOAD DATA AND MODELS ----
    # ------------------------------
    print("Loading data...", end=" ")
    data = pd.read_csv(path_test, names=["mr"], skiprows=1)
    ###
    len_seq = 25  # fixed input sequence length the model was trained with
    with open('models/word2idx_mr.pkl', 'rb') as handle:
        w2i_mr = pickle.load(handle)  # MR token -> index
    with open('models/idx2word_ref.pkl', 'rb') as handle:
        i2w_ref = pickle.load(handle)  # index -> reference-text token
    size_voc_mr = len(w2i_mr.values())
    size_voc_ref = len(i2w_ref.values())
    ###
    nhid = 128  # LSTM hidden size; must match the saved weights
    model = Sequential()
    model.add(
        LSTM(nhid,
             return_sequences=True,
             input_shape=(len_seq, size_voc_mr + 1)))
    model.add(AttentionDecoder(nhid, size_voc_ref + 1))
    model.load_weights(path_model)
    print("ok!")
    # --------------------
    # ---- PREPROCESS ----
    # --------------------
    # -- Preprocessing MRs --
    # -----------------------
    print("Preprocessing MRs...", end=" ")
    # Extract name/food/near values — kept so they can be re-inserted into
    # the delexicalized predictions later.
    data["mr_name"] = data.mr.map(lambda mr: extract_feature(mr, "name"))
    data["mr_food"] = data.mr.map(lambda mr: extract_feature(mr, "food"))
    data["mr_near"] = data.mr.map(lambda mr: extract_feature(mr, "near"))
    # Delexicalize MRs (presumably replaces the extracted values with
    # placeholder tags — confirm against delexicalize_tokenize_mr)
    data["mr_delexicalized"] = data.mr\
        .map(lambda mr: delexicalize_tokenize_mr(mr))
    print("ok!")
    # ------------------------
    # ---- CREATE DATASET ----
    # ------------------------
    # -- Create X (features)--
    # ------------------------
    print("Creating features...", end=" ")
    data["mr_encoded"] = data.mr_delexicalized\
        .map(lambda mr: encode(mr, w2i_mr))
    data["mr_padded"] = list(pad_sequences(data.mr_encoded, maxlen=len_seq))
    X = []
    for i in range(len(data)):
        # one-hot over the MR vocabulary (+1, matching the model input shape)
        one_hot_encoded = one_hot_encode(data.mr_padded[i], size_voc_mr + 1)
        X.append(one_hot_encoded)
    X = np.array(X)
    print("ok!")
    # -----------------
    # ---- PREDICT ----
    # -----------------
    print("Predicting...", end=" ")
    predictions = []
    for i in range(len(X)):
        # Predict one sample at a time; X[i:i + 1] keeps the batch dimension.
        prediction = decode(one_hot_decode(model.predict(X[i:i + 1])[0]),
                            i2w_ref)
        predictions.append(prediction)
    data["pred"] = predictions
    print("ok!")
    # ----------------------
    # ---- POST-PROCESS ----
    # ----------------------
    print("Postprocessing and saving...", end=" ")
    # Re-insert the original name/food/near values in place of their tags.
    data["pred"] = data.apply(
        lambda row: relexicalize_ref(row, "mr_name", "name_tag"), axis=1)
    data["pred"] = data.apply(
        lambda row: relexicalize_ref(row, "mr_food", "food_tag"), axis=1)
    data["pred"] = data.apply(
        lambda row: relexicalize_ref(row, "mr_near", "near_tag"), axis=1)
    # Strip the sequence delimiter tokens from the generated text.
    data["pred"] = data.pred.map(lambda pred: pred.replace("<begin>", ""))
    data["pred"] = data.pred.map(lambda pred: pred.replace("<end>", ""))
    np.savetxt(path_result, list(data.pred), fmt='%s')
    print("ok!")