def optimize_model():
    """Run ALS training (with poisoned/malicious data included) until the
    training RMSE converges.

    NOTE(review): this function reads many free names from enclosing scope
    (n_iters, converge, train, mean_rating_, mal_* arrays, lamda_u/lamda_v,
    user/item feature matrices, and the helpers ALS/predict/RMSE) — confirm
    they are defined before this is called.

    Returns:
        The last converged training RMSE, or None if no iteration ran.
    """
    print("Start training model with data poisoning attacks!")
    last_rmse = None
    # FIX: `xrange` is Python 2 only; use `range` as the sibling
    # optimize_model_origin() already does.
    for iteration in range(n_iters):
        t1 = time.time()
        # ALS appears to update the feature matrices in place (its return
        # value is ignored here, unlike ALS_origin below).
        ALS(n_user, n_item, n_feature, mal_user, train, mean_rating_,
            mal_mean_rating_, mal_ratings, lamda_u, lamda_v,
            user_features_, mal_user_features_, item_features_)
        train_preds = predict(train.take([0, 1], axis=1), user_features_,
                              item_features_, mean_rating_)
        train_rmse = RMSE(train_preds, train.take(2, axis=1))
        t2 = time.time()
        print("The %d th iteration \t time: %ds \t RMSE: %f " %
              (iteration + 1, t2 - t1, train_rmse))
        # stop when converge
        if last_rmse and abs(train_rmse - last_rmse) < converge:
            break
        else:
            last_rmse = train_rmse
    return last_rmse
def optimize_model_origin(converge, n_user, n_item, n_feature, train,
                          mean_rating_, lamda_u, lamda_v,
                          user_features_origin_, item_features_origin_):
    """Train the clean (no data poisoning) ALS model until the training
    RMSE change falls below `converge`, or 100 iterations elapse.

    NOTE(review): the target here is `train.take(2, axis=1) - 3`, i.e. the
    ratings are shifted by 3 — presumably a 1..5 -> -2..2 centering; confirm
    against the data pipeline.

    Returns the RMSE of the last completed (non-converged) iteration,
    or None if the first iteration already broke out.
    """
    print("Start training model without data poisoning attacks!")
    prev_rmse = None
    max_iters = 100
    for step in range(max_iters):
        started = time.time()
        user_features_origin_, item_features_origin_ = ALS_origin(
            n_user, n_item, n_feature, train, mean_rating_, lamda_u, lamda_v,
            user_features_origin_, item_features_origin_)
        preds = predict(train.take([0, 1], axis=1),
                        user_features_origin_, item_features_origin_)
        cur_rmse = RMSE(preds, train.take(2, axis=1) - 3)
        elapsed = time.time() - started
        print("The %d th iteration \t time: %ds \t RMSE: %f " %
              (step + 1, elapsed, cur_rmse))
        # Converged: keep the previous RMSE as the reported result.
        if prev_rmse and abs(cur_rmse - prev_rmse) < converge:
            break
        prev_rmse = cur_rmse
    return prev_rmse
# Resolve the output directory: prefer the CLI flag, otherwise require that
# OUT_DIR is already present in the environment.
if args['--output-dir'] is not None:
    os.environ["OUT_DIR"] = args['--output-dir']
else:
    assert "OUT_DIR" in os.environ
# Load configuration from an explicit file, or fall back to the packaged
# default config module.
if args['--config'] is not None:
    config = load_config_from_file(args['--config'])
else:
    from seq2seq import config
print("Using configuration", config.__file__)
# Exactly one of --test / --dev selects which split to evaluate.
if args['--test'] is True:
    test_file = config.filename_test
    eval_type = 'test'
elif args['--dev'] is True:
    test_file = config.filename_dev
    eval_type = 'dev'
else:
    raise ValueError('Specify --dev or --test.')
# Build the model from the chosen config, run prediction, compute accuracy,
# and persist the results ("Et-morf-yh" is the language key for this run).
config_holder = ConfigHolder(config)
model = Model(config_holder)
df = evaluation.predict(model, config_holder, test_file)
acc_dict = evaluation.calculate_accuracy(df)
acc_verbose = evaluation.accuracy_to_string_verbose(acc_dict)
evaluation.save_results(df, acc_dict, acc_verbose, "Et-morf-yh", eval_type, config.out_dir)
def run(args):
    """Build, train, validate, and test an EntityTypingNet model.

    Trains for ``args.epochs`` epochs (saving weights every epoch), then
    reloads each epoch's weights, scores them on the validation split, and
    finally tests the epoch with the best validation micro-F1.

    NOTE(review): relies on module-level names imported elsewhere in this
    file (pkl, EntityTypingNet, ModelCheckpoint, EarlyStopping,
    load_pkl_data, predict, just_test, K, MAX_* constants).
    """
    # Add underscore to the tag
    args.tag = ("_" + args.tag) if args.tag is not None else ""
    # Parse prefix and postfix
    prefix = "{}{}".format("-Subword" if args.subword else "",
                           "-Attention" if args.attention else "")
    postfix = "{}{}{}".format(
        "_subword" if args.subword else "",
        ("_" + args.data_tag) if args.data_tag is not None else "",
        ("_d" if args.description else ""))
    # Parse directory name
    if not args.model_dir.endswith("/"):
        args.model_dir += "/"
    if args.matching:
        print("Matching problem.")

    #########################################
    # Load models (TO-BE-REVISED)
    # FIX: close the tokenizer file deterministically
    # (was `pkl.load(open(...))`, which leaked the file handle).
    with open(args.tokenizers, "rb") as tokenizer_file:
        tokenizers = pkl.load(tokenizer_file)
    n_classes = len(tokenizers["mlb"].classes_)
    # FIX: catch only the expected KeyError instead of a bare `except:`.
    try:
        desc_tokenizer = tokenizers["description"]
    except KeyError:
        desc_tokenizer = None

    #########################################
    # Building Model
    print("Building computational graph...")
    model = EntityTypingNet(
        architecture=args.arch,
        n_classes=n_classes,
        context_tokenizer=tokenizers["context"],
        mention_tokenizer=tokenizers["mention"],
        desc_tokenizer=desc_tokenizer,
        context_emb=args.context_emb,
        context_embedding_dim=args.context_embedding_dim,
        mention_emb=args.mention_emb,
        mention_embedding_dim=args.mention_embedding_dim,
        desc_emb=args.desc_emb,
        desc_embedding_dim=args.desc_embedding_dim,
        same_emb=args.same_emb,
        n_words=MAX_NUM_WORDS,
        n_mention=MAX_NUM_MENTION_WORDS,
        n_description=MAX_NUM_DESCRIPTION_WORDS,
        len_context=MAX_SEQUENCE_LENGTH,
        len_mention=MAX_MENTION_LENGTH,
        len_description=MAX_DESCRIPTION_LENGTH,
        attention=args.attention,
        subword=args.subword,
        indicator=args.indicator,
        description=False,  # args.description,
        matching=args.matching,
        merge_mode=args.merge_mode,
        dropout=args.dropout,
        use_softmax=args.use_softmax,
        optimizer=args.optimizer,
        learning_rate=args.learning_rate)
    print(model.summary())

    # Save weights at the end of each epoch
    save_prefix = "{:s}{:s}-weights{:s}".format(args.arch, prefix, args.tag)
    filename = save_prefix + "-{epoch:02d}.hdf5"
    checkpoint = ModelCheckpoint(
        filename,
        monitor="val_loss",
        verbose=1,
        save_best_only=False,
        mode="min")
    early = EarlyStopping(monitor="val_loss", mode="min", patience=20)
    callbacks_list = [checkpoint, early]

    X_train, Z_train, y_train, D_train = load_pkl_data(
        args.model_dir, "training", postfix, indicator=args.indicator,
        matching=args.matching)
    # (removed a dead, string-quoted oversampling block and commented-out
    # softmax/input lines that had no runtime effect)
    print(X_train.shape, Z_train.shape, y_train.shape)
    # FIX: renamed `input` -> `train_inputs` (was shadowing the builtin).
    train_inputs = ([X_train, Z_train, D_train] if args.description
                    else [X_train, Z_train])
    print("Begin training...")
    model.fit(
        train_inputs,
        y_train,
        batch_size=args.batch_size,
        epochs=args.epochs,
        validation_split=0.01,
        callbacks=callbacks_list)

    # Evaluation: find the epoch with the best validation micro-F1.
    record = 0
    index = 0
    # NOTE(review): training load passes matching=..., validation passes
    # description=... — confirm this asymmetry is intended in load_pkl_data.
    X_val, Z_val, y_val, D_val = load_pkl_data(
        args.model_dir, "validation", postfix, indicator=args.indicator,
        description=args.description)
    print("Loading trained weights for validation...")
    for i in range(1, args.epochs + 1, 1):
        # Deal with model_name for each epoch
        model_name = "{:s}-{:02d}.hdf5".format(save_prefix, i)
        model.load_weights(model_name)
        f = predict(
            model, X_val, Z_val, y_val, model_name, "results.txt",
            return_mf1=True, use_softmax=args.use_softmax)
        # Always choose model trained with more epoch when the F-1 score is same
        if record <= f:
            record = f
            index = i

    print("\n * Best micro-F1 at Validation: epoch #{:02d}".format(index))
    # Test model with best micro F1 score
    model_name = "{:s}-{:02d}.hdf5".format(save_prefix, index)
    just_test(
        model=model,
        filename=model_name,
        postfix=postfix,
        use_softmax=args.use_softmax,
        indicator=args.indicator)
    K.clear_session()
def get(self, link_video):
    """Look up `link_video` in the module-level `Data` list; on a match,
    run LipNet lip-reading on a hard-coded video and return the decoded
    transcript string. Returns {'Data': None} when no entry matches.

    NOTE(review): the weight file and video path ("unseen-weights178.h5",
    "anshu.mp4") are hard-coded — presumably placeholders; confirm.
    """
    # (removed unused local `videofile` and unused `import sys`)
    for x in Data:
        if x['Data'] == link_video:
            # Heavy ML dependencies are imported lazily, only when a video
            # actually needs to be processed.
            from lipnet.lipreading.videos import Video
            from lipnet.lipreading.visualization import show_video_subtitle
            from lipnet.core.decoders import Decoder
            from lipnet.lipreading.helpers import labels_to_text
            from lipnet.utils.spell import Spell
            from lipnet.model2 import LipNet
            from keras.optimizers import Adam
            from keras import backend as K
            import numpy as np
            import os

            np.random.seed(55)

            CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
            FACE_PREDICTOR_PATH = os.path.join(
                CURRENT_PATH, '..', 'common', 'predictors',
                'shape_predictor_68_face_landmarks.dat')
            PREDICT_GREEDY = False
            PREDICT_BEAM_WIDTH = 200
            PREDICT_DICTIONARY = os.path.join(CURRENT_PATH, '..', 'common',
                                              'dictionaries', 'grid.txt')

            def predict(weight_path, video_path,
                        absolute_max_string_len=32, output_size=28):
                # FIX: Python 2 `print "..."` statements converted to
                # print() calls (syntax errors under Python 3).
                print("\nLoading data from disk...")
                video = Video(vtype='face',
                              face_predictor_path=FACE_PREDICTOR_PATH)
                if os.path.isfile(video_path):
                    video.from_video(video_path)
                else:
                    video.from_frames(video_path)
                print("Data loaded.\n")

                # Backend layout decides where the channel axis lives.
                if K.image_data_format() == 'channels_first':
                    img_c, frames_n, img_w, img_h = video.data.shape
                else:
                    frames_n, img_w, img_h, img_c = video.data.shape

                lipnet = LipNet(
                    img_c=img_c, img_w=img_w, img_h=img_h,
                    frames_n=frames_n,
                    absolute_max_string_len=absolute_max_string_len,
                    output_size=output_size)
                adam = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999,
                            epsilon=1e-08)
                # CTC loss is computed inside the model graph; the "loss"
                # here just passes y_pred through.
                lipnet.model.compile(
                    loss={'ctc': lambda y_true, y_pred: y_pred},
                    optimizer=adam)
                lipnet.model.load_weights(weight_path)

                spell = Spell(path=PREDICT_DICTIONARY)
                decoder = Decoder(
                    greedy=PREDICT_GREEDY,
                    beam_width=PREDICT_BEAM_WIDTH,
                    postprocessors=[labels_to_text, spell.sentence])

                X_data = np.array([video.data]).astype(np.float32) / 255
                input_length = np.array([len(video.data)])
                y_pred = lipnet.predict(X_data)
                result = decoder.decode(y_pred, input_length)[0]
                return (video, result)

            video, result = predict("unseen-weights178.h5", "anshu.mp4")
            return result
    return {'Data': None}
# NOTE(review): this `else:` belongs to an `if` that is outside this chunk
# (presumably the branch taken when OUT_DIR must come from the CLI flag).
else:
    os.environ["OUT_DIR"] = args['--output-dir']
# Load configuration from an explicit file, or fall back to the packaged
# default config module.
if args['--config'] is not None:
    config = load_config_from_file(args['--config'])
else:
    from mcml import config
print("Using configuration", config.__file__)
# Exactly one of --test / --dev selects which split to evaluate.
if args['--test'] is True:
    test_file = config.filename_test
    eval_type = 'test'
elif args['--dev'] is True:
    test_file = config.filename_dev
    eval_type = 'dev'
else:
    raise ValueError('Specify --dev or --test.')
# Build the model, run prediction (with a per-sentence callback), compute
# accuracy, and persist the results under `lang_key`.
config_holder = ConfigHolder(config)
model = Model(config_holder)
df = evaluation.predict(model, config_holder, test_file, predict_sentence_callback=predict_sentence)
acc_dict = evaluation.calculate_accuracy(df)
acc_verbose = evaluation.accuracy_to_string_verbose(acc_dict)
evaluation.save_results(df, acc_dict, acc_verbose, lang_key, eval_type, config.out_dir)
def evaluate(model, config_holder, test_file, lang_key, eval_type, out_dir):
    """Predict on `test_file`, compute accuracy, and save the results.

    Thin pipeline over the `evaluation` module: predict -> accuracy dict ->
    verbose accuracy string -> persisted results keyed by `lang_key` and
    `eval_type` under `out_dir`.
    """
    predictions = evaluation.predict(model, config_holder, test_file)
    accuracy = evaluation.calculate_accuracy(predictions)
    accuracy_text = evaluation.accuracy_to_string_verbose(accuracy)
    evaluation.save_results(predictions, accuracy, accuracy_text,
                            lang_key, eval_type, out_dir)