def learning_curve(dataset_size, model_type):
    """ runs cross validation to plot learning curve """
    print "LEARNING CURVE", dataset_size, model_type
    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(get_features(case['sentence'], case['hypothesis']))
        targets.append(case['contradiction'])

    model = ClassificationModel(model_type)
    train_sizes, train_scores, test_scores = model.learning_curve(data, targets)

    with open(config.LEARNING_CURVE_PATH.format(dataset_size, model_type), 'wb') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        csv_writer.writerow(
            ['model', 'dataset_size', 'train_size', 'train_mse', 'test_mse'])
        for (train_size, train_score, test_score) in zip(train_sizes, train_scores, test_scores):
            csv_writer.writerow([
                model_type, dataset_size, train_size,
                ','.join(np.char.mod('%f', train_score)),
                ','.join(np.char.mod('%f', test_score))
            ])

    plot = plotter.learning_curve(train_sizes, train_scores, test_scores)
    plot.savefig("../res/plot/learning_{}_{}.pdf".format(dataset_size, model_type))
def test():
    c = ClassificationModel()
    _, normVector, _ = c.buildModel(is_training=False)

    restore_vars = []
    for var in tf.global_variables():
        if 'temp' not in var.name:
            restore_vars.append(var)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(var_list=restore_vars)
        saver.restore(sess, 'step/model.ckpt-2700')

        speaker_path = '/home/logview/workspace/projects/TI-SV/samples'
        utterance_specs = []

        files = os.listdir(speaker_path)
        files.sort()
        for utter_name in files:
            utter_path = os.path.join(speaker_path, utter_name)          # path of each utterance
            utter, sr = librosa.core.load(utter_path, 16000)             # load utterance audio
            utter_trim, index = librosa.effects.trim(utter, top_db=20)   # voice activity detection, only trim
            S = librosa.feature.mfcc(y=utter_trim, sr=sr, n_mfcc=40)
            inputs = S.transpose((1, 0))[:160]
            print(inputs.shape)
            utterance_specs.append(inputs)

        utterance_specs = np.array(utterance_specs)
        print(utterance_specs.shape)

        vectors = sess.run(normVector, feed_dict={c.melInputs: utterance_specs})
        similar(vectors)
def train():
    lr = 0.002
    c = ClassificationModel()
    cost, normVector, alfas_mean = c.buildModel()

    gStep = tf.Variable(tf.constant(0))
    learning_rate = tf.train.exponential_decay(float(lr), gStep, 300, 0.9, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    gradients, vars = zip(*optimizer.compute_gradients(cost))  # var_list=train_vars
    gradients, _ = tf.clip_by_global_norm(gradients, 100)
    train_op = optimizer.apply_gradients(zip(gradients, vars))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # saver = tf.train.Saver(var_list=restore_vars)  # var_list=restore_vars
        # saver.restore(sess, 'step/model.ckpt-' + ckpoint)
        saver = tf.train.Saver()

        for step in range(10000):
            mels, ids = c.getBatch_data_label(10)
            feed_dict = {c.melInputs: mels, c.labelInputs: ids, gStep: step}
            _, _lr, _loss, _alfas_mean = sess.run(
                [train_op, learning_rate, cost, alfas_mean], feed_dict=feed_dict)
            print('{}: lr = {:.6f}, loss = {}, alfas_mean = {}'.format(step, _lr, _loss, _alfas_mean))
            if step % 300 == 0 and step != 0:
                saver.save(sess, 'step/model.ckpt', global_step=step)
def get_model(model_path):
    label = get_label("./util/label.txt")
    model = ClassificationModel(
        class_list=label,
        img_width=256,
        img_height=256,
    )
    status = model.load(model_path)
    if not status:
        raise Exception("model load failed...")
    return model
def export(output_dir, ckpt=None, model_version=1):
    # Define model.
    audio_meta_train = VoxCelebMeta(hp.train.data_path, hp.train.meta_path)
    model = ClassificationModel(num_classes=audio_meta_train.num_speaker, **hp.model)
    with TowerContext('', is_training=False):
        input = PlaceholderInput()
        input.setup(model.get_inputs_desc())
        model.build_graph(*input.get_input_tensors())

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Restore variables from training checkpoints.
        ckpt = ckpt if ckpt else tf.train.latest_checkpoint(hp.logdir)
        if ckpt:
            tf.train.Saver().restore(sess, ckpt)
            print('Successfully loaded model: {} from {}'.format(ckpt, ckpt))
        else:
            print('No checkpoint file found at {}'.format(ckpt))
            return

        # Export inference model.
        output_path = os.path.join(
            tf.compat.as_bytes(output_dir),
            tf.compat.as_bytes(str(model_version)))
        print('Exporting trained model to', output_path)
        builder = tf.saved_model.builder.SavedModelBuilder(output_path)

        # Build the signature_def_map.
        inputs_tensor_info = tf.saved_model.utils.build_tensor_info(model.x)
        prob_output_tensor_info = tf.saved_model.utils.build_tensor_info(model.prob)
        embedding_output_tensor_info = tf.saved_model.utils.build_tensor_info(model.y)

        predict_signature = (
            tf.saved_model.signature_def_utils.build_signature_def(
                inputs={'x': inputs_tensor_info},
                outputs={
                    'prob': prob_output_tensor_info,
                    'embedding': embedding_output_tensor_info,
                },
                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))

        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                'predict': predict_signature
            })

        builder.save()
        print('Successfully exported model to %s' % output_dir)
def main():
    # Read data
    X, Y = load_cfar10_batch('datasets/cifar-10-batches-py', 1)
    Xs, Ys = load_cfar10_test('datasets/cifar-10-batches-py')
    for i in range(1, 5):
        _X, _Y = load_cfar10_batch('datasets/cifar-10-batches-py', i + 1)
        X = np.concatenate((X, _X))
        Y = np.concatenate((Y, _Y))

    X = np.reshape(X, [X.shape[0], -1])
    Xs = np.reshape(Xs, [Xs.shape[0], -1])
    Y = np.reshape(Y, [-1, 1])
    Ys = np.reshape(Ys, [-1, 1])

    # pre-processing
    X_mean = np.mean(X, 0)
    X_std = np.std(X, 0)
    X = (X - X_mean) / (X_std + 1e-7)
    Xs = (Xs - X_mean) / (X_std + 1e-7)

    print("=== DATA SUMMARY ===")
    print("X is normalized.")
    print("Y is not whitened. Y variance: ", np.var(Y))

    model = ClassificationModel(args.layers, args.num_inducing)

    def predict_accuracy():
        a, b = model.predict(Xs)
        c = np.argmax(a, axis=1) - (Ys[:, 0])
        L = np.abs(c)
        count = 0
        for i in range(len(L)):
            if L[i] == 0:
                count += 1
        print("Test accuracy: ", count / len(L))

    def train_accuracy():
        a, b = model.predict(X[0:1000])
        c = np.argmax(a, axis=1) - (Y[0:1000, 0])
        L = np.abs(c)
        count = 0
        for i in range(len(L)):
            if L[i] == 0:
                count += 1
        print("Train accuracy: ", count / len(L))

    for epoch in range(1000):
        print("EPOCH", epoch)
        model.fit(X, Y)
        train_accuracy()
        predict_accuracy()
def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Running the code in %s mode...', (FLAGS.mode))

    # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
    FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
    if not os.path.exists(FLAGS.log_root):
        if FLAGS.mode == "train":
            os.makedirs(FLAGS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % (FLAGS.log_root))

    vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size)  # create a source vocabulary

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = ['adam_epsilon', 'mode', 'loss', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
                   'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size',
                   'max_enc_steps']
    hps_dict = {}
    for key, val in FLAGS.__flags.iteritems():  # for each flag
        if key in hparam_list:                  # if it's in the list
            hps_dict[key] = val                 # add it to the dict
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data
    tf.set_random_seed(1233)  # a seed value for randomness

    if hps.mode == 'train':
        print "creating model..."
        batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=False)
        model = ClassificationModel(hps, vocab)
        # batcher = None
        setup_training(model, batcher, vocab, hps)
    elif hps.mode == 'eval':
        model = ClassificationModel(hps, vocab)
        run_eval(model, vocab, hps)
    elif hps.mode == 'decode':
        model = ClassificationModel(hps, vocab)
        run_decode(model, vocab, hps)
    # elif hps.mode == 'decode':
    #     decode_model_hps = hps  # This will be the hyperparameters for the decoder model
    #     decode_model_hps = hps._replace(max_dec_steps=1)  # The model is configured with max_dec_steps=1 because we only ever run one step of the decoder at a time (to do beam search). Note that the batcher is initialized with max_dec_steps equal to e.g. 100 because the batches need to contain the full summaries
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def main():
    # Image Augmentations
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip()
    ])

    valid_data_loader = None
    train_data_loader = None
    if valid_dataset_path:
        val_dataset = ImageFolder(valid_dataset_path, transform=transform)
        train_dataset = ImageFolder(train_dataset_path, transform=transform)
        train_data_loader, valid_data_loader = get_data_loaders_from_dataset(
            train_dataset, val_dataset)
    else:
        image_ds = ImageFolder(train_dataset_path, transform=transform)
        print(image_ds.classes)
        val_size = int(val_pct * len(image_ds))
        train_size = len(image_ds) - val_size
        train_dataset, val_dataset = random_split(image_ds, [train_size, val_size])
        train_data_loader, valid_data_loader = get_data_loaders_from_dataset(
            train_dataset, val_dataset)

    flowers_model = ClassificationModel()
    trainer = pl.Trainer(gpus=1, max_epochs=max_epochs)
    trainer.fit(flowers_model,
                train_dataloader=train_data_loader,
                val_dataloaders=valid_data_loader)
def inference_random():
    # load the validation set and run evaluation
    model = ClassificationModel(len(cfg.char2idx))
    model = load_custom_model(model, cfg.save_model_path).to(cfg.device)
    tokenizer = Tokenizer(cfg.char2idx)

    error = 0
    with open(cfg.test_data_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            pairs = line.split('\t')
            label, text = pairs[0], pairs[1]
            input_index, _ = tokenizer.encode(text, max_length=cfg.max_seq_len)
            inputs = torch.tensor(input_index).unsqueeze(0)
            inputs_mask = (inputs > 0).to(torch.float32)
            with torch.no_grad():
                scores = model(inputs, inputs_mask)
            prediction = scores.argmax(-1).item()
            if prediction != int(label):
                print(scores[:, int(label)].item())
                print(label)
                print(text)
                print('-' * 50)
                error += 1
    print(error)
def new_client(client, server, state=state):
    state["num_clients"] += 1
    if not state["model_loaded"]:
        logger.info("Loading model")
        state["ftModel"] = ClassificationModel(path=model_path)
        state["model_loaded"] = True
    logger.info("New client connected and was given id %d" % client['id'])
    server.send_message(client, time_str)
def analyze_campus_policies():
    """ runs tests with the trained Random Forest model, with each pair of intents in the campi dataset """
    print "MODEL TEST USING CAMPI"
    dset = dataset.read('contradictions', 'campi')
    intents = []
    for case in dset['intents']:
        # if case['university'] not in intents:
        #     intents[case['university']] = []
        intents.append((case['university'], case['text'], case['nile']))

    model = ClassificationModel('forest')
    results = []
    if model.load_model(10000):
        # for (uni, intents) in intents.items():
        for i in range(len(intents)):
            (uni_stn, text_stn, sentence) = intents[i]
            for j in range(i + 1, len(intents)):
                (uni_hyp, text_hyp, hypothesis) = intents[j]
                if sentence != hypothesis:
                    results.append(
                        (uni_stn, uni_hyp, text_stn, text_hyp, sentence, hypothesis,
                         model.predict([get_features(sentence, hypothesis)])))

        with open(config.CONTRADICTIONS_RESULTS_PATH.format('summary', 'campi'), 'wb') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=',')
            csv_writer.writerow([
                'university stn', 'university hyp', 'text stn', 'text hyp',
                'sentence', 'hypothesis', 'prediction'
            ])
            for (uni_stn, uni_hyp, text_stn, text_hyp, sentence, hypothesis, prediction) in results:
                csv_writer.writerow([
                    uni_stn, uni_hyp, text_stn, text_hyp, sentence, hypothesis, prediction[0]
                ])
    else:
        print "Problem loading model"
def output_prob(text, end_to_end=e2e, state=state):
    if not state["model_loaded"]:
        logger.info("Loading model")
        state["ftModel"] = ClassificationModel(path=model_path)
        state["model_loaded"] = True

    report_text = "IMPRESSION: " + text + "\nEND OF IMPRESSION"
    processed_report_text, ground_truth = e2e.transform([report_text])[0]
    logger.info(processed_report_text)
    processed_report_text = " ".join(processed_report_text)
    prediction = state["ftModel"].predict(processed_report_text)
    logger.info(prediction)
    return (processed_report_text, ground_truth, prediction)
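# Illustrative usage sketch for output_prob; the report text below is hypothetical and
# assumes the module-level state, model_path, e2e and logger objects are already set up.
if __name__ == "__main__":
    processed, truth, pred = output_prob("No acute cardiopulmonary abnormality.")
    logger.info("processed=%s truth=%s prediction=%s" % (processed, truth, pred))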
def validate(dataset_size, model_type):
    """ runs cross-validation on the classification model """
    print "MODEL VALIDATION", dataset_size, model_type
    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(get_features(case['sentence'], case['hypothesis']))
        targets.append(case['contradiction'])

    model = ClassificationModel(model_type)
    scores = model.cross_validate(data, targets)
    print "scores", scores
    print "FIT TIME", scores['fit_time']
    print "VALIDATION TIME", scores['score_time']
    print "PRECISION", scores['test_precision_macro']
    print "RECALL", scores['test_recall_macro']
    print "F1 SCORE", scores['test_f1_macro']
    return scores['fit_time'], scores['score_time'], scores['test_precision_macro'], \
        scores['test_recall_macro'], scores['test_f1_macro']
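# Illustrative usage sketch: runs validation for each model type used elsewhere in these
# examples ('svm', 'log', 'forest'); the dataset size 10000 is a placeholder value.
if __name__ == "__main__":
    for mtype in ['svm', 'log', 'forest']:
        validate(10000, mtype)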
def test(dataset_size, model_type):
    """ opens fit dataset and trains SVM/LogReg/Forest model with it, then tests it """
    print "MODEL TEST", dataset_size, model_type
    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(case)
        targets.append(case['contradiction'])

    fit_data, test_data = [], []
    fit_cases, test_cases, fit_target, test_target = train_test_split(
        data, targets, test_size=0.25, shuffle=True, random_state=0)
    for fit_case in fit_cases:
        fit_data.append(get_features(fit_case['sentence'], fit_case['hypothesis']))
    for test_case in test_cases:
        test_data.append(get_features(test_case['sentence'], test_case['hypothesis']))

    model = ClassificationModel(model_type)
    start_time = time.time()
    model.train(fit_data, fit_target, dataset_size)
    elapsed_time = time.time() - start_time
    test_results = model.test(test_data)

    with open(config.CONTRADICTIONS_RESULTS_PATH.format(dataset_size, model_type), 'wb') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        csv_writer.writerow([
            'hypothesis', 'sentence', 'type', 'contradiction', 'prediction', 'features'
        ])
        for (test_case, result, features) in zip(test_cases, test_results, test_data):
            csv_writer.writerow([
                test_case['hypothesis'], test_case['sentence'], test_case['type'],
                test_case['contradiction'], result, features
            ])

    precision = metrics.precision_score(test_target, test_results)
    recall = metrics.recall_score(test_target, test_results)
    f1_score = metrics.f1_score(test_target, test_results)
    print "FIT TIME", elapsed_time
    print "PRECISION", precision
    print "RECALL", recall
    print "F1 SCORE", f1_score
    model.save(dataset_size)
def roc_curve(dataset_size):
    """ runs cross validation to plot the ROC curve for each model type """
    print "ROC CURVE", dataset_size
    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(get_features(case['sentence'], case['hypothesis']))
        targets.append(case['contradiction'])

    for mtype in ['svm', 'log', 'forest']:
        model = ClassificationModel(mtype)
        plot = plotter.plot_roc_curve(dataset_size, mtype, model, data, targets)
        plot.savefig("../res/plot/roc_{}_{}.pdf".format(dataset_size, mtype), bbox_inches='tight')
def train():
    # load the data
    tokenizer = Tokenizer(cfg.char2idx)
    train_dataset = CustomDataset(cfg.train_data_path, tokenizer, cfg)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=cfg.batch_size,
                                  collate_fn=padding,
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=True)

    model = ClassificationModel(len(cfg.char2idx))
    # model = load_pretrained_bert(model, cfg.pretrained_model_path, keep_tokens=cfg.keep_tokens).to(cfg.device)
    model = load_custom_model(model, cfg.save_model_path).to(cfg.device)

    loss_function = nn.CrossEntropyLoss().to(cfg.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learn_rate)

    # training loop
    iteration, train_loss = 0, 0
    model.train()
    for inputs, mask, targets in tqdm(train_dataloader, position=0, leave=True):
        inputs, mask, targets = inputs.to(cfg.device), mask.to(cfg.device), targets.to(cfg.device)
        prediction = model(inputs, mask)
        loss = loss_function(prediction, targets.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        iteration += 1
        if iteration % cfg.print_loss_steps == 0:
            eval_loss = evaluate(model, tokenizer, loss_function)
            print('')
            print('train_loss:{}'.format(train_loss / cfg.print_loss_steps))
            print('evalu_loss:{}'.format(eval_loss))
            accuracy(model, tokenizer, cfg.valid_data_path)
            accuracy(model, tokenizer, cfg.test_data_path)
            model.train()
            train_loss = 0
        if iteration % cfg.save_model_steps == 0:
            torch.save(model.state_dict(), cfg.save_model_path)
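# Illustrative sketch of the cfg object the training snippet above expects; the field
# values are placeholders, and only attributes referenced by the code are listed.
from types import SimpleNamespace

import torch

cfg = SimpleNamespace(
    char2idx={},                              # vocabulary mapping used by Tokenizer
    train_data_path='data/train.txt',         # placeholder paths
    valid_data_path='data/valid.txt',
    test_data_path='data/test.txt',
    save_model_path='checkpoints/model.pt',
    batch_size=32,
    learn_rate=1e-4,
    max_seq_len=128,
    print_loss_steps=100,
    save_model_steps=1000,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
)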
# set hyper-parameters from yaml file
hp.set_hparam_yaml(case=args.case)

# dataflow
audio_meta = AudioMeta(hp.train.data_path)
if args.remote:
    df = get_remote_dataflow(args.port, hp.train.batch_size)
else:
    df = DataLoader(audio_meta, hp.train.batch_size).dataflow(
        nr_prefetch=5000, nr_thread=int(multiprocessing.cpu_count() // 1.5))

# set logger for event and model saver
logger.set_logger_dir(hp.logdir)

if True:
    train_conf = TrainConfig(
        model=ClassificationModel(num_classes=audio_meta.num_speaker, **hp.model),
        data=FlexibleQueueInput(df, capacity=500),
        callbacks=[
            ModelSaver(checkpoint_dir=hp.logdir),
            EvalCallback()
        ],
        steps_per_epoch=hp.train.steps_per_epoch,
        # session_config=session_config
    )

    ckpt = args.ckpt if args.ckpt else tf.train.latest_checkpoint(hp.logdir)
    if ckpt and not args.r:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
import torch
from model import ClassificationModel
from torchvision import transforms
from config import classes
from PIL import Image

modelpath = 'epoch=9-step=1769.ckpt'
model = ClassificationModel()
model.load_state_dict(torch.load(modelpath)['state_dict'], strict=False)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
])


def check_for_gloves(image):
    with torch.no_grad():
        image = transform(image)
        print(image.shape)
        image = image.unsqueeze(0)
        print(image.shape)
        image = image.to(device)
        print(image.shape)
        output = model(image)
        op, predicted = torch.max(output.data, 1)
        print(predicted.item(), classes[predicted.item()], op.item())
        return predicted.item(), classes[predicted.item()], op.item()
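# Illustrative usage sketch: the image path is hypothetical; check_for_gloves expects a
# PIL image, which the transform defined above converts to a tensor.
if __name__ == "__main__":
    img = Image.open("sample.jpg").convert("RGB")
    label_id, label_name, score = check_for_gloves(img)
    print(label_id, label_name, score)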
from konlpy.tag import Komoran
from tensorflow.keras import Model
from model import ClassificationModel, input_shape

if __name__ == "__main__":
    komoran = Komoran()

    model_parent = ClassificationModel()
    model = model_parent.build_model()
    embedding = model_parent.embedding
    model.load_weights("curse_detection/weights-short.h5")

    att_model = Model(inputs=[model.input], outputs=model.layers[10].output)

    while True:
        inp = input(':')
        inp, mask = embedding([komoran.morphs(inp)])
        out = model.predict((inp, mask)).squeeze(1)
        att = att_model.predict((inp, mask))[1].squeeze(2)
        print(att)
        print(out)
import tensorflow as tf

from data_loader import DataLoader
from model import ClassificationModel
from config import *

# Create tensorflow session
sess = tf.Session()

# Build model graph
model = ClassificationModel(sess, "DBC")

# Initialize the model graph
sess.run(tf.global_variables_initializer())

# Build dataset pipeline graph
train_dataset = DataLoader(BATCH_SIZE)

# Get end of dataset pipeline
img, labels = train_dataset.get_train_data()
img_val, label_val = train_dataset.get_val_data()

epoch = 0
iter = 0
while True:
    try:
        # Fetch the dataset (tf.Tensor -> numpy array)
        _img, _label = sess.run([img, labels])
        # print(_img.shape)
        # print(_img[0].shape)
        # print(_label)
        # import cv2
        # cv2.imshow("img", _img[0])
        # cv2.waitKey(0)
    except tf.errors.OutOfRangeError:
        # assumed end-of-dataset handling: the input pipeline raises OutOfRangeError
        # once the data is exhausted
        break