Example #1
def learning_curve(dataset_size, model_type):
    """ runs cross validation to plot learning curve """
    print "LEARNING CURVE", dataset_size, model_type

    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(get_features(case['sentence'], case['hypothesis']))
        targets.append(case['contradiction'])

    model = ClassificationModel(model_type)
    train_sizes, train_scores, test_scores = model.learning_curve(
        data, targets)
    with open(config.LEARNING_CURVE_PATH.format(dataset_size, model_type),
              'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        csv_writer.writerow(
            ['model', 'dataset_size', 'train_size', 'train_mse', 'test_mse'])
        for (train_size, train_score,
             test_score) in zip(train_sizes, train_scores, test_scores):
            csv_writer.writerow([
                model_type, dataset_size, train_size,
                ','.join(np.char.mod('%f', train_score)),
                ','.join(np.char.mod('%f', test_score))
            ])

    plot = plotter.learning_curve(train_sizes, train_scores, test_scores)
    plot.savefig("../res/plot/learning_{}_{}.pdf".format(
        dataset_size, model_type))
Example #2
def test():
    c = ClassificationModel()
    _, normVector, _ = c.buildModel(is_training=False)

    restore_vars = []
    for var in tf.global_variables():
        if 'temp' not in var.name:
            restore_vars.append(var)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(var_list=restore_vars)
        saver.restore(sess, 'step/model.ckpt-2700')

        speaker_path = '/home/logview/workspace/projects/TI-SV/samples'
        utterance_specs = []
        files = os.listdir(speaker_path)
        files.sort()
        for utter_name in files:

            utter_path = os.path.join(speaker_path, utter_name)          # path of each utterance
            utter, sr = librosa.core.load(utter_path, sr=16000)          # load utterance audio
            utter_trim, index = librosa.effects.trim(utter, top_db=20)   # trim leading/trailing silence (simple VAD)

            S = librosa.feature.mfcc(y=utter_trim, sr=sr, n_mfcc=40)
            inputs = S.transpose((1,0))[:160]
            print(inputs.shape)
            utterance_specs.append(inputs)

        utterance_specs = np.array(utterance_specs)
        print(utterance_specs.shape)

        vectors = sess.run(normVector, feed_dict={c.melInputs:utterance_specs})
        similar(vectors)
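The snippet ends by calling similar(vectors), which is not defined here. A minimal sketch, assuming it reports pairwise cosine similarity between the utterance embeddings (the behavior is my assumption, not shown in the source):

import numpy as np

def similar(vectors):
    """Print the pairwise cosine-similarity matrix of the embeddings (assumed behavior)."""
    # L2-normalize each row so that the dot product equals cosine similarity.
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    unit = vectors / np.maximum(norms, 1e-12)
    print(np.dot(unit, unit.T))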
Example #3
def train():

    lr = 0.002
    c = ClassificationModel()
    cost, normVector, alfas_mean = c.buildModel()

    gStep = tf.Variable(tf.constant(0))
    learning_rate = tf.train.exponential_decay(float(lr), gStep, 300, 0.9, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    gradients, variables = zip(*optimizer.compute_gradients(cost))
    gradients, _ = tf.clip_by_global_norm(gradients, 100)
    train_op = optimizer.apply_gradients(zip(gradients, variables))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # saver = tf.train.Saver(var_list=restore_vars)    #var_list=restore_vars
        # saver.restore(sess, 'step/model.ckpt-'+ckpoint)
        saver = tf.train.Saver()

        for step in range(10000):
            mels, ids = c.getBatch_data_label(10)
            feed_dict = {c.melInputs:mels, c.labelInputs: ids, gStep: step}
            _, _lr, _loss, _alfas_mean = sess.run([train_op, learning_rate, cost, alfas_mean], feed_dict=feed_dict)
            print('{}: lr = {:.6f}, loss = {}, alfas_mean = {}'.format(step, _lr, _loss, _alfas_mean))

            if step % 300 == 0 and step != 0:
                saver.save(sess, 'step/model.ckpt', global_step=step)
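Feeding gStep through feed_dict works, but TF1 also has a built-in global step that apply_gradients can increment automatically. A sketch of that alternative (not the author's code), reusing the names above:

global_step = tf.train.get_or_create_global_step()
learning_rate = tf.train.exponential_decay(lr, global_step, 300, 0.9, staircase=True)
# apply_gradients bumps global_step by one on every training step.
train_op = optimizer.apply_gradients(zip(gradients, variables),
                                     global_step=global_step)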
Example #4
def get_model(model_path):
    label = get_label("./util/label.txt")
    model = ClassificationModel(
        class_list=label,
        img_width=256,
        img_height=256,
    )
    status = model.load(model_path)
    if not status:
        raise Exception("model load failed...")

    return model
Example #5
def export(output_dir, ckpt=None, model_version=1):
  # Define model.
  audio_meta_train = VoxCelebMeta(hp.train.data_path, hp.train.meta_path)
  model = ClassificationModel(num_classes=audio_meta_train.num_speaker, **hp.model)

  with TowerContext('', is_training=False):
    inp = PlaceholderInput()
    inp.setup(model.get_inputs_desc())
    model.build_graph(*inp.get_input_tensors())

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Restore variables from training checkpoints.
    ckpt = ckpt if ckpt else tf.train.latest_checkpoint(hp.logdir)
    if ckpt:
      tf.train.Saver().restore(sess, ckpt)
      print('Successfully loaded model from {}'.format(ckpt))
    else:
      print('No checkpoint file found in {}'.format(hp.logdir))
      return

    # Export inference model.
    output_path = os.path.join(
      tf.compat.as_bytes(output_dir),
      tf.compat.as_bytes(str(model_version)))
    print('Exporting trained model to', output_path)
    builder = tf.saved_model.builder.SavedModelBuilder(output_path)

    # Build the signature_def_map.
    inputs_tensor_info = tf.saved_model.utils.build_tensor_info(model.x)
    prob_output_tensor_info = tf.saved_model.utils.build_tensor_info(model.prob)
    embedding_output_tensor_info = tf.saved_model.utils.build_tensor_info(model.y)

    predict_signature = (
      tf.saved_model.signature_def_utils.build_signature_def(
        inputs={'x': inputs_tensor_info},
        outputs={
          'prob': prob_output_tensor_info,
          'embedding': embedding_output_tensor_info,
        },
        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
      ))

    builder.add_meta_graph_and_variables(
      sess, [tf.saved_model.tag_constants.SERVING],
      signature_def_map={
        'predict': predict_signature
      })

    builder.save()
    print('Successfully exported model to %s' % output_dir)
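To consume the exported model, the same TF1 API can load it back and look up the 'predict' signature. A minimal sketch, assuming the export landed in 'export/1' (a hypothetical path built from output_dir and model_version):

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], 'export/1')
    sig = meta_graph.signature_def['predict']
    # Resolve the tensors registered in the signature above.
    x = sess.graph.get_tensor_by_name(sig.inputs['x'].name)
    prob = sess.graph.get_tensor_by_name(sig.outputs['prob'].name)
    # probs = sess.run(prob, feed_dict={x: batch})  # batch is a hypothetical input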
Example #6
def main():

    # Read data
    X, Y = load_cfar10_batch('datasets/cifar-10-batches-py', 1)
    Xs, Ys = load_cfar10_test('datasets/cifar-10-batches-py')
    for i in range(2, 6):
        _X, _Y = load_cfar10_batch('datasets/cifar-10-batches-py', i)
        X = np.concatenate((X, _X))
        Y = np.concatenate((Y, _Y))

    X = np.reshape(X, [X.shape[0], -1])
    Xs = np.reshape(Xs, [Xs.shape[0], -1])
    Y = np.reshape(Y, [-1, 1])
    Ys = np.reshape(Ys, [-1, 1])

    # pre-processing
    X_mean = np.mean(X, 0)
    X_std = np.std(X, 0)
    X = (X - X_mean) / (X_std + 1e-7)
    Xs = (Xs - X_mean) / (X_std + 1e-7)
    print("=== DATA SUMMARY ===")
    print("X is normalized.")
    print("Y is not whitened. Y variance: ", np.var(Y))

    model = ClassificationModel(args.layers, args.num_inducing)

    def predict_accuracy():
        probs, _ = model.predict(Xs)
        correct = np.argmax(probs, axis=1) == Ys[:, 0]
        print("Test accuracy: ", np.mean(correct))

    def train_accuracy():
        probs, _ = model.predict(X[0:1000])
        correct = np.argmax(probs, axis=1) == Y[0:1000, 0]
        print("Train accuracy: ", np.mean(correct))

    for epoch in range(1000):
        print("EPOCH", epoch)
        model.fit(X, Y)
        train_accuracy()
        predict_accuracy()
Example #7
def main(unused_argv):
  if len(unused_argv) != 1: # raise an error if you've entered flags incorrectly
    raise Exception("Problem with flags: %s" % unused_argv)

  tf.logging.set_verbosity(tf.logging.INFO) # choose what level of logging you want
  tf.logging.info('Running the code in %s mode...', (FLAGS.mode))

  # Change log_root to FLAGS.log_root/FLAGS.exp_name and create the dir if necessary
  FLAGS.log_root = os.path.join(FLAGS.log_root, FLAGS.exp_name)
  if not os.path.exists(FLAGS.log_root):
    if FLAGS.mode=="train":
      os.makedirs(FLAGS.log_root)
    else:
      raise Exception("Logdir %s doesn't exist. Run in train mode to create it." % (FLAGS.log_root))

  vocab = Vocab(FLAGS.vocab_path, FLAGS.vocab_size) # create a source vocabulary

  # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
  hparam_list = ['adam_epsilon','mode', 'loss', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag', 'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim', 'batch_size', 'max_enc_steps']
  hps_dict = {}
  for key, val in FLAGS.__flags.items(): # for each flag
    if key in hparam_list: # if it's in the list
      hps_dict[key] = val # add it to the dict
  hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

  # Create a batcher object that will create minibatches of data

  tf.set_random_seed(1233) # a seed value for randomness

  if hps.mode == 'train':
    print "creating model..."
    batcher = Batcher(FLAGS.data_path, vocab, hps, single_pass=False)
    model = ClassificationModel(hps, vocab)
    # batcher = None
    setup_training(model, batcher, vocab, hps)

  elif hps.mode == 'eval':
    model = ClassificationModel(hps, vocab)
    run_eval(model, vocab, hps)

  elif hps.mode == 'decode':
    model = ClassificationModel(hps, vocab)
    run_decode(model, vocab, hps)

  # elif hps.mode == 'decode':
  #   decode_model_hps = hps  # This will be the hyperparameters for the decoder model
  #   decode_model_hps = hps._replace(max_dec_steps=1) # The model is configured with max_dec_steps=1 because we only ever run one step of the decoder at a time (to do beam search). Note that the batcher is initialized with max_dec_steps equal to e.g. 100 because the batches need to contain the full summaries


  else:
    raise ValueError("The 'mode' flag must be one of train/eval/decode")
Example #8
def main():
    # Image Augmentations
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip()
    ])

    valid_data_loader = None
    train_data_loader = None

    if valid_dataset_path:
        val_dataset = ImageFolder(valid_dataset_path, transform=transform)
        train_dataset = ImageFolder(train_dataset_path, transform=transform)
        train_data_loader, valid_data_loader = get_data_loaders_from_dataset(
            train_dataset, val_dataset)
    else:
        image_ds = ImageFolder(train_dataset_path, transform=transform)
        print(image_ds.classes)
        val_size = int(val_pct * len(image_ds))
        train_size = len(image_ds) - val_size
        train_dataset, val_dataset = random_split(image_ds,
                                                  [train_size, val_size])
        train_data_loader, valid_data_loader = get_data_loaders_from_dataset(
            train_dataset, val_dataset)

    flowers_model = ClassificationModel()
    trainer = pl.Trainer(gpus=1, max_epochs=max_epochs)
    trainer.fit(flowers_model,
                train_dataloader=train_data_loader,
                val_dataloaders=valid_data_loader)
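Note that the single transform above flips validation images as well, since both datasets share it. A common refinement (a sketch, not the author's code) keeps the random augmentation on the training side only:

train_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.RandomHorizontalFlip(),  # augmentation for training only
    transforms.ToTensor(),
])
val_transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
])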
Example #9
def inference_random():
    # Load the held-out set and evaluate predictions
    model = ClassificationModel(len(cfg.char2idx))
    model = load_custom_model(model, cfg.save_model_path).to(cfg.device)

    tokenizer = Tokenizer(cfg.char2idx)
    error = 0
    with open(cfg.test_data_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    for line in lines:
        pairs = line.strip().split('\t')
        label, text = pairs[0], pairs[1]
        input_index, _ = tokenizer.encode(text, max_length=cfg.max_seq_len)
        inputs = torch.tensor(input_index).unsqueeze(0)
        inputs_mask = (inputs > 0).to(torch.float32)
        with torch.no_grad():
            scores = model(inputs, inputs_mask)
            prediction = scores.argmax(-1).item()
        if prediction != int(label):
            print(scores[:, int(label)].item())
            print(label)
            print(text)
            print('-' * 50)
            error += 1
    print('total errors:', error)
Example #10
def new_client(client, server, state=state):
    state["num_clients"] += 1
    if not state["model_loaded"]:
        logger.info("Loading model")
        state["ftModel"] = ClassificationModel(path=model_path)
        state["model_loaded"] = True

    logger.info("New client connected and was given id %d" % client['id'])
    server.send_message(client, time_str)
Example #11
def analyze_campus_policies():
    """ runs tests with the trained Random Forest model, with each pair of intents in the campi dataset """
    print "MODEL TEST USING CAMPI"

    dset = dataset.read('contradictions', 'campi')
    intents = []
    for case in dset['intents']:
        # if case['university'] not in intents:
        #     intents[case['university']] = []
        intents.append((case['university'], case['text'], case['nile']))

    model = ClassificationModel('forest')
    results = []
    if model.load_model(10000):
        # for (uni, intents) in intents.items():
        for i in range(len(intents)):
            (uni_stn, text_stn, sentence) = intents[i]
            for j in range(i + 1, len(intents)):
                (uni_hyp, text_hyp, hypothesis) = intents[j]
                if sentence != hypothesis:
                    results.append(
                        (uni_stn, uni_hyp, text_stn, text_hyp, sentence,
                         hypothesis,
                         model.predict([get_features(sentence, hypothesis)])))

        with open(
                config.CONTRADICTIONS_RESULTS_PATH.format('summary', 'campi'),
                'w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile, delimiter=',')
            csv_writer.writerow([
                'university stn', 'university hyp', 'text stn', 'text hyp',
                'sentence', 'hypothesis', 'prediction'
            ])
            for (uni_stn, uni_hyp, text_stn, text_hyp, sentence, hypothesis,
                 prediction) in results:
                csv_writer.writerow([
                    uni_stn, uni_hyp, text_stn, text_hyp, sentence, hypothesis,
                    prediction[0]
                ])
    else:
        print "Problem loading model"
Example #12
def output_prob(text, end_to_end=e2e, state=state):
    if not state["model_loaded"]:
        logger.info("Loading model")
        state["ftModel"] = ClassificationModel(path=model_path)
        state["model_loaded"] = True
    report_text = "IMPRESSION: " + text + "\nEND OF IMPRESSION"
    processed_report_text, ground_truth = end_to_end.transform([report_text])[0]
    logger.info(processed_report_text)
    processed_report_text = " ".join(processed_report_text)
    prediction = state["ftModel"].predict(processed_report_text)
    logger.info(prediction)
    return (processed_report_text, ground_truth, prediction)
Example #13
def validate(dataset_size, model_type):
    """ runs cross validation in classification model """
    print "MODEL VALIDATION", dataset_size, model_type

    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(get_features(case['sentence'], case['hypothesis']))
        targets.append(case['contradiction'])

    model = ClassificationModel(model_type)
    scores = model.cross_validate(data, targets)
    print "scores", scores

    print "FIT TIME", scores['fit_time']
    print "VALIDATION TIME", scores['score_time']
    print "PRECISION", scores['test_precision_macro']
    print "RECALL", scores['test_recall_macro']
    print "F1 SCORE", scores['test_f1_macro']
    return scores['fit_time'], scores['score_time'], scores[
        'test_precision_macro'], scores['test_recall_macro'], scores[
            'test_f1_macro']
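Since validate returns the five score arrays as one tuple, a caller can unpack them directly. A hypothetical call:

fit_time, score_time, precision, recall, f1 = validate(10000, 'svm')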
Example #14
def test(dataset_size, model_type):
    """ opens fit dataset and trains SVM/LogReg/Forest model with it, then tests it"""
    print "MODEL TEST", dataset_size, model_type

    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(case)
        targets.append(case['contradiction'])

    fit_data, test_data = [], []
    fit_cases, test_cases, fit_target, test_target = train_test_split(
        data, targets, test_size=0.25, shuffle=True, random_state=0)
    for fit_case in fit_cases:
        fit_data.append(
            get_features(fit_case['sentence'], fit_case['hypothesis']))

    for test_case in test_cases:
        test_data.append(
            get_features(test_case['sentence'], test_case['hypothesis']))

    model = ClassificationModel(model_type)
    start_time = time.time()
    model.train(fit_data, fit_target, dataset_size)
    elapsed_time = time.time() - start_time
    test_results = model.test(test_data)

    with open(
            config.CONTRADICTIONS_RESULTS_PATH.format(dataset_size,
                                                      model_type),
            'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile, delimiter=',')
        csv_writer.writerow([
            'hypothesis', 'sentence', 'type', 'contradiction', 'prediction',
            'features'
        ])
        for (test_case, result, features) in zip(test_cases, test_results,
                                                 test_data):
            csv_writer.writerow([
                test_case['hypothesis'], test_case['sentence'],
                test_case['type'], test_case['contradiction'], result, features
            ])

    precision = metrics.precision_score(test_target, test_results)
    recall = metrics.recall_score(test_target, test_results)
    f1_score = metrics.f1_score(test_target, test_results)

    print "FIT TIME", elapsed_time
    print "PRECISION", precision
    print "RECALL", recall
    print "F1 SCORE", f1_score
    model.save(dataset_size)
Example #15
def roc_curve(dataset_size):
    """ runs cross validation to plot precision recall curve """
    print "ROC CURVE", dataset_size

    dset = dataset.read('contradictions', dataset_size)
    data, targets = [], []
    for case in dset['content']:
        data.append(get_features(case['sentence'], case['hypothesis']))
        targets.append(case['contradiction'])

    for mtype in ['svm', 'log', 'forest']:
        model = ClassificationModel(mtype)
        plot = plotter.plot_roc_curve(dataset_size, mtype, model, data,
                                      targets)
        plot.savefig("../res/plot/roc_{}_{}.pdf".format(dataset_size, mtype),
                     bbox_inches='tight')
Example #16
def train():
    # Load the data
    tokenizer = Tokenizer(cfg.char2idx)
    train_dataset = CustomDataset(cfg.train_data_path, tokenizer, cfg)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=cfg.batch_size,
                                  collate_fn=padding,
                                  shuffle=True,
                                  num_workers=4,
                                  pin_memory=True)
    model = ClassificationModel(len(cfg.char2idx))
    # model = load_pretrained_bert(model, cfg.pretrained_model_path, keep_tokens=cfg.keep_tokens).to(cfg.device)
    model = load_custom_model(model, cfg.save_model_path).to(cfg.device)

    loss_function = nn.CrossEntropyLoss().to(cfg.device)
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.learn_rate)
    # Training loop
    iteration, train_loss = 0, 0
    model.train()
    for inputs, mask, targets in tqdm(train_dataloader, position=0,
                                      leave=True):
        inputs, mask, targets = inputs.to(cfg.device), mask.to(
            cfg.device), targets.to(cfg.device)
        prediction = model(inputs, mask)
        loss = loss_function(prediction, targets.reshape(-1))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        iteration += 1

        if iteration % cfg.print_loss_steps == 0:
            eval_loss = evaluate(model, tokenizer, loss_function)
            print('')
            print('train_loss:{}'.format(train_loss / cfg.print_loss_steps))
            print('evalu_loss:{}'.format(eval_loss))
            accuracy(model, tokenizer, cfg.valid_data_path)
            accuracy(model, tokenizer, cfg.test_data_path)
            model.train()
            train_loss = 0

        if iteration % cfg.save_model_steps == 0:
            torch.save(model.state_dict(), cfg.save_model_path)
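The DataLoader above is built with collate_fn=padding, which is not shown. A minimal sketch, assuming each dataset item is a (token_ids, label) pair and that 0 is the padding id (the same convention the mask in inference_random relies on):

import torch

def padding(batch):
    """Pad a batch of (token_ids, label) pairs to the longest sequence."""
    max_len = max(len(ids) for ids, _ in batch)
    inputs = torch.zeros(len(batch), max_len, dtype=torch.long)
    for i, (ids, _) in enumerate(batch):
        inputs[i, :len(ids)] = torch.tensor(ids, dtype=torch.long)
    mask = (inputs > 0).to(torch.float32)  # 1.0 at non-pad positions
    targets = torch.tensor([label for _, label in batch], dtype=torch.long)
    return inputs, mask, targets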
Example #17
    # set hyper-parameters from yaml file
    hp.set_hparam_yaml(case=args.case)

    # dataflow
    audio_meta = AudioMeta(hp.train.data_path)
    if args.remote:
        df = get_remote_dataflow(args.port, hp.train.batch_size)
    else:
        df = DataLoader(audio_meta, hp.train.batch_size).dataflow(
            nr_prefetch=5000, nr_thread=int(multiprocessing.cpu_count() // 1.5))

    # set logger for event and model saver
    logger.set_logger_dir(hp.logdir)
    train_conf = TrainConfig(
        model=ClassificationModel(num_classes=audio_meta.num_speaker, **hp.model),
        data=FlexibleQueueInput(df, capacity=500),
        callbacks=[
            ModelSaver(checkpoint_dir=hp.logdir),
            EvalCallback()
        ],
        steps_per_epoch=hp.train.steps_per_epoch,
        # session_config=session_config
    )

    ckpt = args.ckpt if args.ckpt else tf.train.latest_checkpoint(hp.logdir)
    if ckpt and not args.r:
        train_conf.session_init = SaverRestore(ckpt)

    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
Example #18
import torch
from model import ClassificationModel
from torchvision import transforms
from config import classes
from PIL import Image

modelpath = 'epoch=9-step=1769.ckpt'
model = ClassificationModel()
model.load_state_dict(torch.load(modelpath)['state_dict'], strict=False)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.ToTensor(),
])


def check_for_gloves(image):
    with torch.no_grad():
        image = transform(image)     # PIL image -> CxHxW float tensor
        print(image.shape)
        image = image.unsqueeze(0)   # add batch dimension -> 1xCxHxW
        print(image.shape)
        image = image.to(device)
        print(image.shape)
        output = model(image)

    op, predicted = torch.max(output.data, 1)
    print(predicted.item(), classes[predicted.item()], op.item())
    return predicted.item(), classes[predicted.item()], op.item()
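A hypothetical call, assuming an RGB input image:

img = Image.open('sample.jpg').convert('RGB')  # 'sample.jpg' is a placeholder
idx, name, score = check_for_gloves(img)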
Example #19
from konlpy.tag import Komoran
from tensorflow.keras import Model

from model import ClassificationModel, input_shape

if __name__ == "__main__":
    komoran = Komoran()

    model_parent = ClassificationModel()
    model = model_parent.build_model()
    embedding = model_parent.embedding

    model.load_weights("curse_detection/weights-short.h5")

    att_model = Model(inputs=[model.input], outputs=model.layers[10].output)

    while True:
        inp = input(':')
        inp, mask = embedding([komoran.morphs(inp)])
        out = model.predict((inp, mask)).squeeze(1)
        att = att_model.predict((inp, mask))[1].squeeze(2)
        print(att)
        print(out)
Example #20
import tensorflow as tf
from data_loader import DataLoader
from model import ClassificationModel
from config import *

# Create tensorflow session
sess = tf.Session()
# Build model graph
model = ClassificationModel(sess, "DBC")
# Initialize the model graph
sess.run(tf.global_variables_initializer())

# Build dataset pipeline graph
train_dataset = DataLoader(BATCH_SIZE)
# Get end of dataset pipeline
img, labels = train_dataset.get_train_data()
img_val, label_val = train_dataset.get_val_data()


epoch = 0
iter = 0
while True:
    try:
        # Fetch the dataset (tf.Tensor -> numpy array)
        _img, _label = sess.run([img, labels])
        # print(_img.shape)
        # print(_img[0].shape)
        # print(_label)
        # import cv2
        # cv2.imshow("img", _img[0])
        # cv2.waitKey(0)
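        # (The snippet is truncated here. A typical completion, assuming the
        # standard TF1 end-of-data pattern rather than the author's actual
        # code, catches the exhausted input pipeline:)
    except tf.errors.OutOfRangeError:
        # The dataset iterator is exhausted: one epoch finished.
        epoch += 1
        break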