Example #1
    def __init__(self, sess, args):
        self.sess = sess
        self.phase = args.phase
        self.continue_train = args.continue_train
        self.data_dir = args.data_dir
        self.log_dir = args.log_dir
        self.ckpt_dir = args.ckpt_dir
        self.sample_dir = args.sample_dir
        self.test_dir = args.test_dir
        self.epoch = args.epoch
        self.batch_size = args.batch_size
        self.input_size = args.input_size
        self.image_c = args.image_c
        self.label_n = args.label_n
        self.nf = args.nf
        self.lr = args.lr
        self.beta1 = args.beta1
        self.sample_step = args.sample_step
        self.log_step = args.log_step
        self.ckpt_step = args.ckpt_step

        # hyperparameters for building the module
        OPTIONS = namedtuple('options',
                             ['batch_size', 'nf', 'label_n', 'phase'])
        self.options = OPTIONS(self.batch_size, self.nf, self.label_n,
                               self.phase)

        # build model & make checkpoint saver
        self.build_model()
        self.saver = tf.train.Saver()

        # labels
        self.labels_dic = util.get_labels(os.path.join('data', 'labels.xlsx'))
Example #2
    def eval(self, batches):
        loss = 0.0
        count = 0
        (tp, fp, tn, fn) = (0, 0, 0, 0)
        print("Evaluator ... ")
        for batch in batches:
            pairs = util.get_tuples(batch, volatile=True)
            labels = util.get_labels(batch, volatile=True)
            print("batch {}: \n".format(count))
            print("\t pairs: ", pairs)
            #           print("\t labels: ",labels)
            #            try:
            score = self.model.forward(pairs).squeeze()
            print("\t computed score: {}".format(score))
            print("\t actual labels : {}".format(labels))

            (tp, fp, tn, fn) = self.compare(count, score, labels,
                                            (tp, fp, tn, fn))

            print("score (at batch {}): {}".format(count,
                                                   score).encode('utf-8'))
            loss += self.criterion(score, labels).data.cpu().numpy()[0]
            print("loss (after {} batches): {}".format(count,
                                                       loss).encode('utf-8'))
            #            except:
            #                print("Error in evaluation {}".format(sys.exc_info()))
            count += 1

        print("Counts: (TP,FP,TN,FN): ", (tp, fp, tn, fn))

        print("Precision: ", tp / (tp + fp))
        print("Recall:    ", tp / (tp + fn))

        return loss / count
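
The compare helper used above is not part of this snippet. A minimal, hypothetical sketch, assuming scores are thresholded at 0.5 against 0/1 labels (written as a standalone function over plain Python sequences, purely for illustration):

# Hypothetical sketch of the compare helper called in eval() above: threshold
# each score at 0.5 and tally confusion counts against 0/1 labels.
def compare(batch_idx, scores, labels, counts, threshold=0.5):
    tp, fp, tn, fn = counts
    for score, label in zip(scores, labels):
        predicted = 1 if float(score) >= threshold else 0
        if predicted == 1 and int(label) == 1:
            tp += 1
        elif predicted == 1:
            fp += 1
        elif int(label) == 0:
            tn += 1
        else:
            fn += 1
    return tp, fp, tn, fn

# e.g. compare(0, [0.9, 0.2], [1, 0], (0, 0, 0, 0)) returns (1, 0, 1, 0)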
Example #3
def generate_toy(num_x):

    # get the x features
    df = DataFrame(np.random.randn(cts.num_examples, num_x), columns=util.get_feature_names(cts.x, num_x))
    df = concat([df, df.apply(lambda row: Series(util.get_labels(row)), axis=1)], axis=1)

    return df
Example #4
def main():
    model_name = 'generator_e_59'
    model_name = os.path.join(pp.MODEL_SAVES, model_name)
    model = Generator()
    chainer.serializers.load_npz(model_name, model)

    num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TEST)
    all_names = np.array(util.get_names_h5_file(pp.FC6_TEST_H5))

    y_tmp = np.zeros((num_features, 32 * 32 * 3), dtype=np.float32)
    target_tmp = np.zeros((num_features, 32 * 32 * 3), dtype=np.float32)

    save_list_names = os.listdir('/home/gabi/Documents/temp_datasets/test_celeba_reconstruction_m99')
    save_list_names = [i.split('_')[0]+'.jpg' for i in save_list_names]
    # save_list = random.sample(xrange(num_features), 100)
    # save_list_names = [''] * 100
    # cnt = 0
    # for i in save_list:
    #     save_list_names[cnt] = util.sed_line(pp.CELEB_FACES_FC6_TEST, i).strip().split(',')[0]
    #     cnt += 1

    cnt = 0
    for i in all_names:
        features = util.get_features_h5_in_batches([i], train=False)
        features = util.to_correct_input(features)
        labels = util.get_labels([i])
        labels = np.asarray(labels, dtype=np.float32)
        target_tmp[cnt] = labels

        with chainer.using_config('train', False):
            f = np.expand_dims(features[0], 0)
            prediction = model(f)
            y_tmp[cnt] = prediction.data[0]
            if i in save_list_names:
                util.save_image(prediction, i, epoch=0)
                print("image '%s' saved" % i)

        cnt += 1

    # calculate validation loss
    y_tmp = y_tmp.astype(np.float32)
    target_tmp = target_tmp.astype(np.float32)
    loss = chainer.functions.mean_absolute_error(y_tmp, target_tmp)
    print('model: ', model_name, ' loss model: ', loss)
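
util.get_labels is not shown in these Chainer snippets; given the (num_features, 32 * 32 * 3) target buffer above, it appears to load a 32x32 RGB ground-truth image per file name and flatten it. A rough, hypothetical sketch (IMAGE_DIR and the /255 scaling are assumptions, not from the original code):

# Hypothetical sketch of util.get_labels for the reconstruction examples:
# load the ground-truth 32x32 RGB image for each name and flatten it to a
# 3072-dim float32 vector.
import os
import numpy as np
from PIL import Image

IMAGE_DIR = 'data/celeba_32x32'  # assumed location of the target images

def get_labels(names):
    labels = []
    for name in names:
        img = Image.open(os.path.join(IMAGE_DIR, name)).convert('RGB').resize((32, 32))
        labels.append(np.asarray(img, dtype=np.float32).ravel() / 255.0)
    return labels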
Example #5
    def fprop(self, batch, volatile=False):
        pairs = util.get_tuples(batch, volatile)
        labels = util.get_labels(batch, volatile)

        score = self.model.forward(pairs).squeeze()
        loss = self.criterion(score, labels)

        return loss
Example #6
num_svms=6
width=0.5

svmList = [None]*num_svms
trainfeatList = [None]*num_svms
traindatList = [None]*num_svms
trainlabList = [None]*num_svms
trainlabsList = [None]*num_svms
kernelList = [None]*num_svms

for i in range(num_svms):
	pos=util.get_realdata(True)
	neg=util.get_realdata(False)
	traindatList[i] = concatenate((pos, neg), axis=1)
	trainfeatList[i] = util.get_realfeatures(pos, neg)
	trainlabsList[i] = util.get_labels(True)
	trainlabList[i] = util.get_labels()
	kernelList[i] = GaussianKernel(trainfeatList[i], trainfeatList[i], width)
	svmList[i] = LibSVM(10, kernelList[i], trainlabList[i])

for i in range(num_svms):
	print "Training svm nr. %d" % (i)
	currentSVM = svmList[i]
	currentSVM.train()
	print currentSVM.get_num_support_vectors()
	print "Done."
	x, y, z=util.compute_output_plot_isolines(
		currentSVM, kernelList[i], trainfeatList[i])
	subplot(num_svms/2, 2, i+1)
	pcolor(x, y, z, shading='interp')
	contour(x, y, z, linewidths=1, colors='black', hold=True)
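
In these shogun plotting demos, util.get_labels supplies the +1/-1 class labels for the two generated point clouds; the call without arguments (passed to LibSVM) presumably returns a shogun labels object, while get_labels(True) appears to return the raw array. A rough sketch of the raw label generation only, so as not to guess the exact shogun API (NUM_POINTS is an assumption):

# Hypothetical sketch of the raw +1/-1 label generation behind util.get_labels
# in the shogun demos; the real helper presumably wraps this array in a shogun
# labels object when raw labels are not requested.
from numpy import concatenate, ones

NUM_POINTS = 100  # assumed number of points per class

def get_raw_labels():
    return concatenate((-ones(NUM_POINTS), ones(NUM_POINTS)))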
Example #7
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default="",
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the training files for the regard classification task.",
    )
    parser.add_argument(
        "--test_file",
        default=None,
        type=str,
        required=False,
        help="Test file, if None, defaults to `test.tsv` file in data_dir.")
    parser.add_argument(
        "--model_version",
        default=2,
        type=int,
        required=False,
        help="1 or 2.",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Model type selected in the list: " +
        ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: "
        + ", ".join(ALL_MODELS),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The output directory where the model predictions and checkpoints will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help=
        "Where do you want to store the pre-trained models downloaded from s3",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help=
        "The maximum total input sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_predict",
                        action="store_true",
                        help="Whether to run predictions on the test set.")
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Whether to run evaluation during training at each logging step.",
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.")

    parser.add_argument("--per_gpu_train_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size",
                        default=8,
                        type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help=
        "Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay",
                        default=0.0,
                        type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm",
                        default=1.0,
                        type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help=
        "If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")

    parser.add_argument("--logging_steps",
                        type=int,
                        default=50,
                        help="Log every X updates steps.")
    parser.add_argument("--save_steps",
                        type=int,
                        default=50,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help=
        "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number",
    )
    parser.add_argument("--no_cuda",
                        action="store_true",
                        help="Avoid using CUDA when available")
    parser.add_argument("--overwrite_output_dir",
                        action="store_true",
                        help="Overwrite the content of the output directory")
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training and evaluation sets")
    parser.add_argument("--seed",
                        type=int,
                        default=42,
                        help="random seed for initialization")

    parser.add_argument(
        "--fp16",
        action="store_true",
        help=
        "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help=
        "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="For distributed training: local_rank")
    parser.add_argument("--server_ip",
                        type=str,
                        default="",
                        help="For distant debugging.")
    parser.add_argument("--server_port",
                        type=str,
                        default="",
                        help="For distant debugging.")
    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Prepare regard classification task
    labels = get_labels(model_version=args.model_version)
    num_labels = len(labels)
    # Use cross entropy ignore index as padding label id so that only real label ids contribute to the loss later
    pad_token_label_id = CrossEntropyLoss().ignore_index

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name
        if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                tokenizer,
                                                labels,
                                                pad_token_label_id,
                                                data_file=TRAIN_FILE_PATTERN,
                                                is_test=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer,
                                     labels, pad_token_label_id)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result, _ = evaluate(args,
                                 model,
                                 tokenizer,
                                 labels,
                                 pad_token_label_id,
                                 mode=DEV_FILE_PATTERN,
                                 prefix=global_step,
                                 is_test=False)
            if global_step:
                result = {
                    "{}_{}".format(global_step, k): v
                    for k, v in result.items()
                }
            results.update(result)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            for key in sorted(results.keys()):
                writer.write("{} = {}\n".format(key, str(results[key])))

    if args.do_predict and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        model = model_class.from_pretrained(args.model_name_or_path)
        model.to(args.device)
        if args.test_file:
            test_file = args.test_file
        elif os.path.exists(os.path.join(args.data_dir, TEST_FILE_PATTERN)):
            test_file = TEST_FILE_PATTERN
        else:
            raise NotImplementedError(
                "No test_file provided and %s DNE." %
                os.path.join(args.data_dir, TEST_FILE_PATTERN))
        result, predictions = evaluate(args,
                                       model,
                                       tokenizer,
                                       labels,
                                       pad_token_label_id,
                                       mode=test_file,
                                       is_test=True)
        test_file_basename = os.path.basename(test_file).split('.')[0]
        # Save predictions
        output_test_predictions_file = os.path.join(
            args.output_dir, test_file_basename + "_predictions.txt")
        with open(output_test_predictions_file, "w") as writer:
            with open(os.path.join(args.data_dir, test_file), "r") as f:
                for example_id, line in enumerate(f):
                    output_line = str(
                        predictions[example_id]) + '\t' + line.split(
                            '\t')[-1].strip() + "\n"
                    writer.write(output_line)

    return results
Example #8
import util

util.set_title('KernelRidgeRegression')

width = 20

# positive examples
pos = util.get_realdata(True)
plot(pos[0, :], pos[1, :], "r.")

# negative examples
neg = util.get_realdata(False)
plot(neg[0, :], neg[1, :], "b.")

# train krr
labels = util.get_labels(type='regression')
train = util.get_realfeatures(pos, neg)
gk = GaussianKernel(train, train, width)
krr = KernelRidgeRegression()
krr.set_labels(labels)
krr.set_kernel(gk)
krr.set_tau(1e-3)
krr.train()

# compute output plot iso-lines
x, y, z = util.compute_output_plot_isolines(krr, gk, train, regression=True)

pcolor(x, y, z)
contour(x, y, z, linewidths=1, colors='black', hold=True)

connect('key_press_event', util.quit)
Example #9
from util import get_labels
import lightgbm as lgb
from datetime import datetime

import matplotlib.pylab as plt
#matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 12, 4
import numpy as np
import pandas as pd

np.random.seed(seed=SEED)

n_estimators = 100

labels = get_labels()

for net in networks.keys():
    print(f'Loading training data for {net}...')
    with open(f'bottleneck_features_avg/{net}_avg_features_train.npy',
              'rb') as f:
        x_train = np.load(f)
        print(f'Features shape: {x_train.shape}')

    le = LabelEncoder()
    le.fit(labels['breed'])
    y_train = le.transform(labels['breed'])

    print('Creating train/val split...')
    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
Example #10
import util

util.set_title('KernelRidgeRegression')

width=20

# positive examples
pos=util.get_realdata(True)
plot(pos[0,:], pos[1,:], "r.")

# negative examples
neg=util.get_realdata(False)
plot(neg[0,:], neg[1,:], "b.")

# train svm
labels = util.get_labels(type='regression')
train = util.get_realfeatures(pos, neg)
gk=GaussianKernel(train, train, width)
krr = KernelRidgeRegression()
krr.set_labels(labels)
krr.set_kernel(gk)
krr.set_tau(1e-3)
krr.train()

# compute output plot iso-lines
x, y, z=util.compute_output_plot_isolines(krr, gk, train, regression=True)

pcolor(x, y, z, shading='interp')
contour(x, y, z, linewidths=1, colors='black', hold=True)

connect('key_press_event', util.quit)
Example #11
        return G, node_sentences_pair


if __name__ == '__main__':

    print('### Tokenizing and Preprocessing ###')
    G, node_sentences_pair = GenerateGraph(
        train_input_h='corpora/assin+msr/train/train-h.txt',
        train_input_t='corpora/assin+msr/train/train-t.txt',
        test_input_h='corpora/assin+msr/test/test-h.txt',
        test_input_t='corpora/assin+msr/test/test-t.txt',
    ).create_graph()

    print(len(node_sentences_pair))
    train_labels = util.get_labels('corpora/assin+msr/labels-train.txt')
    test_labels = util.get_labels('corpora/assin+msr/labels-test.txt')
    reg = Regularization()
    reg.regulariza(G,
                   node_sentences_pair,
                   train_labels,
                   '',
                   total_pre_anotados=0.4,
                   method='llgc')

    df_train = pd.read_csv("features_2800_pre_anotados_train.csv")
    df_test = pd.read_csv("features_2800_pre_anotados_test.csv")

    clf = MLPClassifier(solver='adam',
                        hidden_layer_sizes=(20, 20),
                        random_state=42,
Example #12
"""
train, valid, test = get_dis(data_dir, prefix, params.corpus)
word_vec = build_vocab(train['s1'] + train['s2'] +
                       valid['s1'] + valid['s2'] +
                       test['s1'] + test['s2'], glove_path)

# unknown words instead of map to <unk>, this directly takes them out
for split in ['s1', 's2']:
    for data_type in ['train', 'valid', 'test']:
        eval(data_type)[split] = np.array([['<s>'] +
                                           [word for word in sent.split() if word in word_vec] +
                                           ['</s>'] for sent in eval(data_type)[split]])

params.word_emb_dim = 300

dis_labels = get_labels(params.corpus)
label_size = len(dis_labels)

"""
MODEL
"""
# model config
config_dis_model = {
    'n_words': len(word_vec),
    'word_emb_dim': params.word_emb_dim,
    'enc_lstm_dim': params.enc_lstm_dim,
    'n_enc_layers': params.n_enc_layers,
    'dpout_emb': params.dpout_emb,
    'dpout_model': params.dpout_model,
    'dpout_fc': params.dpout_fc,
    'fc_dim': params.fc_dim,
Example #13
num_svms = 6
width = 0.5

svmList = [None] * num_svms
trainfeatList = [None] * num_svms
traindatList = [None] * num_svms
trainlabList = [None] * num_svms
trainlabsList = [None] * num_svms
kernelList = [None] * num_svms

for i in range(num_svms):
    pos = util.get_realdata(True)
    neg = util.get_realdata(False)
    traindatList[i] = concatenate((pos, neg), axis=1)
    trainfeatList[i] = util.get_realfeatures(pos, neg)
    trainlabsList[i] = util.get_labels(True)
    trainlabList[i] = util.get_labels()
    kernelList[i] = GaussianKernel(trainfeatList[i], trainfeatList[i], width)
    svmList[i] = LibSVM(10, kernelList[i], trainlabList[i])

for i in range(num_svms):
    print "Training svm nr. %d" % (i)
    currentSVM = svmList[i]
    currentSVM.train()
    print currentSVM.get_num_support_vectors()
    print "Done."
    x, y, z = util.compute_output_plot_isolines(currentSVM, kernelList[i],
                                                trainfeatList[i])
    subplot(num_svms / 2, 2, i + 1)
    pcolor(x, y, z)
    contour(x, y, z, linewidths=1, colors='black', hold=True)
Example #14
from keras.applications import inception_v3
from keras.models import load_model
from tqdm import tqdm
from util import get_labels, get_images
import csv
import numpy as np

# Define constants
INPUT_SIZE = 299
fname = 'model1_finetune.h5'
nr_predictions = 10357

# Get ids, label names and images
print('Load data...')
labels = get_labels().sort_values(by=['breed']).breed.unique()
ids = []
images = np.zeros((nr_predictions, INPUT_SIZE, INPUT_SIZE, 3), dtype='float16')
for i, (img, img_id) in tqdm(enumerate(get_images('test', INPUT_SIZE))):
    x = inception_v3.preprocess_input(np.expand_dims(img, axis=0))
    images[i] = x
    ids.append(img_id)

# Load model weights
print(f'Load model from {fname}')
model = load_model(fname)

# Make predictions on input images
print('Predict...')
predictions = model.predict(images, verbose=1)
Example #15
def training():
    print('setting up...')

    if pc.TRAIN:
        num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TRAIN)
        # num_features = util.get_number_of_features_from_train(pp.CELEB_FACES_FC6_TRAIN)  # for server
        all_names = np.array(util.get_names_h5_file(pp.FC6_TRAIN_H5))
        path_images = pp.CELEB_FACES_FC6_TRAIN
    else:
        num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TEST)
        all_names = np.array(util.get_names_h5_file(pp.FC6_TEST_H5))
        path_images = pp.CELEB_FACES_FC6_TEST

    total_steps = num_features / pc.BATCH_SIZE
    mask_L_sti = util.get_L_sti_mask()

    # ----------------------------------------------------------------
    # GENERATOR
    generator = Generator()
    # generator = GeneratorPaper()
    generator_train_loss = np.zeros(pc.EPOCHS)
    generator_optimizer = chainer.optimizers.Adam(alpha=0.0002,
                                                  beta1=0.9,
                                                  beta2=0.999,
                                                  eps=10**-8)
    generator_optimizer.setup(generator)
    # ----------------------------------------------------------------
    # DISCRIMINATOR
    discriminator = Discriminator()
    # discriminator = DiscriminatorPaper()
    discriminator_train_loss = np.zeros(pc.EPOCHS)
    discriminator_optimizer = chainer.optimizers.Adam(alpha=0.0002,
                                                      beta1=0.9,
                                                      beta2=0.999,
                                                      eps=10**-8)
    discriminator_optimizer.setup(discriminator)
    # ----------------------------------------------------------------
    # VGG16 FOR FEATURE LOSS
    vgg16 = VGG16Layers()
    # ----------------------------------------------------------------

    save_list = random.sample(xrange(num_features), 20)
    save_list_names = [''] * 20
    cnt = 0

    for i in save_list:
        save_list_names[cnt] = util.sed_line(path_images,
                                             i).strip().split(',')[0]
        cnt += 1

    ones1 = util.make_ones(generator)
    zeros = util.make_zeros(generator)

    print('training...')
    for epoch in range(pc.EPOCHS):

        # shuffle training instances
        order = range(num_features)
        random.shuffle(order)

        names_order = all_names[order]
        train_gen = True
        train_dis = True

        print('epoch %d' % epoch)
        for step in range(total_steps):
            names = names_order[step * pc.BATCH_SIZE:(step + 1) *
                                pc.BATCH_SIZE]
            features = util.get_features_h5_in_batches(names, train=pc.TRAIN)
            features = util.to_correct_input(features)
            labels_32, labels_224 = util.get_labels(names)
            # labels_32 = util.get_labels(names)
            # vgg16_features = util.get_features_h5_in_batches(names, train=pc.TRAIN, which_features='vgg16')
            # vgg16_features = util.to_correct_input(vgg16_features)
            # labels_32 = np.asarray(labels_32, dtype=np.float32)

            with chainer.using_config('train', train_gen):
                generator.cleargrads()
                prediction = generator(chainer.Variable(features))

            with chainer.using_config('train', train_dis):
                discriminator.cleargrads()
                print('prediction shape', np.shape(prediction.data))
                data = np.reshape(
                    generator(chainer.Variable(features)).data,
                    (pc.BATCH_SIZE, 32, 32, 3))
                data = np.transpose(data, (0, 3, 1, 2))
                fake_prob = discriminator(chainer.Variable(data))

                other_data = np.reshape(labels_32, (pc.BATCH_SIZE, 32, 32, 3))
                other_data = np.transpose(other_data, (0, 3, 1, 2))
                real_prob = discriminator(chainer.Variable(other_data))

                feature_truth = vgg16(labels_224,
                                      layers=['conv3_3'])['conv3_3']
                feature_reconstruction = vgg16(util.fix_prediction_for_vgg16(
                    prediction, vgg16),
                                               layers=['conv3_3'])['conv3_3']
                # feature_reconstruction = None
                # ----------------------------------------------------------------
                # CALCULATE LOSS
                lambda_adv = 10**2
                lambda_sti = 2 * (10**-6)
                lambda_fea = 10**-2
                l_adv = lambda_adv * F.sigmoid_cross_entropy(
                    fake_prob, ones1.data)
                # TODO: mask is probably breaking the graph, fix this
                thing_1 = util.apply_mask(labels_32, mask_L_sti)
                thing_2 = util.apply_mask(prediction.data, mask_L_sti)
                l_sti = lambda_sti * F.mean_squared_error(thing_1, thing_2)
                l_fea = lambda_fea * F.mean_squared_error(
                    feature_truth, feature_reconstruction)
                generator_loss = l_adv + l_sti + l_fea
                generator_loss.backward()
                generator_optimizer.update()
                generator_train_loss[epoch] += generator_loss.data

                lambda_dis = 10**2
                discriminator_loss = lambda_dis * (
                    F.sigmoid_cross_entropy(real_prob, ones1.data) +
                    F.sigmoid_cross_entropy(fake_prob, zeros.data))
                discriminator_loss.backward()
                discriminator_optimizer.update()
                discriminator_train_loss[epoch] += discriminator_loss.data

                # ----------------------------------------------------------------
                # when to suspend / resume training
                dis_adv_ratio = discriminator_loss.data / l_adv.data

                if dis_adv_ratio < 0.1:
                    train_dis = False
                if dis_adv_ratio > 0.5:
                    train_dis = True

                if dis_adv_ratio > 10:
                    train_gen = False
                if dis_adv_ratio < 2:
                    train_gen = True

                # print('%d/%d %d/%d  generator: %f   l_adv: %f  l_sti: %f   discriminator: %f  l3: %f  l4: %f' % (
                # epoch, pc.EPOCHS, step, total_steps, generator_loss.data, l_adv.data, l_sti.data, discriminator_loss.data,
                # l3.data, l4.data))
                print(
                    '%d/%d %d/%d  generator: %f   l_adv: %f  l_sti: %f   l_fea: %f    discriminator: %f    dis/adv: %f'
                    % (epoch, pc.EPOCHS, step, total_steps,
                       generator_loss.data, l_adv.data, l_sti.data, l_fea.data,
                       discriminator_loss.data, dis_adv_ratio))

                # information = util.update_information(information1, step, generator_loss.data, l_adv.data, l_sti.data)
                # information = util.update_information(information2, step, discriminator_loss.data, l3.data, l4.data)

                # visualizing loss
                # prev_max_ax1 = util.plot_everything(information1, fig1, lines1, ax1, prev_max_ax1, step)
                # prev_max_ax2 = util.plot_everything(information2, fig2, lines2, ax2, prev_max_ax2, step)

            with chainer.using_config('train', False):
                for i in range(len(names)):
                    if names[i] in save_list_names:
                        f = np.expand_dims(features[i], 0)
                        prediction = generator(f)
                        util.save_image(prediction, names[i], epoch,
                                        pp.RECONSTRUCTION_FOLDER)
                        print("image '%s' saved" % names[i])

        # if (epoch+1) % pc.SAVE_EVERY_N_STEPS == 0:
        #     util.save_model(generator, epoch)

        generator_train_loss[epoch] /= total_steps
        print(generator_train_loss[epoch])

        discriminator_train_loss[epoch] /= total_steps
        print(discriminator_train_loss[epoch])
Example #16
    def create_db(self, rpt_file, db_file):
        import sqlite3
        con = sqlite3.connect(db_file)
        # con.execute("PRAGMA foreign_keys = ON")

        # Create table
        con.execute("""
        CREATE TABLE IF NOT EXISTS nodes
        (id INTEGER PRIMARY KEY,
         parent INTEGER,
         last INTEGER,
         name TEXT,
         cell TEXT,
         internal REAL,
         switching REAL,
         leakage REAL,
         total REAL,
         FOREIGN KEY(parent) REFERENCES nodes(id),
         FOREIGN KEY(last) REFERENCES nodes(id))
        """)

        header = []

        with open(rpt_file) as f:
            hier = []
            rows = []
            skip_header = True
            for k, line in enumerate(f):
                line_num = k + 1
                if skip_header:
                    if "----" in line:
                        skip_header = False
                        temp = []
                        for header_line in reversed(header):
                            if header_line.strip() == '':
                                break
                            temp.append(header_line)
                        labels = get_labels("\n".join(reversed(temp)))
                        print(labels)
                    else:
                        header.append(line)
                    continue

                if line == "1\n":
                    print(f"Done on line {line_num}")
                    break

                info = line.split()
                if len(info) == 6:
                    cell = None
                    name, internal, switching, leakage, total, percent = info
                elif len(info) == 7:
                    name, cell, internal, switching, leakage, total, percent = info
                elif len(info) == 10:
                    cell = None
                    name, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
                elif len(info) == 11:
                    name, cell, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
                else:
                    raise NotImplementedError(line)

                if cell is not None:
                    cell = cell.lstrip("(").rstrip(")")
                if total == "N/A":
                    total = None

                info = {
                    "indent": len(line) - len(line.lstrip(' ')),
                    "id": line_num,
                    "name": name,
                    "cell": cell,
                    "internal": internal,
                    "switching": switching,
                    "leakage": leakage,
                    "total": total,
                }

                while len(hier) > 0 and info["indent"] <= hier[-1]["indent"]:
                    node = hier.pop()
                    node["last"] = line_num - 1
                    node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
                    rows.append((
                        node["id"],
                        node["parent"],
                        node["last"],
                        node["name"],
                        node["cell"],
                        node["internal"],
                        node["switching"],
                        node["leakage"],
                        node["total"],
                    ))

                if len(rows) > self.batch_size:
                    con.executemany(
                        "INSERT INTO nodes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
                        rows)
                    rows.clear()

                hier.append(info)

            while len(hier) > 0:
                node = hier.pop()
                node["last"] = line_num - 1
                node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
                rows.append((
                    node["id"],
                    node["parent"],
                    node["last"],
                    node["name"],
                    node["cell"],
                    node["internal"],
                    node["switching"],
                    node["leakage"],
                    node["total"],
                ))

            con.executemany(
                "INSERT INTO nodes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", rows)

        con.commit()
        foreign_key_failures = con.execute(
            "PRAGMA foreign_key_check").fetchall()
        if len(foreign_key_failures) > 0:
            raise sqlite3.IntegrityError(
                f"Failed foreign key checks: {foreign_key_failures}")

        # print(f"split:   {time_split}")
        # print(f"update:  {time_update}")
        return con
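
In this example get_labels is applied to the column-header block of the power report rather than to dataset labels. A minimal, hypothetical sketch that simply pulls column names out of that header text:

# Hypothetical sketch of get_labels as used on the report header above: keep
# the non-empty header lines and return the whitespace-separated tokens of the
# last one as column labels. Real report headers may wrap names over several
# lines, so this is only an approximation.
def get_labels(header_text):
    lines = [line for line in header_text.splitlines() if line.strip()]
    return lines[-1].split() if lines else []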
Example #17
    def create_df(self, rpt_file, db_file):
        header = []

        with open(rpt_file) as f:
            hier = []
            rows = []
            skip_header = True
            for k, line in enumerate(f):
                line_num = k + 1
                if skip_header:
                    if "----" in line:
                        skip_header = False
                        temp = []
                        for header_line in reversed(header):
                            if header_line.strip() == '':
                                break
                            temp.append(header_line)
                        labels = get_labels("\n".join(reversed(temp)))
                        print(labels)
                    else:
                        header.append(line)
                    continue

                if line == "1\n":
                    print(f"Done on line {line_num}")
                    break

                info = line.split()
                if len(info) == 6:
                    cell = None
                    name, internal, switching, leakage, total, percent = info
                elif len(info) == 7:
                    name, cell, internal, switching, leakage, total, percent = info
                elif len(info) == 10:
                    cell = None
                    name, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
                elif len(info) == 11:
                    name, cell, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
                else:
                    raise NotImplementedError(line)

                if cell is not None:
                    cell = cell.lstrip("(").rstrip(")")

                info = {
                    "indent": len(line) - len(line.lstrip(' ')),
                    "id": line_num,
                    "name": name,
                    "cell": cell,
                    "internal": None if internal == "N/A" else float(internal),
                    "switching":
                    None if switching == "N/A" else float(switching),
                    "leakage": None if leakage == "N/A" else float(leakage),
                    "total": None if total == "N/A" else float(total),
                }

                while len(hier) > 0 and info["indent"] <= hier[-1]["indent"]:
                    node = hier.pop()
                    node["last"] = line_num - 1
                    node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
                    rows.append(node)

                hier.append(info)

            while len(hier) > 0:
                node = hier.pop()
                node["last"] = line_num - 1
                node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
                rows.append(node)

        return pd.DataFrame(rows)
Example #18
n_pre_epochs = 10
n_epochs = 100
batch_size = 32
n_images = 300
USE_PCA = True
USE_GENSEL = False
USE_AUTOENC = False #TODO
USE_ICA = True
USE_CANNY = True
USE_CORNERHARRIS = True
TRAIN = False
CHANNELS = 3

# Load labels
print('Load labels...')
labels = get_labels()[:n_images]

# Load training data
print('Load training data...')
x_train = np.zeros((n_images, INPUT_SIZE, INPUT_SIZE, 3), dtype=K.floatx())
for i, (img, img_id) in tqdm(enumerate(get_images('train', INPUT_SIZE, amount=n_images))):
    x = inception_v3.preprocess_input(np.expand_dims(img, axis=0))
    x_train[i] = x
y_train = one_hot(labels['breed'].values, num_classes=NUM_CLASSES)

# Arguments of ImageDataGenerator define types of augmentation to be performed
# E.g: Horizontal flip, rotation, etc...
# no fitting required since we don't use centering/normalization/whitening
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=.2,
Example #19
def preprocess(input, segmentation, extraction, use_letters, dim):
    segmented = segmentation.segment(input)
    feature_vector = extraction.extract(segmented)
    labels = get_labels(input.shape[1], input.shape[0], dim, dim, use_letters)

    return [feature_vector, labels]
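
Here get_labels produces one label per grid cell of the segmented input rather than per example. A hypothetical sketch, assuming the image is tiled into dim x dim cells labelled either by letters or by integer indices:

# Hypothetical sketch of get_labels as used in preprocess() above: one label
# per (cell_w x cell_h) cell of a width x height image, letters if requested,
# otherwise integer indices.
import string

def get_labels(width, height, cell_w, cell_h, use_letters):
    n_cells = (width // cell_w) * (height // cell_h)
    if use_letters:
        return [string.ascii_uppercase[i % 26] for i in range(n_cells)]
    return list(range(n_cells))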
Example #20
class Classification:
    def __init__(self, model, features, labels):
        self.model = joblib.load(model)
        self.x = features
        self.y = labels

    def classifier(self):
        y_pred = self.model.predict(self.x)
        return classification_report(self.y, y_pred)


if __name__ == '__main__':

    # algorithms = ['bayes.pkl', 'lre.pkl', 'nn.pkl', 'svm.pkl', 'tree.pkl']
    algorithms = ['svm.pkl']
    # model = 'trained-model/assin_res_mod_skip300'
    model = 'trained-model/assin+msr_mod_glove50'
    # model = 'trained-model/mod_'
    # test = 'features/test/features-test-all1.txt'
    print('### Extracting features ###')
    features, _ = ExtractFeatures(
        model='model/glove50.txt',
        input_h='assin+msr/test/test-h.txt',
        input_t='assin+msr/test/test-t.txt').extract_features()
    # test = 'baseline/test/features-test.txt'
    labels = util.get_labels('assin+msr/labels-test.txt')
    print('### Classifying ###')
    for a in algorithms:
        print(Classification(model + a, features, labels).classifier())
Example #21
def training():
    print('setting up...')
    # num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TRAIN)
    num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TEST)
    total_steps = num_features / pc.BATCH_SIZE
    # all_names = np.array(util.get_names_h5_file(pp.FC6_TRAIN_H5))
    all_names = np.array(util.get_names_h5_file(pp.FC6_TEST_H5))
    generator = Generator()
    train_loss = np.zeros(pc.EPOCHS)
    # generator = GeneratorPaper()
    optimizer = chainer.optimizers.Adam(alpha=0.0002,
                                        beta1=0.5,
                                        beta2=0.999,
                                        eps=10e-8)
    optimizer.setup(generator)
    save_list = random.sample(xrange(num_features), 20)
    save_list_names = [''] * 20
    cnt = 0

    for i in save_list:
        # save_list_names[cnt] = util.sed_line(pp.CELEB_FACES_FC6_TRAIN, i).strip().split(',')[0]
        save_list_names[cnt] = util.sed_line(pp.CELEB_FACES_FC6_TEST,
                                             i).strip().split(',')[0]
        cnt += 1

    print('training...')
    for epoch in range(pc.EPOCHS):

        # shuffle training instances
        order = range(num_features)
        random.shuffle(order)

        names_order = all_names[order]

        print('epoch %d' % epoch)
        for step in range(total_steps):
            # names, features = util.get_features_in_batches(step, train=True)
            names = names_order[step * pc.BATCH_SIZE:(step + 1) *
                                pc.BATCH_SIZE]
            # features = util.get_features_h5_in_batches(names, train=True)
            features = util.get_features_h5_in_batches(names, train=False)
            features = util.to_correct_input(features)
            labels = util.get_labels(names)
            labels = np.asarray(labels, dtype=np.float32)

            with chainer.using_config('train', True):
                generator.cleargrads()
                prediction = generator(features)
                loss = chainer.functions.mean_absolute_error(
                    prediction, labels)
                # print('loss', loss.data)
                print('%d/%d %d/%d loss: %f' %
                      (epoch, pc.EPOCHS, step, total_steps, float(loss.data)))
                loss.backward()
                optimizer.update()
                train_loss[epoch] += loss.data

            # with chainer.using_config('train', False):
            #     for i in range(len(names)):
            #         if names[i] in save_list_names:
            #             f = np.expand_dims(features[i], 0)
            #             prediction = generator(f)
            #             util.save_image(prediction, names[i], epoch)
            #             print("image '%s' saved" % names[i])

        if (epoch + 1) % pc.SAVE_EVERY_N_STEPS == 0:
            util.save_model(generator, epoch)

        train_loss[epoch] /= total_steps
        print(train_loss[epoch])
Example #22
File: prc.py Project: frx/shogun
from pylab import plot,grid,title,subplot,xlabel,ylabel,text,subplots_adjust,fill_between,mean,connect,show
from shogun.Kernel import GaussianKernel
from shogun.Classifier import LibSVM, LDA
from shogun.Evaluation import PRCEvaluation
import util

util.set_title('PRC example')
util.DISTANCE=0.5
subplots_adjust(hspace=0.3)

pos=util.get_realdata(True)
neg=util.get_realdata(False)
features=util.get_realfeatures(pos, neg)
labels=util.get_labels()

# classifiers
gk=GaussianKernel(features, features, 1.0)
svm = LibSVM(1000.0, gk, labels)
svm.train()
lda=LDA(1,features,labels)
lda.train()

## plot points
subplot(211)
plot(pos[0,:], pos[1,:], "r.")
plot(neg[0,:], neg[1,:], "b.")
grid(True)
title('Data',size=10)

# plot PRC for SVM
subplot(223)
Example #23
def detect_object_in(model_path, label_map_path, video_src):
    PATH_TO_FROZEN_GRAPH = model_path + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = label_map_path
    detection_graph = tf.Graph()

    with detection_graph.as_default():
      od_graph_def = tf.GraphDef()
      with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
    with detection_graph.as_default():
        with create_session()  as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
                ]:
                tensor_name = key + ':0'

                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)

            cap = cv2.VideoCapture(video_src)
            video_running = True

            while True:
                ret , image_np = cap.read()

                output_dict = run_inference_for_single_image(sess,image_np, detection_graph, tensor_dict)

                detected_labels = get_labels(
                        output_dict['detection_boxes'],
                        output_dict['detection_classes'],
                        output_dict['detection_scores'],
                        category_index
                        )

                print(detected_labels)
                print("=================")
                vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        output_dict['detection_boxes'],
                        output_dict['detection_classes'],
                        output_dict['detection_scores'],
                        category_index,
                        instance_masks=output_dict.get('detection_masks'),
                        use_normalized_coordinates=True,
                        line_thickness=8)
                cv_image = cv2.resize(image_np, (800, 600))
                cv2.imshow("Press q to quit" , cv_image)

                if cv2.waitKey(25) & 0xFF == ord("q"):
                    cap.release()
                    cv2.destroyAllWindows()
                    break