def __init__(self, sess, args):
    self.sess = sess
    self.phase = args.phase
    self.continue_train = args.continue_train
    self.data_dir = args.data_dir
    self.log_dir = args.log_dir
    self.ckpt_dir = args.ckpt_dir
    self.sample_dir = args.sample_dir
    self.test_dir = args.test_dir
    self.epoch = args.epoch
    self.batch_size = args.batch_size
    self.input_size = args.input_size
    self.image_c = args.image_c
    self.label_n = args.label_n
    self.nf = args.nf
    self.lr = args.lr
    self.beta1 = args.beta1
    self.sample_step = args.sample_step
    self.log_step = args.log_step
    self.ckpt_step = args.ckpt_step

    # hyper parameters for building the module
    OPTIONS = namedtuple('options', ['batch_size', 'nf', 'label_n', 'phase'])
    self.options = OPTIONS(self.batch_size, self.nf, self.label_n, self.phase)

    # build model & make checkpoint saver
    self.build_model()
    self.saver = tf.train.Saver()

    # labels
    self.labels_dic = util.get_labels(os.path.join('data', 'labels.xlsx'))

def eval(self, batches):
    loss = 0.0
    count = 0
    (tp, fp, tn, fn) = (0, 0, 0, 0)
    print("Evaluator ... ")
    for batch in batches:
        pairs = util.get_tuples(batch, volatile=True)
        labels = util.get_labels(batch, volatile=True)
        print("batch {}: \n".format(count))
        print("\t pairs: ", pairs)
        # print("\t labels: ", labels)
        # try:
        score = self.model.forward(pairs).squeeze()
        print("\t computed score: {}".format(score))
        print("\t actual labels : {}".format(labels))
        (tp, fp, tn, fn) = self.compare(count, score, labels, (tp, fp, tn, fn))
        print("score (at batch {}): {}".format(count, score).encode('utf-8'))
        loss += self.criterion(score, labels).data.cpu().numpy()[0]
        print("loss (after {} batches): {}".format(count, loss).encode('utf-8'))
        # except:
        #     print("Error in evaluation {}".format(sys.exc_info()))
        count += 1
    print("Counts: (TP,FP,TN,FN): ", (tp, fp, tn, fn))
    print("Precision: ", tp / (tp + fp))
    print("Recall: ", tp / (tp + fn))
    return loss / count

def generate_toy(num_x):
    # get the x features
    df = DataFrame(np.random.randn(cts.num_examples, num_x),
                   columns=util.get_feature_names(cts.x, num_x))
    df = concat([df, df.apply(lambda row: Series(util.get_labels(row)), axis=1)], axis=1)
    return df

def main():
    model_name = 'generator_e_59'
    model_name = os.path.join(pp.MODEL_SAVES, model_name)
    model = Generator()
    chainer.serializers.load_npz(model_name, model)

    num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TEST)
    all_names = np.array(util.get_names_h5_file(pp.FC6_TEST_H5))

    y_tmp = np.zeros((num_features, 32 * 32 * 3), dtype=np.float32)
    target_tmp = np.zeros((num_features, 32 * 32 * 3), dtype=np.float32)

    save_list_names = os.listdir('/home/gabi/Documents/temp_datasets/test_celeba_reconstruction_m99')
    save_list_names = [i.split('_')[0] + '.jpg' for i in save_list_names]

    # save_list = random.sample(xrange(num_features), 100)
    # save_list_names = [''] * 100
    cnt = 0
    # for i in save_list:
    #     save_list_names[cnt] = util.sed_line(pp.CELEB_FACES_FC6_TEST, i).strip().split(',')[0]
    #     cnt += 1

    cnt = 0
    for i in all_names:
        features = util.get_features_h5_in_batches([i], train=False)
        features = util.to_correct_input(features)
        labels = util.get_labels([i])
        labels = np.asarray(labels, dtype=np.float32)
        target_tmp[cnt] = labels

        with chainer.using_config('train', False):
            f = np.expand_dims(features[0], 0)
            prediction = model(f)
            y_tmp[cnt] = prediction.data[0]
            if i in save_list_names:
                util.save_image(prediction, i, epoch=0)
                print("image '%s' saved" % i)
        cnt += 1

    # calculate validation loss
    y_tmp.astype(np.float32)
    target_tmp.astype(np.float32)
    loss = chainer.functions.mean_absolute_error(y_tmp, target_tmp)
    print('model: ', model_name, ' loss model: ', loss)

def fprop(self, batch, volatile=False):
    pairs = util.get_tuples(batch, volatile)
    labels = util.get_labels(batch, volatile)
    # print("fprop ==> pairs: {}; labels {}".format(pairs, labels))
    score = self.model.forward(pairs).squeeze()
    # print("fprop ==> score: {}".format(score))
    # print("fprop ==> score: ", score)
    loss = self.criterion(score, labels)
    # print("fprop ==> loss: {}".format(loss))
    # print("fprop ==> loss: ", loss)
    return loss

num_svms = 6
width = 0.5

svmList = [None] * num_svms
trainfeatList = [None] * num_svms
traindatList = [None] * num_svms
trainlabList = [None] * num_svms
trainlabsList = [None] * num_svms
kernelList = [None] * num_svms

for i in range(num_svms):
    pos = util.get_realdata(True)
    neg = util.get_realdata(False)
    traindatList[i] = concatenate((pos, neg), axis=1)
    trainfeatList[i] = util.get_realfeatures(pos, neg)
    trainlabsList[i] = util.get_labels(True)
    trainlabList[i] = util.get_labels()
    kernelList[i] = GaussianKernel(trainfeatList[i], trainfeatList[i], width)
    svmList[i] = LibSVM(10, kernelList[i], trainlabList[i])

for i in range(num_svms):
    print("Training svm nr. %d" % i)
    currentSVM = svmList[i]
    currentSVM.train()
    print(currentSVM.get_num_support_vectors())
    print("Done.")
    x, y, z = util.compute_output_plot_isolines(
        currentSVM, kernelList[i], trainfeatList[i])
    subplot(num_svms // 2, 2, i + 1)
    pcolor(x, y, z, shading='interp')
    contour(x, y, z, linewidths=1, colors='black', hold=True)

def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default="",
        type=str,
        required=True,
        help="The input data dir. Should contain the training files for the regard classification task.",
    )
    parser.add_argument(
        "--test_file",
        default=None,
        type=str,
        required=False,
        help="Test file, if None, defaults to `test.tsv` file in data_dir.",
    )
    parser.add_argument(
        "--model_version",
        default=2,
        type=int,
        required=False,
        help="1 or 2.",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()),
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model predictions and checkpoints will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--config_name",
        default="",
        type=str,
        help="Pretrained config name or path if not the same as model_name",
    )
    parser.add_argument(
        "--tokenizer_name",
        default="",
        type=str,
        help="Pretrained tokenizer name or path if not the same as model_name",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help="Where do you want to store the pre-trained models downloaded from s3",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer "
        "than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--do_train", action="store_true", help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the dev set.")
    parser.add_argument("--do_predict", action="store_true", help="Whether to run predictions on the test set.")
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Whether to run evaluation during training at each logging step.",
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.",
    )
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument(
        "--gradient_accumulation_steps",
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.",
    )
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set total number of training steps to perform. Override num_train_epochs.",
    )
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--logging_steps", type=int, default=50,
                        help="Log every X updates steps.")
    parser.add_argument("--save_steps", type=int, default=50,
                        help="Save checkpoint every X updates steps.")
    parser.add_argument(
        "--eval_all_checkpoints",
        action="store_true",
        help="Evaluate all checkpoints starting with the same prefix as model_name and ending with step number",
    )
    parser.add_argument("--no_cuda", action="store_true",
                        help="Avoid using CUDA when available")
    parser.add_argument("--overwrite_output_dir", action="store_true",
                        help="Overwrite the content of the output directory")
    parser.add_argument("--overwrite_cache", action="store_true",
                        help="Overwrite the cached training and evaluation sets")
    parser.add_argument("--seed", type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
        "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="For distributed training: local_rank")
    parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.")
    parser.add_argument("--server_port", type=str, default="", help="For distant debugging.")
    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd
        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Prepare regard classification task
    labels = get_labels(model_version=args.model_version)
    num_labels = len(labels)
    # Use cross entropy ignore index as padding label id so that only real label ids contribute to the loss later
    pad_token_label_id = CrossEntropyLoss().ignore_index

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool(".ckpt" in args.model_name_or_path),
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    if args.local_rank == 0:
        # Make sure only the first process in distributed training will download model & vocab
        torch.distributed.barrier()

    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id,
                                                data_file=TRAIN_FILE_PATTERN, is_test=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer, labels, pad_token_label_id)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Saving best-practices: if you use default names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (model.module if hasattr(model, "module") else model)  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True)))
            logging.getLogger("pytorch_transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result, _ = evaluate(args, model, tokenizer, labels, pad_token_label_id,
                                 mode=DEV_FILE_PATTERN, prefix=global_step, is_test=False)
            if global_step:
                result = {"{}_{}".format(global_step, k): v for k, v in result.items()}
            results.update(result)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            for key in sorted(results.keys()):
                writer.write("{} = {}\n".format(key, str(results[key])))

    if args.do_predict and args.local_rank in [-1, 0]:
        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        model = model_class.from_pretrained(args.model_name_or_path)
        model.to(args.device)
        if args.test_file:
            test_file = args.test_file
        elif os.path.exists(os.path.join(args.data_dir, TEST_FILE_PATTERN)):
            test_file = TEST_FILE_PATTERN
        else:
            raise NotImplementedError(
                "No test_file provided and %s DNE." % os.path.join(args.data_dir, TEST_FILE_PATTERN))
        result, predictions = evaluate(args, model, tokenizer, labels, pad_token_label_id,
                                       mode=test_file, is_test=True)
        test_file_basename = os.path.basename(test_file).split('.')[0]

        # Save predictions
        output_test_predictions_file = os.path.join(args.output_dir, test_file_basename + "_predictions.txt")
        with open(output_test_predictions_file, "w") as writer:
            with open(os.path.join(args.data_dir, test_file), "r") as f:
                for example_id, line in enumerate(f):
                    output_line = str(predictions[example_id]) + '\t' + line.split('\t')[-1].strip() + "\n"
                    writer.write(output_line)

    return results

import util

util.set_title('KernelRidgeRegression')

width = 20

# positive examples
pos = util.get_realdata(True)
plot(pos[0, :], pos[1, :], "r.")

# negative examples
neg = util.get_realdata(False)
plot(neg[0, :], neg[1, :], "b.")

# train krr
labels = util.get_labels(type='regression')
train = util.get_realfeatures(pos, neg)
gk = GaussianKernel(train, train, width)
krr = KernelRidgeRegression()
krr.set_labels(labels)
krr.set_kernel(gk)
krr.set_tau(1e-3)
krr.train()

# compute output plot iso-lines
x, y, z = util.compute_output_plot_isolines(krr, gk, train, regression=True)
pcolor(x, y, z)
contour(x, y, z, linewidths=1, colors='black', hold=True)

connect('key_press_event', util.quit)

from util import get_labels
import lightgbm as lgb
from datetime import datetime
import matplotlib.pylab as plt
# matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 12, 4
import numpy as np
import pandas as pd
# scikit-learn helpers used below for label encoding and the train/val split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

np.random.seed(seed=SEED)
n_estimators = 100

labels = get_labels()

for net in networks.keys():
    print(f'Loading training data for {net}...')
    with open(f'bottleneck_features_avg/{net}_avg_features_train.npy', 'rb') as f:
        x_train = np.load(f)
    print(f'Features shape: {x_train.shape}')

    le = LabelEncoder()
    le.fit(labels['breed'])
    y_train = le.transform(labels['breed'])

    print('Creating train/val split...')
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train,

import util

util.set_title('KernelRidgeRegression')

width = 20

# positive examples
pos = util.get_realdata(True)
plot(pos[0, :], pos[1, :], "r.")

# negative examples
neg = util.get_realdata(False)
plot(neg[0, :], neg[1, :], "b.")

# train krr
labels = util.get_labels(type='regression')
train = util.get_realfeatures(pos, neg)
gk = GaussianKernel(train, train, width)
krr = KernelRidgeRegression()
krr.set_labels(labels)
krr.set_kernel(gk)
krr.set_tau(1e-3)
krr.train()

# compute output plot iso-lines
x, y, z = util.compute_output_plot_isolines(krr, gk, train, regression=True)
pcolor(x, y, z, shading='interp')
contour(x, y, z, linewidths=1, colors='black', hold=True)

connect('key_press_event', util.quit)

    return G, node_sentences_pair


if __name__ == '__main__':
    print('### Tokenizing and Preprocessing ###')
    G, node_sentences_pair = GenerateGraph(
        train_input_h='corpora/assin+msr/train/train-h.txt',
        train_input_t='corpora/assin+msr/train/train-t.txt',
        test_input_h='corpora/assin+msr/test/test-h.txt',
        test_input_t='corpora/assin+msr/test/test-t.txt',
    ).create_graph()
    print(len(node_sentences_pair))

    train_labels = util.get_labels('corpora/assin+msr/labels-train.txt')
    test_labels = util.get_labels('corpora/assin+msr/labels-test.txt')

    reg = Regularization()
    reg.regulariza(G, node_sentences_pair, train_labels, '',
                   total_pre_anotados=0.4, method='llgc')

    df_train = pd.read_csv("features_2800_pre_anotados_train.csv")
    df_test = pd.read_csv("features_2800_pre_anotados_test.csv")

    clf = MLPClassifier(solver='adam', hidden_layer_sizes=(20, 20), random_state=42,

""" train, valid, test = get_dis(data_dir, prefix, params.corpus) word_vec = build_vocab(train['s1'] + train['s2'] + valid['s1'] + valid['s2'] + test['s1'] + test['s2'], glove_path) # unknown words instead of map to <unk>, this directly takes them out for split in ['s1', 's2']: for data_type in ['train', 'valid', 'test']: eval(data_type)[split] = np.array([['<s>'] + [word for word in sent.split() if word in word_vec] + ['</s>'] for sent in eval(data_type)[split]]) params.word_emb_dim = 300 dis_labels = get_labels(params.corpus) label_size = len(dis_labels) """ MODEL """ # model config config_dis_model = { 'n_words': len(word_vec), 'word_emb_dim': params.word_emb_dim, 'enc_lstm_dim': params.enc_lstm_dim, 'n_enc_layers': params.n_enc_layers, 'dpout_emb': params.dpout_emb, 'dpout_model': params.dpout_model, 'dpout_fc': params.dpout_fc, 'fc_dim': params.fc_dim,
num_svms = 6
width = 0.5

svmList = [None] * num_svms
trainfeatList = [None] * num_svms
traindatList = [None] * num_svms
trainlabList = [None] * num_svms
trainlabsList = [None] * num_svms
kernelList = [None] * num_svms

for i in range(num_svms):
    pos = util.get_realdata(True)
    neg = util.get_realdata(False)
    traindatList[i] = concatenate((pos, neg), axis=1)
    trainfeatList[i] = util.get_realfeatures(pos, neg)
    trainlabsList[i] = util.get_labels(True)
    trainlabList[i] = util.get_labels()
    kernelList[i] = GaussianKernel(trainfeatList[i], trainfeatList[i], width)
    svmList[i] = LibSVM(10, kernelList[i], trainlabList[i])

for i in range(num_svms):
    print("Training svm nr. %d" % i)
    currentSVM = svmList[i]
    currentSVM.train()
    print(currentSVM.get_num_support_vectors())
    print("Done.")
    x, y, z = util.compute_output_plot_isolines(currentSVM, kernelList[i], trainfeatList[i])
    subplot(num_svms // 2, 2, i + 1)
    pcolor(x, y, z)
    contour(x, y, z, linewidths=1, colors='black', hold=True)

from keras.applications import inception_v3
from keras.models import load_model
from tqdm import tqdm
from util import get_labels, get_images
import csv
import numpy as np

# Define constants
INPUT_SIZE = 299
fname = 'model1_finetune.h5'
nr_predictions = 10357

# Get ids, label names and images
print('Load data...')
labels = get_labels().sort_values(by=['breed']).breed.unique()
ids = []
images = np.zeros((nr_predictions, INPUT_SIZE, INPUT_SIZE, 3), dtype='float16')
for i, (img, img_id) in tqdm(enumerate(get_images('test', INPUT_SIZE))):
    x = inception_v3.preprocess_input(np.expand_dims(img, axis=0))
    images[i] = x
    ids.append(img_id)

# Load model weights
print(f'Load model from {fname}')
model = load_model(fname)

# Make predictions on input images
print('Predict...')
predictions = model.predict(images, verbose=1)

def training():
    print('setting up...')
    if pc.TRAIN:
        num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TRAIN)
        # num_features = util.get_number_of_features_from_train(pp.CELEB_FACES_FC6_TRAIN)  # for server
        all_names = np.array(util.get_names_h5_file(pp.FC6_TRAIN_H5))
        path_images = pp.CELEB_FACES_FC6_TRAIN
    else:
        num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TEST)
        all_names = np.array(util.get_names_h5_file(pp.FC6_TEST_H5))
        path_images = pp.CELEB_FACES_FC6_TEST

    total_steps = num_features // pc.BATCH_SIZE

    mask_L_sti = util.get_L_sti_mask()

    # ----------------------------------------------------------------
    # GENERATOR
    generator = Generator()
    # generator = GeneratorPaper()
    generator_train_loss = np.zeros(pc.EPOCHS)
    generator_optimizer = chainer.optimizers.Adam(alpha=0.0002, beta1=0.9, beta2=0.999, eps=10**-8)
    generator_optimizer.setup(generator)

    # ----------------------------------------------------------------
    # DISCRIMINATOR
    discriminator = Discriminator()
    # discriminator = DiscriminatorPaper()
    discriminator_train_loss = np.zeros(pc.EPOCHS)
    discriminator_optimizer = chainer.optimizers.Adam(alpha=0.0002, beta1=0.9, beta2=0.999, eps=10**-8)
    discriminator_optimizer.setup(discriminator)

    # ----------------------------------------------------------------
    # VGG16 FOR FEATURE LOSS
    vgg16 = VGG16Layers()

    # ----------------------------------------------------------------
    save_list = random.sample(range(num_features), 20)
    save_list_names = [''] * 20
    cnt = 0
    for i in save_list:
        save_list_names[cnt] = util.sed_line(path_images, i).strip().split(',')[0]
        cnt += 1

    ones1 = util.make_ones(generator)
    zeros = util.make_zeros(generator)

    print('training...')
    for epoch in range(pc.EPOCHS):
        # shuffle training instances
        order = list(range(num_features))
        random.shuffle(order)
        names_order = all_names[order]

        train_gen = True
        train_dis = True

        print('epoch %d' % epoch)
        for step in range(total_steps):
            names = names_order[step * pc.BATCH_SIZE:(step + 1) * pc.BATCH_SIZE]
            features = util.get_features_h5_in_batches(names, train=pc.TRAIN)
            features = util.to_correct_input(features)
            labels_32, labels_224 = util.get_labels(names)
            # labels_32 = util.get_labels(names)
            # vgg16_features = util.get_features_h5_in_batches(names, train=pc.TRAIN, which_features='vgg16')
            # vgg16_features = util.to_correct_input(vgg16_features)
            # labels_32 = np.asarray(labels_32, dtype=np.float32)

            with chainer.using_config('train', train_gen):
                generator.cleargrads()
                prediction = generator(chainer.Variable(features))

            with chainer.using_config('train', train_dis):
                discriminator.cleargrads()
                print('prediction shape', np.shape(prediction.data))
                data = np.reshape(generator(chainer.Variable(features)).data,
                                  (pc.BATCH_SIZE, 32, 32, 3))
                data = np.transpose(data, (0, 3, 1, 2))
                fake_prob = discriminator(chainer.Variable(data))

                other_data = np.reshape(labels_32, (pc.BATCH_SIZE, 32, 32, 3))
                other_data = np.transpose(other_data, (0, 3, 1, 2))
                real_prob = discriminator(chainer.Variable(other_data))

            feature_truth = vgg16(labels_224, layers=['conv3_3'])['conv3_3']
            feature_reconstruction = vgg16(util.fix_prediction_for_vgg16(prediction, vgg16),
                                           layers=['conv3_3'])['conv3_3']
            # feature_reconstruction = None

            # ----------------------------------------------------------------
            # CALCULATE LOSS
            lambda_adv = 10**2
            lambda_sti = 2 * (10**-6)
            lambda_fea = 10**-2

            l_adv = lambda_adv * F.sigmoid_cross_entropy(fake_prob, ones1.data)
            # TODO: mask is probably breaking the graph, fix this
            thing_1 = util.apply_mask(labels_32, mask_L_sti)
            thing_2 = util.apply_mask(prediction.data, mask_L_sti)
            l_sti = lambda_sti * F.mean_squared_error(thing_1, thing_2)
            l_fea = lambda_fea * F.mean_squared_error(feature_truth, feature_reconstruction)

            generator_loss = l_adv + l_sti + l_fea
            generator_loss.backward()
            generator_optimizer.update()
            generator_train_loss[epoch] += generator_loss.data

            lambda_dis = 10**2
            discriminator_loss = lambda_dis * (
                F.sigmoid_cross_entropy(real_prob, ones1.data) +
                F.sigmoid_cross_entropy(fake_prob, zeros.data))
            discriminator_loss.backward()
            discriminator_optimizer.update()
            discriminator_train_loss[epoch] += discriminator_loss.data

            # ----------------------------------------------------------------
            # when to suspend / resume training
            dis_adv_ratio = discriminator_loss.data / l_adv.data
            if dis_adv_ratio < 0.1:
                train_dis = False
            if dis_adv_ratio > 0.5:
                train_dis = True
            if dis_adv_ratio > 10:
                train_gen = False
            if dis_adv_ratio < 2:
                train_gen = True

            # print('%d/%d %d/%d generator: %f l_adv: %f l_sti: %f discriminator: %f l3: %f l4: %f' % (
            #     epoch, pc.EPOCHS, step, total_steps, generator_loss.data, l_adv.data, l_sti.data,
            #     discriminator_loss.data, l3.data, l4.data))
            print('%d/%d %d/%d generator: %f l_adv: %f l_sti: %f l_fea: %f discriminator: %f dis/adv: %f' %
                  (epoch, pc.EPOCHS, step, total_steps, generator_loss.data, l_adv.data, l_sti.data,
                   l_fea.data, discriminator_loss.data, dis_adv_ratio))

            # information = util.update_information(information1, step, generator_loss.data, l_adv.data, l_sti.data)
            # information = util.update_information(information2, step, discriminator_loss.data, l3.data, l4.data)

            # visualizing loss
            # prev_max_ax1 = util.plot_everything(information1, fig1, lines1, ax1, prev_max_ax1, step)
            # prev_max_ax2 = util.plot_everything(information2, fig2, lines2, ax2, prev_max_ax2, step)

            with chainer.using_config('train', False):
                for i in range(len(names)):
                    if names[i] in save_list_names:
                        f = np.expand_dims(features[i], 0)
                        prediction = generator(f)
                        util.save_image(prediction, names[i], epoch, pp.RECONSTRUCTION_FOLDER)
                        print("image '%s' saved" % names[i])

        # if (epoch + 1) % pc.SAVE_EVERY_N_STEPS == 0:
        #     util.save_model(generator, epoch)

        generator_train_loss[epoch] /= total_steps
        print(generator_train_loss[epoch])
        discriminator_train_loss[epoch] /= total_steps
        print(discriminator_train_loss[epoch])

def create_db(self, rpt_file, db_file):
    import sqlite3
    con = sqlite3.connect(db_file)
    # con.execute("PRAGMA foreign_keys = ON")

    # Create table
    con.execute("""
        CREATE TABLE IF NOT EXISTS nodes
        (id INTEGER PRIMARY KEY, parent INTEGER, last INTEGER,
         name TEXT, cell TEXT,
         internal REAL, switching REAL, leakage REAL, total REAL,
         FOREIGN KEY(parent) REFERENCES nodes(id),
         FOREIGN KEY(last) REFERENCES nodes(id))
    """)

    header = []
    with open(rpt_file) as f:
        hier = []
        rows = []
        skip_header = True
        for k, line in enumerate(f):
            line_num = k + 1
            if skip_header:
                if "----" in line:
                    skip_header = False
                    temp = []
                    for header_line in reversed(header):
                        if header_line.strip() == '':
                            break
                        temp.append(header_line)
                    labels = get_labels("\n".join(reversed(temp)))
                    print(labels)
                else:
                    header.append(line)
                continue

            if line == "1\n":
                print(f"Done on line {line_num}")
                break

            info = line.split()
            if len(info) == 6:
                cell = None
                name, internal, switching, leakage, total, percent = info
            elif len(info) == 7:
                name, cell, internal, switching, leakage, total, percent = info
            elif len(info) == 10:
                cell = None
                name, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
            elif len(info) == 11:
                name, cell, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
            else:
                raise NotImplementedError(line)

            if cell is not None:
                cell = cell.lstrip("(").rstrip(")")
            if total == "N/A":
                total = None

            info = {
                "indent": len(line) - len(line.lstrip(' ')),
                "id": line_num,
                "name": name,
                "cell": cell,
                "internal": internal,
                "switching": switching,
                "leakage": leakage,
                "total": total,
            }

            while len(hier) > 0 and info["indent"] <= hier[-1]["indent"]:
                node = hier.pop()
                node["last"] = line_num - 1
                node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
                rows.append((
                    node["id"],
                    node["parent"],
                    node["last"],
                    node["name"],
                    node["cell"],
                    node["internal"],
                    node["switching"],
                    node["leakage"],
                    node["total"],
                ))
                if len(rows) > self.batch_size:
                    con.executemany(
                        "INSERT INTO nodes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", rows)
                    rows.clear()
            hier.append(info)

        while len(hier) > 0:
            node = hier.pop()
            node["last"] = line_num - 1
            node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
            rows.append((
                node["id"],
                node["parent"],
                node["last"],
                node["name"],
                node["cell"],
                node["internal"],
                node["switching"],
                node["leakage"],
                node["total"],
            ))
        con.executemany("INSERT INTO nodes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", rows)
        con.commit()

    foreign_key_failures = con.execute("PRAGMA foreign_key_check").fetchall()
    if len(foreign_key_failures) > 0:
        raise sqlite3.IntegrityError(
            f"Failed foreign key checks: {foreign_key_failures}")

    # print(f"split: {time_split}")
    # print(f"update: {time_update}")
    return con

def create_df(self, rpt_file, db_file):
    header = []
    with open(rpt_file) as f:
        hier = []
        rows = []
        skip_header = True
        for k, line in enumerate(f):
            line_num = k + 1
            if skip_header:
                if "----" in line:
                    skip_header = False
                    temp = []
                    for header_line in reversed(header):
                        if header_line.strip() == '':
                            break
                        temp.append(header_line)
                    labels = get_labels("\n".join(reversed(temp)))
                    print(labels)
                else:
                    header.append(line)
                continue

            if line == "1\n":
                print(f"Done on line {line_num}")
                break

            info = line.split()
            if len(info) == 6:
                cell = None
                name, internal, switching, leakage, total, percent = info
            elif len(info) == 7:
                name, cell, internal, switching, leakage, total, percent = info
            elif len(info) == 10:
                cell = None
                name, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
            elif len(info) == 11:
                name, cell, internal, switching, leakage, peak_power, peak_time, glitch_power, x_tran_power, total, percent = info
            else:
                raise NotImplementedError(line)

            if cell is not None:
                cell = cell.lstrip("(").rstrip(")")

            info = {
                "indent": len(line) - len(line.lstrip(' ')),
                "id": line_num,
                "name": name,
                "cell": cell,
                "internal": None if internal == "N/A" else float(internal),
                "switching": None if switching == "N/A" else float(switching),
                "leakage": None if leakage == "N/A" else float(leakage),
                "total": None if total == "N/A" else float(total),
            }

            while len(hier) > 0 and info["indent"] <= hier[-1]["indent"]:
                node = hier.pop()
                node["last"] = line_num - 1
                node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
                rows.append(node)
            hier.append(info)

        while len(hier) > 0:
            node = hier.pop()
            node["last"] = line_num - 1
            node["parent"] = hier[-1]["id"] if len(hier) > 0 else None
            rows.append(node)

    return pd.DataFrame(rows)

n_pre_epochs = 10
n_epochs = 100
batch_size = 32
n_images = 300
USE_PCA = True
USE_GENSEL = False
USE_AUTOENC = False  # TODO
USE_ICA = True
USE_CANNY = True
USE_CORNERHARRIS = True
TRAIN = False
CHANNELS = 3

# Load labels
print('Load labels...')
labels = get_labels()[:n_images]

# Load training data
print('Load training data...')
x_train = np.zeros((n_images, INPUT_SIZE, INPUT_SIZE, 3), dtype=K.floatx())
for i, (img, img_id) in tqdm(enumerate(get_images('train', INPUT_SIZE, amount=n_images))):
    x = inception_v3.preprocess_input(np.expand_dims(img, axis=0))
    x_train[i] = x
y_train = one_hot(labels['breed'].values, num_classes=NUM_CLASSES)

# Arguments of ImageDataGenerator define types of augmentation to be performed
# E.g.: horizontal flip, rotation, etc.
# No fitting required since we don't use centering/normalization/whitening
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=.2,

def preprocess(input, segmentation, extraction, use_letters, dim):
    segmented = segmentation.segment(input)
    feature_vector = extraction.extract(segmented)
    labels = get_labels(input.shape[1], input.shape[0], dim, dim, use_letters)
    return [feature_vector, labels]

class Classification:
    def __init__(self, model, features, labels):
        self.model = joblib.load(model)
        self.x = features
        self.y = labels

    def classifier(self):
        y_pred = self.model.predict(self.x)
        return classification_report(self.y, y_pred)


if __name__ == '__main__':
    # algorithms = ['bayes.pkl', 'lre.pkl', 'nn.pkl', 'svm.pkl', 'tree.pkl']
    algorithms = ['svm.pkl']
    # model = 'trained-model/assin_res_mod_skip300'
    model = 'trained-model/assin+msr_mod_glove50'
    # model = 'trained-model/mod_'
    # test = 'features/test/features-test-all1.txt'

    print('### Extracting features ###')
    features, _ = ExtractFeatures(
        model='model/glove50.txt',
        input_h='assin+msr/test/test-h.txt',
        input_t='assin+msr/test/test-t.txt').extract_features()
    # test = 'baseline/test/features-test.txt'
    labels = util.get_labels('assin+msr/labels-test.txt')

    print('### Classifying ###')
    for a in algorithms:
        print(Classification(model + a, features, labels).classifier())

def training():
    print('setting up...')
    # num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TRAIN)
    num_features = util.get_number_of_features(pp.CELEB_FACES_FC6_TEST)
    total_steps = num_features // pc.BATCH_SIZE
    # all_names = np.array(util.get_names_h5_file(pp.FC6_TRAIN_H5))
    all_names = np.array(util.get_names_h5_file(pp.FC6_TEST_H5))

    generator = Generator()
    train_loss = np.zeros(pc.EPOCHS)
    # generator = GeneratorPaper()
    optimizer = chainer.optimizers.Adam(alpha=0.0002, beta1=0.5, beta2=0.999, eps=10e-8)
    optimizer.setup(generator)

    save_list = random.sample(range(num_features), 20)
    save_list_names = [''] * 20
    cnt = 0
    for i in save_list:
        # save_list_names[cnt] = util.sed_line(pp.CELEB_FACES_FC6_TRAIN, i).strip().split(',')[0]
        save_list_names[cnt] = util.sed_line(pp.CELEB_FACES_FC6_TEST, i).strip().split(',')[0]
        cnt += 1

    print('training...')
    for epoch in range(pc.EPOCHS):
        # shuffle training instances
        order = list(range(num_features))
        random.shuffle(order)
        names_order = all_names[order]

        print('epoch %d' % epoch)
        for step in range(total_steps):
            # names, features = util.get_features_in_batches(step, train=True)
            names = names_order[step * pc.BATCH_SIZE:(step + 1) * pc.BATCH_SIZE]
            # features = util.get_features_h5_in_batches(names, train=True)
            features = util.get_features_h5_in_batches(names, train=False)
            features = util.to_correct_input(features)
            labels = util.get_labels(names)
            labels = np.asarray(labels, dtype=np.float32)

            with chainer.using_config('train', True):
                generator.cleargrads()
                prediction = generator(features)
                loss = chainer.functions.mean_absolute_error(prediction, labels)
                # print('loss', loss.data)
                print('%d/%d %d/%d loss: %f' % (epoch, pc.EPOCHS, step, total_steps, float(loss.data)))
                loss.backward()
                optimizer.update()
                train_loss[epoch] += loss.data

            # with chainer.using_config('train', False):
            #     for i in range(len(names)):
            #         if names[i] in save_list_names:
            #             f = np.expand_dims(features[i], 0)
            #             prediction = generator(f)
            #             util.save_image(prediction, names[i], epoch)
            #             print("image '%s' saved" % names[i])

        if (epoch + 1) % pc.SAVE_EVERY_N_STEPS == 0:
            util.save_model(generator, epoch)

        train_loss[epoch] /= total_steps
        print(train_loss[epoch])

from pylab import plot, grid, title, subplot, xlabel, ylabel, text, subplots_adjust, fill_between, mean, connect, show
from shogun.Kernel import GaussianKernel
from shogun.Classifier import LibSVM, LDA
from shogun.Evaluation import PRCEvaluation
import util

util.set_title('PRC example')
util.DISTANCE = 0.5
subplots_adjust(hspace=0.3)

pos = util.get_realdata(True)
neg = util.get_realdata(False)
features = util.get_realfeatures(pos, neg)
labels = util.get_labels()

# classifiers
gk = GaussianKernel(features, features, 1.0)
svm = LibSVM(1000.0, gk, labels)
svm.train()
lda = LDA(1, features, labels)
lda.train()

# plot points
subplot(211)
plot(pos[0, :], pos[1, :], "r.")
plot(neg[0, :], neg[1, :], "b.")
grid(True)
title('Data', size=10)

# plot PRC for SVM
subplot(223)

def detect_object_in(model_path, label_map_path, video_src):
    PATH_TO_FROZEN_GRAPH = model_path + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = label_map_path

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

    with detection_graph.as_default():
        with create_session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)

            cap = cv2.VideoCapture(video_src)
            video_running = True
            while True:
                ret, image_np = cap.read()
                output_dict = run_inference_for_single_image(sess, image_np, detection_graph, tensor_dict)
                detected_labels = get_labels(
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index)
                print(detected_labels)
                print("=================")
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks'),
                    use_normalized_coordinates=True,
                    line_thickness=8)
                cv_image = cv2.resize(image_np, (800, 600))
                cv2.imshow("Press q to quit", cv_image)
                if cv2.waitKey(25) & 0xFF == ord("q"):
                    cap.release()
                    cv2.destroyAllWindows()
                    break