def load(videos_fp, labels_fp, limit, val_limit):
    # Load training videos/labels, scale pixels into [0, 1] and normalize.
    videos = np.load(videos_fp)
    labels = np.load(labels_fp)

    videos = videos / 255.
    videos = normalize(videos)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    if limit:
        videos = videos[:limit]
        labels = labels[:limit]

    # Slice of the training set held out for quick evaluation.
    if val_limit:
        videos_eval = videos[:val_limit][:]
        labels_eval = labels[:val_limit][:]
    else:
        videos_eval = videos[:EVAL_TRAIN_SIZE][:]
        labels_eval = labels[:EVAL_TRAIN_SIZE][:]

    # Validation split used as the test set.
    videos_test = normalize(np.load(TRIMMED_VIDEO_VALID_FP) / 255.)
    videos_test = select_data(videos_test, VIDEOS_MAX_BATCH)
    labels_test = np.load(TRIMMED_LABEL_VALID_FP)

    global AVAILABLE_SIZE
    AVAILABLE_SIZE = videos.shape[0]

    batch_gen = Batch_generator(
        x=videos,
        y=labels,
        batch=BATCHSIZE,
        drop_last=True,
    )

    return batch_gen, videos_eval, labels_eval, videos_test, labels_test
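# Note: normalize() and select_data() are defined elsewhere in this repo.
# The commented sketch below is only an assumption about the behaviour
# load() relies on (re-scaling the [0, 1] frames and capping the number of
# frames kept per video at VIDEOS_MAX_BATCH); it is not the actual
# implementation.
#
#   def normalize(videos, mean=0.5, std=0.5):
#       return (videos - mean) / std
#
#   def select_data(videos, max_frames):
#       return np.array([video[:max_frames] for video in videos])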
def evaluation(mode, model_fp, limit):
    if mode == "test":
        videos = np.load(VIDEOS_TEST_FP)
        labels = np.load(LABELS_TEST_FP)

    if limit:
        videos = videos[:limit]
        labels = labels[:limit]

    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    model = tor.load(model_fp).cuda()

    correct, total = 0, len(labels)

    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, total))
        x = Variable(tor.FloatTensor(x)).permute(0, 3, 1, 2).cuda()
        out = model(x)
        # Average the frame-level outputs before classifying the whole clip.
        out = out.mean(dim=0).unsqueeze(0)
        pred = model.pred(out)
        y = tor.max(pred, 1)[1]
        if int(y[0].data) == label:
            correct += 1

    acc = correct / total
    print("|Acc on {}: {}".format(mode, round(acc, 6)))

    return acc
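# Illustrative call (the model path is assumed, not taken from the repo):
#   acc = evaluation("test", "models/rnn_model.pkl", limit=None)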
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--blc', nargs=2, type=int,
                        help='Bottom left corner of the data box')
    parser.add_argument('--trc', nargs=2, type=int,
                        help='Top right corner of the data box')
    parser.add_argument('image', type=str,
                        help='FITS file name')
    parser.add_argument('out', type=str,
                        help='Output plot file name')
    args = parser.parse_args()

    # Open data
    img = fits.open(os.path.expanduser(args.image))[0]
    data = np.squeeze(img.data)

    # Select data
    npix, x0, x1, y0, y1 = select_data(data.shape, blc=args.blc, trc=args.trc)

    # Create figure
    fig, ax = get_figure(npix)

    # Plot
    if args.trc and args.blc:
        ax.imshow(data[y0:y1, x0:x1])
    else:
        ax.imshow(data)

    # Save
    fig.savefig(args.out, dpi=600)
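# Illustrative invocation (the script name is assumed):
#   python plot_image.py --blc 100 100 --trc 400 400 image.fits image_plot.png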
def prediction(model_fp, data_fp, label_fp, output_fp, limit):
    model = tor.load(model_fp)
    model.cuda()

    ### Load data
    l = getVideoList(label_fp)
    videos_output, labels_output = [], []
    total = len(l["Video_category"]) if not limit else limit

    for i in range(total):
        print("Convert videos into numpy: {}/{} \r".format(
            i + 1, len(l["Video_category"])), end="")

        cat = l["Video_category"][i]
        name = l["Video_name"][i]
        label = l["Action_labels"][i]
        data = readShortVideo(data_fp, cat, name,
                              downsample_factor=12).astype(np.int8)
        videos_output.append(data.astype(np.int16))
        labels_output.append(int(label))

    videos = np.array(videos_output)
    labels = np.array(labels_output).astype(np.uint8)

    ### Prediction
    correct, total = 0, len(labels)
    preds = []

    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, len(videos)))
        x = tor.Tensor(x).permute(0, 3, 1, 2).cuda()
        out = model(x)
        out = out.mean(dim=0).unsqueeze(0)
        pred = model.pred(out)
        y = tor.max(pred, 1)[1]
        pred = int(y[0].data)
        if pred == label:
            correct += 1
        preds.append(pred)

    acc = correct / total
    print(acc)

    with open(os.path.join(output_fp, "p1_valid.txt"), "w") as f:
        for i, item in enumerate(preds):
            if i != len(preds) - 1:
                f.write(str(item) + "\n")
            else:
                f.write(str(item))
def plot_tsne(model_fp, output_fp, limit, mode):
    videos = np.load(VIDEOS_TEST_FP)
    labels = np.load(LABELS_TEST_FP)

    model = tor.load(model_fp)
    model.cuda()

    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    correct, total = 0, len(labels)
    features_rnn = []

    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, total))
        x = tor.Tensor(x).permute(0, 3, 1, 2).cuda()
        if mode == "rnn":
            f = model.get_feature(x).cpu().data.numpy()
        else:
            f = model(x).cpu().data.numpy()
        features_rnn.append(f[0])

    features_rnn = np.array(features_rnn)

    ### tSNE
    tsne = TSNE(
        n_components=2,
        random_state=0,
    )
    f_tsne = tsne.fit_transform(features_rnn)

    for i in range(11):
        plt.scatter(f_tsne[labels == i, 0], f_tsne[labels == i, 1])
    plt.legend(["Label {}".format(i) for i in range(11)])

    fn = "tSNE_RNN.jpg" if mode == "rnn" else "tSNE_CNN.jpg"
    plt.savefig(os.path.join(output_fp, fn))
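# Illustrative call (paths are assumed); mode="rnn" projects the RNN features,
# anything else projects the raw model outputs:
#   plot_tsne("models/rnn_model.pkl", "./plots", limit=None, mode="rnn")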
    -inputs:
        target: the actual class labels
        predicted: prediction from the classifier given the attributes
    -output:
        mse: the mean square error
    """
    return mean_squared_error(target, predicted)


# url_train, url_test = ut.get_urls("features-1")
# url_train, url_test = ut.get_urls("features-2")
# url_train, url_test = ut.get_urls("features-3")

data_set_train, data_sets_test = ut.get_data()
columns = [30, 53]
data_set_train_selected, data_sets_test_selected = ut.select_data(
    data_set_train, data_sets_test, columns)

data_set_train_selected[:, 0] = np.square(data_set_train_selected[:, 0])
for i in range(len(data_sets_test_selected)):
    data_sets_test_selected[i][:, 0] = np.square(
        data_sets_test_selected[i][:, 0])

# download training data file
# raw_data = urllib.urlopen(url_train)
# load the CSV file as a numpy matrix
# data_set_train = np.loadtxt(raw_data, delimiter=",", skiprows=1)

# separate the data from the target attributes
num_attributes = len(data_set_train_selected[0]) - 1
X = data_set_train_selected[:, 0:num_attributes]
y = data_set_train_selected[:, num_attributes]

clf = GaussianNB()
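# A possible continuation, sketched here only for illustration: fit the
# classifier on the selected training columns and score each test set with
# the mean-squared-error helper documented above (its name, mse, is assumed).
#
#   clf.fit(X, y)
#   for test_set in data_sets_test_selected:
#       X_test = test_set[:, 0:num_attributes]
#       y_test = test_set[:, num_attributes]
#       print(mse(y_test, clf.predict(X_test)))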
def main():
    parser = argparse.ArgumentParser(
        description='A script for training model(s). It can select a '
        'specific part of the training data and feed it for training to '
        'some model. At the end it evaluates the trained models with the '
        'proper scoring rules, generates rank histograms, and calculates '
        'feature importance data as well as plots over the entire dataset '
        'in order to visually show the performance of the trained model(s).')
    parser.add_argument('--model', dest='model', action='store',
                        help='The model to be trained. This can be \'bnn\', '
                        '\'mdn\' or \'both\'.')
    parser.add_argument('--config', dest='config', action='store',
                        required=True,
                        help='The configuration file with settings for the '
                        'architecture of the models')
    parser.add_argument('--station', dest='station', action='store',
                        default="SBC",
                        help='The target LUBW station the trained model aims '
                        'to predict')
    parser.add_argument('--predictor', dest='pred_value', action='store',
                        default="P1",
                        help='The value(s) that should be used as features '
                        '(P1, P2 or P1P2)')
    parser.add_argument('--period', dest='period', action='store',
                        default="1D",
                        help='Integration period for the data (1D, 1H, 12H)')
    parser.add_argument('--outvalue', dest='out_value', action='store',
                        default="P1",
                        help='Output air pollution value of the model (P1 or P2)')
    parser.add_argument('--take_lubw', dest='take_lu_bw', action='store_true',
                        default=False,
                        help='Should the LU BW station be taken as feature')
    parser.add_argument('--random_split', dest='random_split',
                        action='store_true', default=False,
                        help='Should the data be split randomly for the '
                        'test-train split')
    parser.add_argument('--dest', dest='dest', action='store',
                        required=False,
                        default="/home/arnaud/code/pollution/test_eval",
                        help='Destination for the evaluation and for the '
                        'built models')
    parser.add_argument('--base-dir', dest='base_dir', action='store',
                        required=False,
                        default="/home/arnaud/code/pollution/env/data_frames",
                        help='The directory where the data frames reside')
    parser.add_argument('--load-mdn', dest='load_mdn', action='store',
                        required=False, default=None,
                        help='Load the MDN model from a specific folder and '
                        'do not train a new one')
    parser.add_argument('--load-bnn', dest='load_bnn', action='store',
                        required=False, default=None,
                        help='Load the BNN model from a specific folder and '
                        'do not train a new one')
    parser.add_argument('--emp', dest='emp', action='store_true',
                        default=False,
                        help='Run the empirical evaluation')
    args = parser.parse_args()

    station = args.station
    in_value = args.pred_value
    period = args.period
    out_value = args.out_value
    train_per = 0.75
    take_lu_bw = args.take_lu_bw
    random_split = args.random_split
    base_dir = args.base_dir
    dest = args.dest

    X, y, col_names, out_name = select_data(station, in_value, period,
                                            include_lu_bw=take_lu_bw,
                                            output_value=out_value,
                                            base_dir=base_dir)
    X_train, X_test, y_train, y_test = test_train_split(X, y,
                                                        train_size=train_per,
                                                        random=random_split)
    y_train = y_train.reshape(y_train.shape[0], 1)
    y_test = y_test.reshape(y_test.shape[0], 1)

    config_file = open(args.config, "r")
    config = json.load(config_file)
    config_file.close()

    print("Period: " + period)
    print("Training samples: ", X_train.shape[0])
    print("Test samples: ", X_test.shape[0])
    print("Number of features: ", X_train.shape[1])
    print("Input features: " + in_value)
    print("Target station", station)
    print("Input features:", col_names)
    print("Output value", out_name)
    print("-------------")

    ev_samples_cnt = config["ev_samples_cnt"]

    mdn_iter = config["mdn"]["mdn_iter"]
    mdn_layers = config["mdn"]["mdn_layers"]
config["mdn"]["mdn_mixture_cnt"] mdn_id = "mdn_l" + str(mdn_layers) + "_i" + str(mdn_iter) + "_mc" + str( mdn_mixture_cnt) bnn_samples = config["bnn"]["bnn_samples"] bnn_iter = config["bnn"]["bnn_iter"] bnn_layers = config["bnn"]["bnn_layers"] bnn_id = "bnn_l" + str(bnn_layers) + "_i" + str(bnn_iter) + "_s" + str( bnn_samples) desc = "" desc += "\nPeriod: " + str(period) desc += "\nTraining samples: " + str(X_train.shape[0]) desc += "\nTest samples: " + str(X_test.shape[0]) desc += "\nNumber of features: " + str(X_train.shape[1]) desc += "\nTaking LU BW as feature: " + str(take_lu_bw) desc += "\nInput value: " + str(in_value) desc += "\nTarget station: " + str(station) desc += "\nInput features: " + str(col_names) desc += "\nOutpute value: " + str(out_name) desc += "\nTest-train split ratio: " + str(train_per) desc += "\n-------------\n" desc += "\nMDN Configuration: " desc += "\nIterations: " + str(mdn_iter) desc += "\nLayers: " + str(mdn_layers) desc += "\nMixtures Count: " + str(mdn_mixture_cnt) desc += "\n-------------\n" desc += "\nBNN Configuration: " desc += "\nIterations: " + str(bnn_iter) desc += "\nLayers: " + str(bnn_layers) desc += "\nSamples for vatiational inference: " + str(bnn_samples) desc += "\n-------------\n" desc += "\nEvaluation Configuration" desc += "\nSamples drawn from models for each observation: " + str( ev_samples_cnt) ev = Evaluator(dest, desc, out_value) ev.set_test_train_split(X_train, X_test, y_train, y_test) ev.set_names(col_names, out_name) os.makedirs(dest + "/bnn_train_plots") os.makedirs(dest + "/mdn_train_plots") def get_mdn(): if args.load_mdn is None: mdn_model = Mdn("MDN Model", X_train, y_train, inner_dims=mdn_layers, num_mixtures=mdn_mixture_cnt) mdn_model.fit(num_iter=mdn_iter, callback=lambda mod, j: plot_mdn( X_train, X_test, y_train, y_test, dest + "/mdn_train_plots", mod, j)) mdn_model.save(dest + "/mdn_model") return mdn_model else: print("Loading MDN from file") mdn_model = Mdn("MDN Model", X_train, y_train, inner_dims=mdn_layers, num_mixtures=mdn_mixture_cnt, model_file=args.load_mdn) mdn_model.save(dest + "/mdn_model") return mdn_model def get_bnn(): if args.load_bnn is None: bnn_model = Bnn("BNN Model") bnn_model.build(X_train.shape[1], 1, layers_defs=bnn_layers, examples=X_train.shape[0]) bnn_model.fit(X_train, np.squeeze(y_train), epochs=bnn_iter, samples=bnn_samples, callback=lambda mod, j: plot_bnn( X_train, X_test, y_train, y_test, dest + "/bnn_train_plots", mod, j)) bnn_model.save(dest + "/bnn_model", "bnn_model") return bnn_model else: print("Loading BNN from file") bnn_model = Bnn("BNN Model") bnn_model.load(args.load_bnn, name="bnn_model") bnn_model.save(dest + "/bnn_model", "bnn_model") return bnn_model if args.model == "bnn": print("Fitting the BNN") bnn_model = get_bnn() ev.evaluate_bnn(bnn_model, bnn_id, samples=ev_samples_cnt) elif args.model == "mdn": print("Fitting the MDN") mdn_model = get_mdn() ev.evaluate_mdn(mdn_model, mdn_id, samples=ev_samples_cnt) else: print("Fitting the MDN") mdn_model = get_mdn() ev.evaluate_mdn(mdn_model, mdn_id, samples=ev_samples_cnt) tf.reset_default_graph() print("Fitting the BNN") bnn_model = get_bnn() ev.evaluate_bnn(bnn_model, bnn_id, samples=ev_samples_cnt) if arg.emp: ev.evaluate_empirical(samples=ev_samples_cnt)
def calculate(start, end, name):
    sentences, vnps, adjectives, items = select_data(start, end)

    # individual freq
    filename = 'items_%d_%s.csv' % (start.year, name)
    print(filename)
    items_freq = calculate_frequency(sentences, items, 'referred_items')
    save_table(items_freq, open(filename, 'w'))

    filename = 'vnps_%d_%s.csv' % (start.year, name)
    print(filename)
    vnps_freq = calculate_frequency(sentences, vnps, 'verb_noun_pair')
    save_table(vnps_freq, open(filename, 'w'))

    filename = 'adjectives_%d_%s.csv' % (start.year, name)
    print(filename)
    adjs_freq = calculate_frequency(sentences, adjectives, 'adjectives')
    save_table(adjs_freq, open(filename, 'w'))

    # user networks
    filename = 'vnp_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_links = count_user_links(sentences, vnps, 'verb_noun_pair')
    save_table(vnp_links, open(filename, 'w'))

    filename = 'adj_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_links = count_user_links(sentences, adjectives, 'adjectives')
    save_table(adj_links, open(filename, 'w'))

    filename = 'itm_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    itm_links = count_user_links(sentences, items, 'referred_items')
    save_table(itm_links, open(filename, 'w'))

    # multi-frequencies
    filename = 'item_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_vnp = calculate_dual_frequency(sentences, items, 'referred_items',
                                           vnps, 'verb_noun_pair', diary=True)
    save_table(item_vs_vnp, open(filename, 'w'))

    filename = 'item_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_adj = calculate_dual_frequency(sentences, items, 'referred_items',
                                           adjectives, 'adjectives', diary=True)
    save_table(item_vs_adj, open(filename, 'w'))

    filename = 'vnp_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_vs_vnp = calculate_dual_frequency(sentences, vnps, 'verb_noun_pair',
                                          vnps, 'verb_noun_pair', diary=True)
    save_table(vnp_vs_vnp, open(filename, 'w'))

    filename = 'adj_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_vs_adj = calculate_dual_frequency(sentences, adjectives, 'adjectives',
                                          adjectives, 'adjectives', diary=True)
    save_table(adj_vs_adj, open(filename, 'w'))

    filename = 'user_vs_item_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_item = calculate_frequency_user(sentences, items, 'referred_items')
    save_table(user_vs_item, open(filename, 'w'))

    filename = 'user_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_vnp = calculate_frequency_user(sentences, vnps, 'verb_noun_pair')
    save_table(user_vs_vnp, open(filename, 'w'))

    return
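# Illustrative call (the date range and label are assumed):
#   from datetime import datetime
#   calculate(datetime(2015, 1, 1), datetime(2015, 12, 31), "full_year")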
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--chanrange', nargs=2, type=int,
                        help='Channel range')
    parser.add_argument('--blc', nargs=2, type=int,
                        help='Bottom left corner of the data box')
    parser.add_argument('--trc', nargs=2, type=int,
                        help='Top right corner of the data box')
    parser.add_argument('--xcoverage', default=0.8, type=float,
                        help='Coverage of the spectrum range over each pixel')
    parser.add_argument('--level', default=None, type=float,
                        help='Ignore spectra below level')
    parser.add_argument('--mask', default=None, type=str,
                        help='Read a mask from FITS file')
    parser.add_argument('--every', type=int, default=None,
                        help='Select one pixel every n pixels from peak')
    parser.add_argument('--autolimit', action='store_true',
                        help='Use std and mean to determine if spectra will be plotted')
    parser.add_argument('--nsigma', type=int, default=3,
                        help='Number of standard deviations from the mean used with --autolimit')
    parser.add_argument('--color', default='k',
                        help='Line color')
    parser.add_argument('cube', type=str,
                        help='FITS cube file name')
    parser.add_argument('out', type=str,
                        help='Output plot file name')
    args = parser.parse_args()

    # Open cube
    cube = fits.open(os.path.expanduser(args.cube))[0]

    # Select data
    npix, x0, x1, y0, y1 = select_data(cube.shape, blc=args.blc, trc=args.trc)
    if args.chanrange:
        lenspec = abs(args.chanrange[1] - args.chanrange[0]) + 1
        s0, s1 = args.chanrange[0], args.chanrange[1] + 1
    else:
        lenspec = cube.shape[-3]
        s0, s1 = 0, lenspec
    subcube = cube.data[0, s0:s1, y0:y1, x0:x1]

    # Create mask
    if args.mask:
        mask = fits.open(args.mask)[0]
        mask = np.squeeze(mask.data).astype(bool)
    elif args.level:
        mask = np.any(subcube > args.level, axis=0)
    elif args.autolimit:
        mean = np.mean(subcube)
        std = np.std(subcube)
        mask = np.any(subcube > mean + args.nsigma*std, axis=0) | \
            np.any(subcube < mean - args.nsigma*std, axis=0)
    else:
        mask = np.ones(subcube.shape[1:], dtype=bool)
    if args.every:
        maxmap = np.nanmax(subcube, axis=0)
        ymax, xmax = np.unravel_index(np.nanargmax(maxmap), maxmap.shape)
        mask = mask & mask_every(subcube.shape[1:], args.every,
                                 row=ymax, col=xmax)

    # Data scaling
    scaling = 1.01 * np.nanmax(subcube)
    xempty = (1. - args.xcoverage) * 0.5
    xaxis = np.linspace(xempty, 1. - xempty, lenspec)

    # Create figure
    fig, ax = get_figure(npix, alpha=True)

    # Limits
    ax.set_xlim(0, npix)
    ax.set_ylim(-0.5, npix - 0.5)

    # Plot
    for y, x in np.transpose(np.nonzero(mask)):
        # Spectrum
        spec = subcube[:, y, x]
        if np.any(np.isnan(spec)):
            continue
        # X axis
        wlg = xaxis + x
        # Plot
        ax.plot(wlg, spec/scaling + y, '%s-' % args.color, lw=0.05)

    fig.savefig(args.out, dpi=600)
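# Illustrative invocation (the script name is assumed):
#   python plot_spectra.py --chanrange 10 60 --blc 100 100 --trc 164 164 \
#       --autolimit --nsigma 5 cube.fits spectra_map.png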