def load(videos_fp, labels_fp, limit, val_limit):
    # Load training videos/labels, scale pixels into [0, 1] and normalize.
    videos = np.load(videos_fp)
    labels = np.load(labels_fp)

    videos = videos / 255.
    videos = normalize(videos)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    if limit:
        videos = videos[:limit]
        labels = labels[:limit]

    # Slice of the training set held out for quick evaluation.
    if val_limit:
        videos_eval = videos[:val_limit][:]
        labels_eval = labels[:val_limit][:]
    else:
        videos_eval = videos[:EVAL_TRAIN_SIZE][:]
        labels_eval = labels[:EVAL_TRAIN_SIZE][:]

    # Validation split used as the test set.
    videos_test = normalize(np.load(TRIMMED_VIDEO_VALID_FP) / 255.)
    videos_test = select_data(videos_test, VIDEOS_MAX_BATCH)
    labels_test = np.load(TRIMMED_LABEL_VALID_FP)

    global AVAILABLE_SIZE
    AVAILABLE_SIZE = videos.shape[0]

    batch_gen = Batch_generator(
        x=videos,
        y=labels,
        batch=BATCHSIZE,
        drop_last=True,
    )

    return batch_gen, videos_eval, labels_eval, videos_test, labels_test
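# Note: normalize() and select_data() are defined elsewhere in this repo.
# The commented sketch below is only an assumption about the behaviour
# load() relies on (re-scaling the [0, 1] frames and capping the number of
# frames kept per video at VIDEOS_MAX_BATCH); it is not the actual
# implementation.
#
#   def normalize(videos, mean=0.5, std=0.5):
#       return (videos - mean) / std
#
#   def select_data(videos, max_frames):
#       return np.array([video[:max_frames] for video in videos])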
def evaluation(mode, model_fp, limit):
    if mode == "test":
        videos = np.load(VIDEOS_TEST_FP)
        labels = np.load(LABELS_TEST_FP)

    if limit:
        videos = videos[:limit]
        labels = labels[:limit]

    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    model = tor.load(model_fp).cuda()

    correct, total = 0, len(labels)

    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, total))
        x = Variable(tor.FloatTensor(x)).permute(0, 3, 1, 2).cuda()
        out = model(x)
        # Average the frame-level outputs before classifying the whole clip.
        out = out.mean(dim=0).unsqueeze(0)
        pred = model.pred(out)
        y = tor.max(pred, 1)[1]
        if int(y[0].data) == label:
            correct += 1

    acc = correct / total
    print("|Acc on {}: {}".format(mode, round(acc, 6)))

    return acc
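# Illustrative call (the model path is assumed, not taken from the repo):
#   acc = evaluation("test", "models/rnn_model.pkl", limit=None)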
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--blc', nargs=2, type=int,
                        help='Bottom left corner of the data box')
    parser.add_argument('--trc', nargs=2, type=int,
                        help='Top right corner of the data box')
    parser.add_argument('image', type=str,
                        help='FITS file name')
    parser.add_argument('out', type=str,
                        help='Output plot file name')
    args = parser.parse_args()

    # Open data
    img = fits.open(os.path.expanduser(args.image))[0]
    data = np.squeeze(img.data)

    # Select data
    npix, x0, x1, y0, y1 = select_data(data.shape, blc=args.blc, trc=args.trc)

    # Create figure
    fig, ax = get_figure(npix)

    # Plot
    if args.trc and args.blc:
        ax.imshow(data[y0:y1, x0:x1])
    else:
        ax.imshow(data)

    # Save
    fig.savefig(args.out, dpi=600)
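# Illustrative invocation (the script name is assumed):
#   python plot_image.py --blc 100 100 --trc 400 400 image.fits image_plot.png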
def prediction(model_fp, data_fp, label_fp, output_fp, limit):
    model = tor.load(model_fp)
    model.cuda()

    ### Load data
    l = getVideoList(label_fp)
    videos_output, labels_output = [], []
    total = len(l["Video_category"]) if not limit else limit

    for i in range(total):
        print("Convert videos into numpy: {}/{} \r".format(
            i + 1, len(l["Video_category"])), end="")

        cat = l["Video_category"][i]
        name = l["Video_name"][i]
        label = l["Action_labels"][i]
        data = readShortVideo(data_fp, cat, name,
                              downsample_factor=12).astype(np.int8)
        videos_output.append(data.astype(np.int16))
        labels_output.append(int(label))

    videos = np.array(videos_output)
    labels = np.array(labels_output).astype(np.uint8)

    ### Prediction
    correct, total = 0, len(labels)
    preds = []

    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, len(videos)))
        x = tor.Tensor(x).permute(0, 3, 1, 2).cuda()
        out = model(x)
        out = out.mean(dim=0).unsqueeze(0)
        pred = model.pred(out)
        y = tor.max(pred, 1)[1]
        pred = int(y[0].data)
        if pred == label:
            correct += 1
        preds.append(pred)

    acc = correct / total
    print(acc)

    with open(os.path.join(output_fp, "p1_valid.txt"), "w") as f:
        for i, item in enumerate(preds):
            if i != len(preds) - 1:
                f.write(str(item) + "\n")
            else:
                f.write(str(item))
def plot_tsne(model_fp, output_fp, limit, mode):
    videos = np.load(VIDEOS_TEST_FP)
    labels = np.load(LABELS_TEST_FP)

    model = tor.load(model_fp)
    model.cuda()

    videos = normalize(videos / 255.)
    videos = select_data(videos, VIDEOS_MAX_BATCH)

    correct, total = 0, len(labels)
    features_rnn = []

    for i, (x, label) in enumerate(zip(videos, labels), 1):
        print("Process: {}/{}".format(i, total))
        x = tor.Tensor(x).permute(0, 3, 1, 2).cuda()
        if mode == "rnn":
            f = model.get_feature(x).cpu().data.numpy()
        else:
            f = model(x).cpu().data.numpy()
        features_rnn.append(f[0])

    features_rnn = np.array(features_rnn)

    ### tSNE
    tsne = TSNE(
        n_components=2,
        random_state=0,
    )
    f_tsne = tsne.fit_transform(features_rnn)

    for i in range(11):
        plt.scatter(f_tsne[labels == i, 0], f_tsne[labels == i, 1])
    plt.legend(["Label {}".format(i) for i in range(11)])

    fn = "tSNE_RNN.jpg" if mode == "rnn" else "tSNE_CNN.jpg"
    plt.savefig(os.path.join(output_fp, fn))
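# Illustrative call (paths are assumed); mode="rnn" projects the RNN features,
# anything else projects the raw model outputs:
#   plot_tsne("models/rnn_model.pkl", "./plots", limit=None, mode="rnn")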
    -inputs:
        target: the actual class labels
        predicted: prediction from the classifier given the attributes
    -output:
        mse: the mean square error
    """
    return mean_squared_error(target, predicted)


# url_train, url_test = ut.get_urls("features-1")
# url_train, url_test = ut.get_urls("features-2")
# url_train, url_test = ut.get_urls("features-3")

data_set_train, data_sets_test = ut.get_data()
columns = [30, 53]
data_set_train_selected, data_sets_test_selected = ut.select_data(
    data_set_train, data_sets_test, columns)

data_set_train_selected[:, 0] = np.square(data_set_train_selected[:, 0])
for i in range(len(data_sets_test_selected)):
    data_sets_test_selected[i][:, 0] = np.square(
        data_sets_test_selected[i][:, 0])

# download training data file
# raw_data = urllib.urlopen(url_train)
# load the CSV file as a numpy matrix
# data_set_train = np.loadtxt(raw_data, delimiter=",", skiprows=1)

# separate the data from the target attributes
num_attributes = len(data_set_train_selected[0]) - 1
X = data_set_train_selected[:, 0:num_attributes]
y = data_set_train_selected[:, num_attributes]

clf = GaussianNB()
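# A possible continuation, sketched here only for illustration: fit the
# classifier on the selected training columns and score each test set with
# the mean-squared-error helper documented above (its name, mse, is assumed).
#
#   clf.fit(X, y)
#   for test_set in data_sets_test_selected:
#       X_test = test_set[:, 0:num_attributes]
#       y_test = test_set[:, num_attributes]
#       print(mse(y_test, clf.predict(X_test)))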
def main():
    parser = argparse.ArgumentParser(
        description='A script for training model(s). It can select a '
        'specific part of the training data and feed it for training to '
        'some model. At the end it evaluates the trained models with the '
        'proper scoring rules, generates rank histograms, and calculates '
        'feature importance data as well as plots over the entire dataset '
        'in order to visually show the performance of the trained model(s).')
    parser.add_argument('--model', dest='model', action='store',
                        help='The model to be trained. This can be \'bnn\', '
                        '\'mdn\' or \'both\'.')
    parser.add_argument('--config', dest='config', action='store',
                        required=True,
                        help='The configuration file with settings for the '
                        'architecture of the models')
    parser.add_argument('--station', dest='station', action='store',
                        default="SBC",
                        help='The target LUBW station the trained model aims '
                        'to predict')
    parser.add_argument('--predictor', dest='pred_value', action='store',
                        default="P1",
                        help='The value(s) that should be used as features '
                        '(P1, P2 or P1P2)')
    parser.add_argument('--period', dest='period', action='store',
                        default="1D",
                        help='Integration period for the data (1D, 1H, 12H)')
    parser.add_argument('--outvalue', dest='out_value', action='store',
                        default="P1",
                        help='Output air pollution value of the model (P1 or P2)')
    parser.add_argument('--take_lubw', dest='take_lu_bw', action='store_true',
                        default=False,
                        help='Should the LU BW station be taken as feature')
    parser.add_argument('--random_split', dest='random_split',
                        action='store_true', default=False,
                        help='Should the data be split randomly for the '
                        'test-train split')
    parser.add_argument('--dest', dest='dest', action='store',
                        required=False,
                        default="/home/arnaud/code/pollution/test_eval",
                        help='Destination for the evaluation and for the '
                        'built models')
    parser.add_argument('--base-dir', dest='base_dir', action='store',
                        required=False,
                        default="/home/arnaud/code/pollution/env/data_frames",
                        help='The directory where the data frames reside')
    parser.add_argument('--load-mdn', dest='load_mdn', action='store',
                        required=False, default=None,
                        help='Load the MDN model from a specific folder and '
                        'do not train a new one')
    parser.add_argument('--load-bnn', dest='load_bnn', action='store',
                        required=False, default=None,
                        help='Load the BNN model from a specific folder and '
                        'do not train a new one')
    parser.add_argument('--emp', dest='emp', action='store_true',
                        default=False,
                        help='Run the empirical evaluation')
    args = parser.parse_args()

    station = args.station
    in_value = args.pred_value
    period = args.period
    out_value = args.out_value
    train_per = 0.75
    take_lu_bw = args.take_lu_bw
    random_split = args.random_split
    base_dir = args.base_dir
    dest = args.dest

    X, y, col_names, out_name = select_data(station, in_value, period,
                                            include_lu_bw=take_lu_bw,
                                            output_value=out_value,
                                            base_dir=base_dir)
    X_train, X_test, y_train, y_test = test_train_split(X, y,
                                                        train_size=train_per,
                                                        random=random_split)
    y_train = y_train.reshape(y_train.shape[0], 1)
    y_test = y_test.reshape(y_test.shape[0], 1)

    config_file = open(args.config, "r")
    config = json.load(config_file)
    config_file.close()

    print("Period: " + period)
    print("Training samples: ", X_train.shape[0])
    print("Test samples: ", X_test.shape[0])
    print("Number of features: ", X_train.shape[1])
    print("Input features: " + in_value)
    print("Target station", station)
    print("Input features:", col_names)
    print("Output value", out_name)
    print("-------------")

    ev_samples_cnt = config["ev_samples_cnt"]

    mdn_iter = config["mdn"]["mdn_iter"]
    mdn_layers = config["mdn"]["mdn_layers"]
config["mdn"]["mdn_mixture_cnt"] mdn_id = "mdn_l" + str(mdn_layers) + "_i" + str(mdn_iter) + "_mc" + str( mdn_mixture_cnt) bnn_samples = config["bnn"]["bnn_samples"] bnn_iter = config["bnn"]["bnn_iter"] bnn_layers = config["bnn"]["bnn_layers"] bnn_id = "bnn_l" + str(bnn_layers) + "_i" + str(bnn_iter) + "_s" + str( bnn_samples) desc = "" desc += "\nPeriod: " + str(period) desc += "\nTraining samples: " + str(X_train.shape[0]) desc += "\nTest samples: " + str(X_test.shape[0]) desc += "\nNumber of features: " + str(X_train.shape[1]) desc += "\nTaking LU BW as feature: " + str(take_lu_bw) desc += "\nInput value: " + str(in_value) desc += "\nTarget station: " + str(station) desc += "\nInput features: " + str(col_names) desc += "\nOutpute value: " + str(out_name) desc += "\nTest-train split ratio: " + str(train_per) desc += "\n-------------\n" desc += "\nMDN Configuration: " desc += "\nIterations: " + str(mdn_iter) desc += "\nLayers: " + str(mdn_layers) desc += "\nMixtures Count: " + str(mdn_mixture_cnt) desc += "\n-------------\n" desc += "\nBNN Configuration: " desc += "\nIterations: " + str(bnn_iter) desc += "\nLayers: " + str(bnn_layers) desc += "\nSamples for vatiational inference: " + str(bnn_samples) desc += "\n-------------\n" desc += "\nEvaluation Configuration" desc += "\nSamples drawn from models for each observation: " + str( ev_samples_cnt) ev = Evaluator(dest, desc, out_value) ev.set_test_train_split(X_train, X_test, y_train, y_test) ev.set_names(col_names, out_name) os.makedirs(dest + "/bnn_train_plots") os.makedirs(dest + "/mdn_train_plots") def get_mdn(): if args.load_mdn is None: mdn_model = Mdn("MDN Model", X_train, y_train, inner_dims=mdn_layers, num_mixtures=mdn_mixture_cnt) mdn_model.fit(num_iter=mdn_iter, callback=lambda mod, j: plot_mdn( X_train, X_test, y_train, y_test, dest + "/mdn_train_plots", mod, j)) mdn_model.save(dest + "/mdn_model") return mdn_model else: print("Loading MDN from file") mdn_model = Mdn("MDN Model", X_train, y_train, inner_dims=mdn_layers, num_mixtures=mdn_mixture_cnt, model_file=args.load_mdn) mdn_model.save(dest + "/mdn_model") return mdn_model def get_bnn(): if args.load_bnn is None: bnn_model = Bnn("BNN Model") bnn_model.build(X_train.shape[1], 1, layers_defs=bnn_layers, examples=X_train.shape[0]) bnn_model.fit(X_train, np.squeeze(y_train), epochs=bnn_iter, samples=bnn_samples, callback=lambda mod, j: plot_bnn( X_train, X_test, y_train, y_test, dest + "/bnn_train_plots", mod, j)) bnn_model.save(dest + "/bnn_model", "bnn_model") return bnn_model else: print("Loading BNN from file") bnn_model = Bnn("BNN Model") bnn_model.load(args.load_bnn, name="bnn_model") bnn_model.save(dest + "/bnn_model", "bnn_model") return bnn_model if args.model == "bnn": print("Fitting the BNN") bnn_model = get_bnn() ev.evaluate_bnn(bnn_model, bnn_id, samples=ev_samples_cnt) elif args.model == "mdn": print("Fitting the MDN") mdn_model = get_mdn() ev.evaluate_mdn(mdn_model, mdn_id, samples=ev_samples_cnt) else: print("Fitting the MDN") mdn_model = get_mdn() ev.evaluate_mdn(mdn_model, mdn_id, samples=ev_samples_cnt) tf.reset_default_graph() print("Fitting the BNN") bnn_model = get_bnn() ev.evaluate_bnn(bnn_model, bnn_id, samples=ev_samples_cnt) if arg.emp: ev.evaluate_empirical(samples=ev_samples_cnt)
def calculate(start, end, name):
    sentences, vnps, adjectives, items = select_data(start, end)

    # individual freq
    filename = 'items_%d_%s.csv' % (start.year, name)
    print(filename)
    items_freq = calculate_frequency(sentences, items, 'referred_items')
    save_table(items_freq, open(filename, 'w'))

    filename = 'vnps_%d_%s.csv' % (start.year, name)
    print(filename)
    vnps_freq = calculate_frequency(sentences, vnps, 'verb_noun_pair')
    save_table(vnps_freq, open(filename, 'w'))

    filename = 'adjectives_%d_%s.csv' % (start.year, name)
    print(filename)
    adjs_freq = calculate_frequency(sentences, adjectives, 'adjectives')
    save_table(adjs_freq, open(filename, 'w'))

    # user networks
    filename = 'vnp_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_links = count_user_links(sentences, vnps, 'verb_noun_pair')
    save_table(vnp_links, open(filename, 'w'))

    filename = 'adj_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_links = count_user_links(sentences, adjectives, 'adjectives')
    save_table(adj_links, open(filename, 'w'))

    filename = 'itm_user_net_%d_%s.csv' % (start.year, name)
    print(filename)
    itm_links = count_user_links(sentences, items, 'referred_items')
    save_table(itm_links, open(filename, 'w'))

    # multi-frequencies
    filename = 'item_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_vnp = calculate_dual_frequency(sentences, items, 'referred_items',
                                           vnps, 'verb_noun_pair', diary=True)
    save_table(item_vs_vnp, open(filename, 'w'))

    filename = 'item_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    item_vs_adj = calculate_dual_frequency(sentences, items, 'referred_items',
                                           adjectives, 'adjectives', diary=True)
    save_table(item_vs_adj, open(filename, 'w'))

    filename = 'vnp_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    vnp_vs_vnp = calculate_dual_frequency(sentences, vnps, 'verb_noun_pair',
                                          vnps, 'verb_noun_pair', diary=True)
    save_table(vnp_vs_vnp, open(filename, 'w'))

    filename = 'adj_vs_adj_%d_%s.csv' % (start.year, name)
    print(filename)
    adj_vs_adj = calculate_dual_frequency(sentences, adjectives, 'adjectives',
                                          adjectives, 'adjectives', diary=True)
    save_table(adj_vs_adj, open(filename, 'w'))

    filename = 'user_vs_item_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_item = calculate_frequency_user(sentences, items, 'referred_items')
    save_table(user_vs_item, open(filename, 'w'))

    filename = 'user_vs_vnp_%d_%s.csv' % (start.year, name)
    print(filename)
    user_vs_vnp = calculate_frequency_user(sentences, vnps, 'verb_noun_pair')
    save_table(user_vs_vnp, open(filename, 'w'))

    return
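# Illustrative call (the date range and label are assumed):
#   from datetime import datetime
#   calculate(datetime(2015, 1, 1), datetime(2015, 12, 31), "full_year")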
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--chanrange', nargs=2, type=int,
                        help='Channel range')
    parser.add_argument('--blc', nargs=2, type=int,
                        help='Bottom left corner of the data box')
    parser.add_argument('--trc', nargs=2, type=int,
                        help='Top right corner of the data box')
    parser.add_argument('--xcoverage', default=0.8, type=float,
                        help='Coverage of the spectrum range over each pixel')
    parser.add_argument('--level', default=None, type=float,
                        help='Ignore spectra below level')
    parser.add_argument('--mask', default=None, type=str,
                        help='Read a mask from FITS file')
    parser.add_argument('--every', type=int, default=None,
                        help='Select one pixel every n pixels from peak')
    parser.add_argument('--autolimit', action='store_true',
                        help='Use std and mean to determine if spectra will be plotted')
    parser.add_argument('--nsigma', type=int, default=3,
                        help='Number of standard deviations from the mean used with --autolimit')
    parser.add_argument('--color', default='k',
                        help='Line color')
    parser.add_argument('cube', type=str,
                        help='FITS cube file name')
    parser.add_argument('out', type=str,
                        help='Output plot file name')
    args = parser.parse_args()

    # Open cube
    cube = fits.open(os.path.expanduser(args.cube))[0]

    # Select data
    npix, x0, x1, y0, y1 = select_data(cube.shape, blc=args.blc, trc=args.trc)
    if args.chanrange:
        lenspec = abs(args.chanrange[1] - args.chanrange[0]) + 1
        s0, s1 = args.chanrange[0], args.chanrange[1] + 1
    else:
        lenspec = cube.shape[-3]
        s0, s1 = 0, lenspec
    subcube = cube.data[0, s0:s1, y0:y1, x0:x1]

    # Create mask
    if args.mask:
        mask = fits.open(args.mask)[0]
        mask = np.squeeze(mask.data).astype(bool)
    elif args.level:
        mask = np.any(subcube > args.level, axis=0)
    elif args.autolimit:
        mean = np.mean(subcube)
        std = np.std(subcube)
        mask = np.any(subcube > mean + args.nsigma*std, axis=0) | \
            np.any(subcube < mean - args.nsigma*std, axis=0)
    else:
        mask = np.ones(subcube.shape[1:], dtype=bool)
    if args.every:
        maxmap = np.nanmax(subcube, axis=0)
        ymax, xmax = np.unravel_index(np.nanargmax(maxmap), maxmap.shape)
        mask = mask & mask_every(subcube.shape[1:], args.every,
                                 row=ymax, col=xmax)

    # Data scaling
    scaling = 1.01 * np.nanmax(subcube)
    xempty = (1. - args.xcoverage) * 0.5
    xaxis = np.linspace(xempty, 1. - xempty, lenspec)

    # Create figure
    fig, ax = get_figure(npix, alpha=True)

    # Limits
    ax.set_xlim(0, npix)
    ax.set_ylim(-0.5, npix - 0.5)

    # Plot
    for y, x in np.transpose(np.nonzero(mask)):
        # Spectrum
        spec = subcube[:, y, x]
        if np.any(np.isnan(spec)):
            continue
        # X axis
        wlg = xaxis + x
        # Plot
        ax.plot(wlg, spec/scaling + y, '%s-' % args.color, lw=0.05)

    fig.savefig(args.out, dpi=600)
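# Illustrative invocation (the script name is assumed):
#   python plot_spectra.py --chanrange 10 60 --blc 100 100 --trc 164 164 \
#       --autolimit --nsigma 5 cube.fits spectra_map.png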