# Shared imports for the examples below. `preprocess`, `storage_helper`,
# `request_helper`, `poly`, `sp`, `parse_arguments`, `flop_convert`, and
# `flop_linear_polynomial` are project-local helpers whose import paths
# depend on the surrounding repository.
import logging
import os
import shutil
import sys
from collections import OrderedDict

import numpy as np
import pandas as pd


def run_preprocess(argv=None):
    """Runs the retrieval and preprocessing of the data.

    Args:
      argv: command-line arguments passed when submitting the preprocessing
        job; defaults to sys.argv when None.
    """
    logging.info('starting preprocessing of data..')
    args = parse_arguments(sys.argv if argv is None else argv)
    tickers = [
        'snp', 'nyse', 'djia', 'nikkei', 'hangseng', 'ftse', 'dax', 'aord'
    ]
    closing_data = preprocess.load_data(tickers, args.es_address,
                                        args.cutoff_year)
    time_series = preprocess.preprocess_data(closing_data)
    logging.info('preprocessing of data complete..')

    logging.info('starting uploading of the preprocessed data on Ceph..')
    temp_folder = 'data'
    if not os.path.exists(temp_folder):
        os.mkdir(temp_folder)
    file_path = os.path.join(temp_folder,
                             'data_{}.csv'.format(args.cutoff_year))
    time_series.to_csv(file_path, index=False)
    storage_helper.upload_to_storage(args.bucket, temp_folder,
                                     args.endpoint_url, args.access_key,
                                     args.secret_key)
    shutil.rmtree(temp_folder)
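    # When running as a Kubeflow Pipelines component (args.kfp), record the
    # uploaded file's path so that downstream pipeline steps can read it.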
    if args.kfp:
        with open("/store_path.txt", "w") as output_file:
            output_file.write(file_path)
    logging.info('upload of the preprocessed data on Ceph completed..')
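

# A minimal invocation sketch for run_preprocess. The flag names below are
# assumptions inferred from the attributes read off `args` above; the real
# definitions live in parse_arguments.
if __name__ == '__main__':
    run_preprocess([
        'preprocess.py',  # placeholder argv[0], mirroring sys.argv
        '--es_address', 'localhost:9200',
        '--cutoff_year', '2010',
        '--bucket', 'stock-data',
        '--endpoint_url', 'http://ceph:7480',
        '--access_key', 'ACCESS_KEY',
        '--secret_key', 'SECRET_KEY',
    ])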

# Example #2

def send_practical_request(date="2014-08-12"):
    """Obtain the prediction for a certain date in the test set.

    Args:
      date (str): date for which to request a prediction.
    """
    # create input from request date
    tickers = [
        'snp', 'nyse', 'djia', 'nikkei', 'hangseng', 'ftse', 'dax', 'aord'
    ]
    closing_data = preprocess.load_data(tickers)
    # subtract 7 because the first 7 days are not included in the time series
    index = closing_data.index.get_loc(date) - 7
    training_test_data = preprocess.preprocess_data(closing_data)
    input_tensor = np.expand_dims(
        training_test_data[training_test_data.columns[2:]].values[index],
        axis=0).astype(np.float32)

    request_helper.send_request(input_tensor)
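
# Usage sketch: any trading day present in the loaded index (after the first
# seven days, which preprocessing drops) can be requested, e.g.
# send_practical_request(date='2014-08-12').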

# Example #3

def fit(params):
    """Fit the polynomial model, which includes the IQ mixer."""

    ##### Load and prepare data #####
    x, y, noise, measured_noise_power = load_data(
        'data/fdTestbedData' + str(params.sampling_freq_MHz) + 'MHz10dBm',
        params)

    # Print the total number of real parameters to be estimated
    # (this count includes the linear part as well)
    n_poly = int(params.hsi_len * ((params.max_power + 1) / 2) *
                 ((params.max_power + 1) / 2 + 1))
    print(
        "Total number of real parameters to estimate for polynomial based canceller: {:d}"
        .format(2 * n_poly))
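
    # Worked example (hypothetical values): hsi_len = 13 and max_power = 7
    # give n_poly = 13 * 4 * (4 + 1) = 260 complex parameters, i.e.
    # 2 * 260 = 520 real parameters reported by the print above.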

    # Split into training and test sets
    training_samples = int(np.floor(x.size * params.training_ratio))

    x_train = x[0:training_samples]
    y_train = y[0:training_samples]
    x_test = x[training_samples:]
    y_test = y[training_samples:]

    # Shrink the training set when params.training_size < 1 (i.e. train with
    # fewer samples)
    training_samples = int(np.floor(training_samples * params.training_size))

    x_train = x[0:training_samples]
    y_train = y[0:training_samples]

    ##### Training #####
    # Estimate linear and non-linear cancellation parameters
    h_lin = poly.si_estimation_linear(x_train, y_train, params)
    # Note: this also estimates the linear parameters, so we get both
    h_nonlin = poly.si_estimation_nonlinear(x_train, y_train, params)

    ##### Test #####
    # Do linear and non-linear cancellation
    y_canc = poly.si_cancellation_linear(x_test, h_lin, params)
    # NB: this includes both the linear AND the non-linear cancellation
    y_canc_nonlin = poly.si_cancellation_nonlinear(x_test, h_nonlin, params)

    ##### Evaluation #####
    # Scale signals according to known noise power
    y_test, y_canc, y_canc_nonlin, noise = sp.compute_scaling(
        noise, measured_noise_power, y_test, y_canc, y_canc_nonlin)

    if params.save:
        path = "results/results_wlmp"
    else:
        path = "results/tmp_wlmp"

    if not os.path.exists(path):
        os.makedirs(path)

    # Plot PSD and get signal powers
    noise_power, y_test_power, y_test_lin_canc_power, y_test_nonlin_canc_power = sp.plotPSD(
        params,
        y_test[params.hsi_len:],
        y_canc[params.hsi_len:],
        y_canc_nonlin[params.hsi_len:],
        noise,
        y_var=1,
        path=path)

    # The non-linear canceller includes the linear part, so it performs the
    # total cancellation; model_canc is therefore the extra cancellation the
    # non-linear stage provides over the purely linear one
    model_canc = y_test_lin_canc_power - y_test_nonlin_canc_power
    # Total cancellation of model
    total_canc = y_test_power - y_test_nonlin_canc_power

    # Complexity of the non-linear part only: the linear taps are subtracted
    # here and added back below via flop_linear_polynomial
    n_cadd = n_poly - params.hsi_len - 1
    n_cmult = n_poly - params.hsi_len

    # Convert the complex-valued addition and multiplication counts into
    # total real-valued operations
    model_add, model_mult = flop_convert(n_cadd, n_cmult, algo="reduced_cmult")
    model_act = 0

    model_flop = model_add + model_mult

    total_add = model_add
    total_mult = model_mult
    total_act = 0

    lin_add, lin_mult, lin_act = flop_linear_polynomial(params,
                                                        algo="reduced_cmult")

    total_add += lin_add + 2
    total_mult += lin_mult
    total_act += lin_act

    total_flop = total_add + total_mult + total_act

    # Convert the parameter count from complex-valued to real-valued
    total_params = 2 * n_poly

    data = OrderedDict([
        ('total_flop', total_flop),
        ('total_add', total_add),
        ('total_mult', total_mult),
        ('total_canc_max', total_canc),
        ('total_params', total_params),
        ('model_flop', model_flop),
        ('model_add', model_add),
        ('model_mult', model_mult),
        ('model_act', model_act),
        ('model_canc', model_canc),
        ('training_size', params.training_size),
        ('min_power', params.min_power),
        ('max_power', params.max_power),
    ])

    file_path = os.path.join(path, "wlmp.csv")

    df = pd.DataFrame(data, index=[0])

    if os.path.exists(file_path):
        df_restored = pd.read_csv(file_path)

        # DataFrame.append was removed in pandas 2.0; concatenate instead
        df = pd.concat([df, df_restored], ignore_index=True)
        df = df.sort_values(by=['total_flop'])

    df.to_csv(file_path, index=False)
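

# A minimal sketch of driving fit() with an argparse-style namespace. Every
# field below is an assumption inferred from the attributes accessed inside
# fit(); the project's own parameter parser defines the real names and
# defaults.
if __name__ == '__main__':
    from types import SimpleNamespace

    params = SimpleNamespace(
        sampling_freq_MHz=20,   # selects data/fdTestbedData20MHz10dBm
        hsi_len=13,             # self-interference channel memory length
        max_power=7,            # highest polynomial order
        min_power=1,            # lowest polynomial order (logged in results)
        training_ratio=0.9,     # train/test split of the captured samples
        training_size=1.0,      # fraction of the training split actually used
        save=False,             # write results to results/tmp_wlmp
    )
    fit(params)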