Example #1
    parser.add_argument("--n_layers", "-nl", type=int, default=3)
    parser.add_argument("--encoder_hidden_size", "-ehs", type=int, default=64)
    parser.add_argument("--decoder_hidden_size", "-dhs", type=int, default=64)
    parser.add_argument("--seq_len", "-sl", type=int, default=7)
    parser.add_argument("--num_obs_to_train", "-not", type=int, default=1)
    parser.add_argument("--embedding_size", "-es", type=int, default=10)
    parser.add_argument("--standard_scaler", "-ss", action="store_true")
    parser.add_argument("--log_scaler", "-ls", action="store_true")
    parser.add_argument("--mean_scaler", "-ms", action="store_true")
    parser.add_argument("--show_plot", "-sp", action="store_true")
    parser.add_argument("--run_test", "-rt", action="store_true")
    parser.add_argument("--batch_size", "-b", type=int, default=64)
    args = parser.parse_args()

    if args.run_test:
        data_path = util.get_data_path()
        data = pd.read_csv(os.path.join(data_path, "LD_MT200_hour.csv"),
                           parse_dates=["date"])
        data["year"] = data["date"].apply(lambda x: x.year)
        data["day_of_week"] = data["date"].apply(lambda x: x.dayofweek)
        data = data.loc[(data["date"] >= date(2014, 1, 1))
                        & (data["date"] <= date(2014, 3, 1))]

        features = ["hour", "day_of_week"]
        # hours = pd.get_dummies(data["hour"])
        # dows = pd.get_dummies(data["day_of_week"])
        hours = data["hour"]
        dows = data["day_of_week"]
        X = np.c_[np.asarray(hours), np.asarray(dows)]
        num_features = X.shape[1]
        num_periods = len(data)
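
All of the examples on this page lean on util.get_data_path(), which is never shown. For reference, a minimal sketch of what such a helper might look like; the "data" directory name, the layout, and the optional filename argument are assumptions inferred from how the helper is called here and in Example #2, not the actual util module:

# Hypothetical sketch of the util.get_data_path helper the examples assume.
# The "data" directory name and the optional filename argument are guesses.
import os


def get_data_path(filename=None):
    """Return the project's data directory, or a path to a file inside it."""
    root = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
    return os.path.join(root, filename) if filename else root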
Example #2
import findspark
findspark.init()

import pyspark

import util

from pyspark.sql import SparkSession
import configparser

config = configparser.ConfigParser()  # set up but not used in this fragment

spark = SparkSession.builder.master("local[1]").appName(
    "SparkLocal").getOrCreate()

# txt is an RDD of lines read from LICENSE.txt
txt = spark.sparkContext.textFile(util.get_data_path('LICENSE.txt'))

print(txt.count())  # total number of lines

python_lines = txt.filter(lambda line: 'python' in line.lower())
print(python_lines.count())  # lines mentioning "python", case-insensitively
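
master("local[1]") runs Spark in local mode with a single worker thread; local[*] uses one thread per logical core. A minimal variant of the same filtering script that uses all local cores and stops the session explicitly when done (same assumed util helper and data file):

# Sketch: the same line count as above, but on all local cores, with an
# explicit shutdown of the session when finished.
from pyspark.sql import SparkSession

import util

spark = SparkSession.builder.master("local[*]").appName("SparkLocal").getOrCreate()
txt = spark.sparkContext.textFile(util.get_data_path('LICENSE.txt'))
print(txt.filter(lambda line: 'python' in line.lower()).count())
spark.stop()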
Example #3
import os
import sys

import numpy as np
import theano.tensor as T
import matplotlib.pyplot as plt
import random

sys.path.append('src')

from assemble_net import Tracer, MLP, AutoEncoder
from run_net_recursively import run_net_recursively
from view import view_reconstruction
from train import train_net
from test_model import Tester, test_autoencoder
from util import get_data_path, setupLogging
from load_data import load_data, load_test_id
from params import *

ROOT = get_data_path()  #glob=True if data is in the global datasets directory

# load data and params
dataname = 'tracer_data_multi_full_33_17_17_nC20_3360_switch0.60_2718_noise'
#dataname = 'tracer_data_multi_full_33_17_17_nC20_17424_switch0.60_2718_noise'
#dataname = 'tracer_data_multi_full_33_17_17_nC20_87544_switch0.60_2718_noise'
#dataname = 'tracer_data_expert_test_norm'

# will not reload if datasets is already in the workspace
try:
    datasets
except NameError:
    dataset = os.path.join(ROOT, dataname + '.pkl')
    datasets = load_data(dataset, target_is_int=True)

# set rng
rng = np.random.RandomState(23455)
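
The fixed seed makes every run initialize the network weights identically. For illustration, a Glorot-style uniform initialization drawn from such an rng might look like the sketch below; the layer sizes are hypothetical, not taken from this project:

# Illustration only: a reproducible Glorot-style uniform init from the seeded rng.
import numpy as np

rng = np.random.RandomState(23455)
n_in, n_out = 289, 500  # hypothetical layer sizes
bound = np.sqrt(6.0 / (n_in + n_out))
W_init = rng.uniform(low=-bound, high=bound, size=(n_in, n_out))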
Example #4

import os
import pickle

import numpy as np
import theano

from util import get_data_path
from load_data import load_data
from test_model import test_autoencoder


def make_params_shared(params):
    """Wrap each NumPy parameter array in a Theano shared variable."""
    shared_params = []
    for p in params:
        p = theano.shared(np.asarray(p, dtype=theano.config.floatX))
        shared_params.append(p)
    return shared_params  # alt: [[shared_params[0], shared_params[1]], [shared_params[0].T, shared_params[2]]]


if __name__ == '__main__':

    ROOT = get_data_path()
    param_file = os.path.join('data', 'results', 'tracer_data_expert_test/nh_2000_dropout1/LRi_0.00100_reg_0.00/2013_10_11_18_50/params_tracer_data_expert_test_nh_2000_dropout1_LRi_0.00100_reg_0.00_73.450010.pkl')
    data_file = os.path.join(ROOT, 'tracer_data_expert_test.pkl')

    # load pickled parameters and their training details
    with open(param_file, 'rb') as f:
        params, details = pickle.load(f)
    train, valid, test = load_data(data_file, True)

    details['mode'] = 'test'
    params = make_params_shared(params)
    test_set_x = test[0]

    test_autoencoder(test_set_x, params, details)

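For reference, theano.shared wraps a NumPy array in a variable that Theano graphs can read and update in place; that is why the pickled parameters are converted before being handed to test_autoencoder. A quick standalone check of the helper (the parameter shapes below are made up):

# Standalone check of make_params_shared; the parameter shapes are made up.
import numpy as np
import theano


def make_params_shared(params):
    return [theano.shared(np.asarray(p, dtype=theano.config.floatX))
            for p in params]


W, b = np.zeros((4, 3)), np.zeros(3)
shared = make_params_shared([W, b])
print(shared[0].get_value().shape)  # -> (4, 3)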
Example #5
import cPickle
import gzip
import os
import sys
import time
import datetime
import numpy as np
import theano
import theano.tensor as T

sys.path.append('src')
from util import get_data_path

ROOT = get_data_path()
LOAD_PARAMS = False
PARAM_PATH = None


def load_convpool_params(rng, img_size, n_in, n_kern, kern_size, n_hid, n_out,
                         pool_ds):
    """Create randomly initialized parameters for a stack of conv+pool layers."""
    params = []
    new_size = img_size

    for i in xrange(len(n_kern)):
        # filter bank shape: (num_filters, num_input_maps, kern_h, kern_w)
        shape = (n_kern[i], n_in, kern_size[i], kern_size[i])
        fan_in = np.prod(shape[1:])
        # pooling reduces the effective fan-out by a factor of pool_ds**2
        fan_out = shape[0] * np.prod(shape[2:]) / (pool_ds[i] ** 2)

        params.append(set_params_random(rng, fan_in, fan_out, shape))
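
The fan arithmetic follows the usual convnet convention: fan_in counts the inputs feeding one unit (input maps times kernel area), and fan_out divides by the pooling area because each output map is downsampled. A worked check with example numbers (not taken from this project):

# Worked check of the fan-in / fan-out computation with example numbers.
import numpy as np

shape = (20, 1, 5, 5)  # (n_kern, n_in, kern_h, kern_w)
fan_in = np.prod(shape[1:])                           # 1 * 5 * 5  = 25
fan_out = shape[0] * np.prod(shape[2:]) // (2 ** 2)   # 20 * 25 / 4 = 125
print(fan_in, fan_out)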
Example #6
""" Test script to visualize some of the random Curve data.
@author Graham Taylor
"""
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import sys

sys.path.append('code')
sys.path.append('code/util')

from util import get_data_path, dispims, tile_raster_images
from util.serialization import deserialize_object
import os.path

data_path = get_data_path()

# will not reload if data is already in workspace
try:
    datasets
except NameError:
    print 'loading data'
    datasets = deserialize_object(
        os.path.join(
            data_path,
            'results/params_tracer_data_multi_full_33_17_17_nC1_11000_switch1.00_2718_noise_nh1a_300_nh1b_50_nh2_500_nout_9_preTrainFalse_LRi_0.01000_reg_10.00_dropoutFalse_2.647312.pkl'
        ))

w0 = datasets[2].T
print w0.shape

n_cases, n_dims = w0.shape
im_w = int(np.sqrt(n_dims))         # assume square patches
case_w = int(np.sqrt(n_cases)) + 1  # tiles per row for display
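
A typical continuation would tile the weight rows into a single image for display. This is a sketch, assuming the tile_raster_images imported above follows the well-known deeplearning.net signature (X, img_shape, tile_shape, tile_spacing):

# Hypothetical continuation: lay each weight row out as an im_w x im_w patch.
tiled = tile_raster_images(X=w0, img_shape=(im_w, im_w),
                           tile_shape=(case_w, case_w), tile_spacing=(1, 1))
plt.figure()
plt.imshow(tiled, cmap='gray')
plt.show()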