Example #1
	def compute_covariance(self, x, y, diag=False):
		x, y = format_data(x), format_data(y)

		covs = []
		for kernel, idx in zip(self.kernels, self.maps):
			covs.append(kernel.compute_covariance(x[:,idx], y[:,idx], diag=diag))

		return np.prod(covs, axis=0)
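Example #1 builds a product kernel: each sub-kernel is evaluated only on its own feature subset (self.maps) and the per-kernel covariance matrices are multiplied elementwise, which again yields a valid covariance. A minimal sketch of the same idea with two hypothetical stand-in kernels (k1 and k2 are assumptions, not part of the class above):

import numpy as np

def k1(x, y):  # assumed RBF kernel on the first feature
    return np.exp(-0.5 * (x[:, None] - y[None, :]) ** 2)

def k2(x, y):  # assumed linear kernel on the second feature
    return 1.0 + x[:, None] * y[None, :]

x = np.random.rand(5, 2)
y = np.random.rand(4, 2)
# Elementwise product of per-subset covariances, mirroring np.prod(covs, axis=0) above.
K = np.prod([k1(x[:, 0], y[:, 0]), k2(x[:, 1], y[:, 1])], axis=0)
assert K.shape == (5, 4)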
Example #2
	def _format_vects(self, x, y, diag=False):
		x, y = format_data(x), format_data(y)

		if not diag:
			idx1, idx2 = np.meshgrid(np.arange(x.shape[0]), np.arange(y.shape[0]))
			x, y =  x[idx1], y[idx2]
		else:
			x, y = x.reshape(x.shape + (1,)).swapaxes(1,2), y.reshape(y.shape + (1,)).swapaxes(1,2)

		return x, y
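For reference, the non-diagonal branch in Example #2 expands x and y into all pairwise combinations via np.meshgrid; a small standalone check of the resulting shapes (the array sizes here are arbitrary):

import numpy as np

x = np.arange(6).reshape(3, 2)   # three 2-D points
y = np.arange(4).reshape(2, 2)   # two 2-D points
idx1, idx2 = np.meshgrid(np.arange(x.shape[0]), np.arange(y.shape[0]))
xp, yp = x[idx1], y[idx2]        # every (x_i, y_j) pair
assert xp.shape == (2, 3, 2) and yp.shape == (2, 3, 2)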
Example #3
 def add_observation(self, X_new, Y_new):
     X_new, Y_new = format_data(X_new, dim=self._dim), format_data(Y_new,
                                                                   dim=1)
     if np.sqrt(np.sum((self.X - X_new)**2, axis=1)).min() > self.tol:
         self.X = np.concatenate((self.X, X_new), axis=0)
         self.Y = np.concatenate((self.Y, Y_new), axis=0)
         self._nu = npr.randn(self.X.shape[0], 1)
         if self.marginalize and self.sampler is not None:
             _ = self.sampler.sample(self.resample)  # re-sample
         self._recompute = True
Example #4
 def job_details(self, job_name):
     print(
         blue('Getting details for job %s on server %s ...' %
              (job_name, self.url)))
     table_data = [['Name', 'Status', 'Url']]
     try:
         job = self.server.get_job(job_name)
         table_data.append([job.name, self.job_status(job), job.url])
         format_data(table_data)
     except custom_exceptions.UnknownJob:
         print('No job found : %s' % job_name)
Example #5
 def job_list(self):
     print(blue('Fetching job list for %s...' % self.url))
     table_data = [['Name', 'Status', 'Url']]
     for job_name, job_instance in self.server.get_jobs():
         table_data.append([
             job_instance.name,
             green('RUNNING') if job_instance.is_running() else
             blue('STOPPED'), job_instance.url
         ])
     format_data(table_data)
     print("Jobs found: ", len(self.server.get_jobs_list()))
Example #6
	def compute_covariance(self, x, y, diag=False):
		x, y = format_data(x), format_data(y)
		x, y = np.copy(x), np.copy(y)
		assert(x.shape[1] == self.N and y.shape[1] == self.N)

		for i in range(x.shape[1]):
			a, b = self.parameters[-2*self.N + 2*i].value, self.parameters[-2*self.N + 2*i + 1].value
			x[:,i] = beta.cdf(x[:,i], a, b)
			y[:,i] = beta.cdf(y[:,i], a, b)

		val =  self.kernel.compute_covariance(x, y, diag=diag)

		return val
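Example #6 is input warping: each dimension is pushed through a Beta CDF (with learned shape parameters a, b) before the base kernel sees it, so the kernel can adapt to non-stationary inputs. A minimal sketch of the warping step alone, with made-up shape parameters and inputs assumed to already lie in [0, 1]:

import numpy as np
from scipy.stats import beta

x = np.random.rand(10, 2)           # inputs already scaled to [0, 1]
params = [(2.0, 0.5), (0.5, 2.0)]   # hypothetical (a, b) per dimension
warped = np.column_stack([beta.cdf(x[:, i], a, b) for i, (a, b) in enumerate(params)])
# The base kernel is then evaluated on `warped` instead of `x`.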
Example #7
    def c_repo_list (self, a, username, **kw) :
        if kw.get("admin") :  # show all repository
            _repos = self._config_db.repositories
        else :
            _repos = self._config_db.get_user_property(username, "repository", list(), )

        _values = map(
            lambda x : (
                "%s" % (
                    self._config_db.get_repository_property(x, "path"),
                ),
                "%s%s" % (
                    x,
                    self._config_db.get_repository_property(x, "description", "").strip() and (
                        " (%s)" % self._config_db.get_repository_property(x, "description", "").strip()
                    ) or "",
                ),
                self._config_db.is_remote_repository(x) and " O" or " X",
            ),
            _repos,
        )
        #_values.sort()
        return utils.format_data(
            _values,
            width=self._window_size[1],
            captions=("path", "alias", "is remote?", ),
            num_columns=3,
        )
Example #8
 def job_list_active(self):
     print(blue('Fetching job list for %s...' % self.url))
     table_data = [['Name', 'Status', 'Url']]
     count = 0
     for job_name, job_instance in self.server.get_jobs():
         if not job_instance.is_enabled():
             continue
         count += 1
         if count >= 10:
             break
         table_data.append([
             job_instance.name,
             self.job_status(job_instance), job_instance.url
         ])
     format_data(table_data)
     print("Jobs found: ", len(self.server.get_jobs_list()))
Example #9
 def setUp(self):
     self.ud_obj = dict(
         user_id=1,
         activity_id=1,
         temp=(1, 20, 30),
         wind=(0, 0, 10),
         cloud=(0, 0, 100),
         rain=(0.0, 0.0, 0.5),
         weights=dict(
             wind=1.0,
             rain=0.9,
             temp=0.65,
             cloud=0.1,
         ),
         min_size=1,
     )
     time_ranges = []
     for i in range(5):
         time_ranges.append((i * 24 + 6, i * 24 + 10))
     for i in range(5, 7):
         time_ranges.append((i * 24 + 8, i * 24 + 17))
     print(time_ranges)
     self.ud_obj['time_ranges'] = time_ranges
     with open('tests/data/data.json') as fh:
         self.weather_data_bc = BCMock(format_data(json.loads(fh.read())))
Example #10
    def _gp_posterior(self, x):
        '''
        Using Algorithm 2.1 from Gaussian Processes for Machine Learning.

        Returns:
        mean, variance
        '''
        X, Y, noise, kernel = self.X, self.Y, self.noise, self.kernel
        if self._has_noise_prior:
            noise = noise.value

        x = format_data(x, dim=self._dim)

        if self._recompute:
            self._compute_aux()
        L, lower, alpha, ll = self._L, self._lower, self._alpha, self._ll

        K_x = kernel.compute_covariance(x, X)
        v = linalg.solve_triangular(L, K_x, lower=lower)

        mean = K_x.transpose().dot(alpha)

        var = kernel.compute_covariance(x, x, diag=True)[:, 0] - (v * v).sum(
            axis=0) + noise  # compute ONLY the diagonal

        return mean[:, 0], var
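The quantities in _gp_posterior follow Algorithm 2.1 of Rasmussen & Williams: with L the Cholesky factor of K(X, X) + sigma^2 I and alpha = K^{-1} Y, the predictive mean is K(x, X)^T alpha and the predictive variance is k(x, x) - sum(v * v) with v = L^{-1} K(X, x). A self-contained sketch of that algebra with an assumed RBF kernel (none of the names below come from the class itself):

import numpy as np
from scipy import linalg

def rbf(a, b):  # assumed kernel, for illustration only
    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2)

X = np.linspace(0, 1, 6)             # training inputs
Y = np.sin(2 * np.pi * X)[:, None]   # training targets, shape (n, 1)
noise = 1e-3
x = np.array([0.25, 0.75])           # query points

K = rbf(X, X) + noise * np.eye(len(X))
L = linalg.cholesky(K, lower=True)
alpha = linalg.cho_solve((L, True), Y)                     # K^{-1} Y
K_x = rbf(X, x)                                            # shape (n, m)
v = linalg.solve_triangular(L, K_x, lower=True)

mean = K_x.T.dot(alpha)[:, 0]                              # predictive mean
var = rbf(x, x).diagonal() - (v * v).sum(axis=0) + noise   # predictive variance (diagonal only)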
Example #11
def city_weather_forecast(api_key, city_name):
    try:
        URL = WEATHER_URL + 'forecast?' + "appid=" + api_key + "&q=" + city_name
        get_data = requests.get(URL, headers=HEADERS, timeout=10).json()
        return format_data(
            json.loads(json.dumps(get_data, default=_json_encode)))
    except requests.exceptions.Timeout:
        return abort(408, {"status": False, "message": "Request timeout"})
Example #12
def main(sc):
    file_key = "7beac85e-00a5-48ae-af2a-aaf9a332463b"
    weather_data = utils.format_data(utils.get_s3_json_file(file_key))

    weather_data_bc = sc.broadcast(weather_data)
    user_data_rdd = sc.parallelize(user_data)

    results = user_data_rdd.map(
        lambda ud_obj: utils.map_func(weather_data_bc, ud_obj)).collect()
    for result in results:
        print("User: %s" % (result['user_id'], ))
        print(result['wws'])
Example #13
    def c_user_view (self, a, username, **kw) :
        _values = map(
            lambda x : (x, self._config_db.get_user_property(username, x, "", ), ),
            ("realname", "email", "public_key", "admin", ),
        )
        _values.insert(0,
            (
                "has password?",
                bool(self._config_db.get_user_property(username, "password")) and "yes" or "no",
            ),
        )
        _values.insert(0, ("username", username, ), )
        _repositories = self._config_db.get_user_property(username, "repository", )
        _values.append(("repository", _repositories and ", ".join(_repositories) or "", ), )

        return utils.format_data(_values, width=self._window_size[1], captions=("key", "value", ), )
Example #14
    def print_user_list (self, userlist, ) :
        _values = map(
            lambda x : (
                x,
                self._config_db.get_full_username(x),
                self._config_db.is_admin(x) and "O" or "X",
            ),
            userlist,
        )

        return [
                i for i in utils.format_data(
                    _values and _values or (("no users", "", ), ),
                    width=self._window_size[1],
                    captions=("username", "realname", "is admin?", ),
                )
        ]
Example #15
 def gen_data_page(self, pattern, is_kernel=0):
     self.data_page_str.clear()
     # TO DO: need to embed num_of_kernel_data_pages, num_of_data_pages, etc. in the riscv_core_setting
     page_cnt = 1 if is_kernel else 2
     page_size = 4096
     for section_idx in range(page_cnt):
         if is_kernel:
             self.data_page_str.append(
                 "kernel_data_page_{}:".format(section_idx))
         else:
             self.data_page_str.append("data_page_{}:".format(section_idx))
         # TO DO: need to embed data_page_alignment in the core_setting
         self.data_page_str.append(".align 12")
         for i in range(0, page_size, 32):
             tmp_data = self.gen_data(i, pattern, 32)
             tmp_str = ".word {:{}}".format(utils.format_data(tmp_data),
                                            utils.length)
             self.data_page_str.append(tmp_str)
Example #16
    def regress(self, x, num_samples=10, marginalize=True):
        x = format_data(x, dim=self._dim)

        if self.marginalize and marginalize:
            means = np.zeros((x.shape[0], num_samples))
            vars = np.zeros((x.shape[0], num_samples))
            hp_samples = self.sampler.sample(num_samples)
            for i in range(num_samples):
                self.set_kernel_parameters(hp_samples[i])
                mean, var = self._gp_posterior(x)

                means[:, i] = mean
                vars[:, i] = var

            mean = means.mean(axis=1)
            var = vars.mean(axis=1)
            # var = mean**2 - (means**2 + vars).mean(axis=1)
        else:
            mean, var = self._gp_posterior(x)

        return mean, var
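When Example #16 marginalizes over hyperparameter samples it averages the per-sample means and variances; the commented-out line points at the law-of-total-variance alternative, which also accounts for the spread of the per-sample means: Var[f] = E[Var[f | theta]] + Var[E[f | theta]], i.e. (means**2 + vars).mean(axis=1) - mean**2 in the snippet's notation. A toy numeric check of that identity:

import numpy as np

means = np.array([[0.0, 1.0, 2.0]])   # one query point, three hyperparameter samples
vars_ = np.array([[0.5, 0.5, 0.5]])
mean = means.mean(axis=1)
var_mixture = (means**2 + vars_).mean(axis=1) - mean**2
# E[Var] + Var[E] = 0.5 + var([0, 1, 2]) = 0.5 + 2/3
assert np.allclose(var_mixture, 0.5 + 2.0 / 3.0)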
Example #17
def main(config, data):
  X, y = utils.format_data(config, data)

  print('| Creating log configurations ...')  
  config.create_log_configurations()

  print('| Saving log configurations ...')
  config.comments = f'{config.comments}, {str(X.shape)}'
  config.save_config()

  folds = config.data['folds']
  current_best_acc, current_best_mcc = 0, -2
  best_fold = None

  print(f'| Training model with {folds} folds ...')
  folder = KFold(n_splits=folds)
  exp_evaluation = experiment_history.EvaluationHistory(config=config) 
  exp_training = experiment_history.TrainingHistory(log_dir=config.log_dir)

  for k, (train_index, test_index) in enumerate(folder.split(X)):
    print(f'| X: {X.shape}\n| y: {y.shape}')
    x_train, x_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Build model
    model_handler = model.Model(config=config)
    md = model_handler.build_model(x_train.shape)
    start, used = 0, 0

    try:
      # Train model
      start = time.time()
      history = model_handler.train_model(x_train, y_train)
      used = time.time() - start
      print(f'| K = {k + 1} | Used {used:.2f} seconds') 
    except KeyboardInterrupt as e:
      break

    # Evaluate model
    print('| Evaluating model on test set ...')
    predictions = md.predict(x_test)
    is_football = config.usecase == config.FOOTBALL
    evaluations = None 
    if is_football:
      evaluations = exp_evaluation.custom_evaluate(predictions, y_test, k + 1, data['columns'])
    else:
      evaluations = exp_evaluation.evaluate(predictions, y_test, k + 1)
    exp_training.update_history(history.history, used)
    
    # Save model
    print('| Saving model ...')
    md.save(f'{config.log_dir}/models/k{k + 1}.h5')
    
    
    # Replace current best model if ACC is better (or ACC is similar but MCC is better)
    same_acc = abs(evaluations['ACC'] - current_best_acc) < 0.05
    better_mcc = evaluations['MCC'] > current_best_mcc
    better_acc = evaluations['ACC'] > current_best_acc

    if better_acc or (same_acc and better_mcc):
      print('| Replacing best model ...')
      md.save(f'{config.log_dir}/models/best.h5')
      current_best_mcc = evaluations['MCC']
      current_best_acc = evaluations['ACC']
      best_fold = k + 1 
    
    # Clear model/session
    del md
    del model_handler
    K.clear_session()
    print('=======================')
  
  # Rename best model for best fold and save evaluations
  if os.path.exists(f'{config.log_dir}/models/best.h5'):
    os.rename(f'{config.log_dir}/models/best.h5', f'{config.log_dir}/models/best_({best_fold}).h5')
  
  if best_fold or current_best_mcc != -2:
    exp_evaluation.save_history()
    exp_evaluation.save_statistics()
    exp_training.save_history()
    exp_training.save_statistics()
    print(f'| Saved all in: {config.log_dir}')
  else:
    print('| Did not save anything')
Example #18
            if a != pred:
                incorrect += 1
                activity_error[a] += 1;

    for a in test_activites:
        print "Activity " + str(a) + ": " + str(float(activity_error[a])/activity_count[a]);

    utils.show_confusion_matrix(y_test, y_pred)
    
    return incorrect/total

if __name__ == "__main__":
    X, y = data.load_time()


    activities = utils.format_data(X, y)
    
    train, test = utils.split_training(activities, .2)

    hmms = train_all_hmm(train)

    error = test_error(test, hmms)

    print error

    #for a in activities:
    #    print "Activity " + str(a);
    #    for array in activities[a]:
    #        print np.shape(array);
    # # X = [np.array(x) for x in activities[2]]
    # print len(X), X[:2]
Example #19
import csv
import numpy as np
import utils
import plot

DATASETS = ["LGA", "SFO", "MDW", "ORD"]

for DATASET in DATASETS:
    with open("data/" + DATASET + ".csv", 'rb') as rawcsv:

        # np.set_printoptions(threshold=np.inf, suppress=True)
        data_orig = csv.reader(rawcsv, delimiter=',')
        data, dates, wind = utils.format_data(data_orig)

        date_vector = dates[:, 0]

        date_vector *= 12.0
        date_vector += 1

        print(data.shape)
        print(date_vector.shape)

        plot.plot2d((date_vector, data[:, 1]),
                    DATASET + "-temp-time",
                    c="black",
                    a=0.2)
        plot.plot2d((date_vector, data[:, 7]),
                    DATASET + "-humidity-time",
                    c="black",
                    a=0.2)
        plot.plot2d((date_vector, wind),
Example #20
def trainNestedCV(direct, subject, session, filename, hyp_params, parameters):

    subj = load_subject(direct, subject, 1, filename)["subject"]
    #
    # data = subj.data3D.astype(np.float32) # convert data to 3d for deep learning
    # labels = subj.labels.astype(np.int64)
    # labels[:] = [x - 1 for x in labels]
    data, labels = format_data('words', subject, 4096)

    import random  #just for testing
    labels = []  #just for testing
    for i in range(200):  #just for testing
        labels.append(random.randint(0, 3))  #just for testing

    labels = np.array(labels).astype(np.int64)
    data = data[:200, :, 0:750]

    unique = np.unique(labels, return_counts=False)
    data_params = dict(n_classes=len(unique),
                       n_chans=6,
                       input_time_length=subj.epoch)  #n_chans = subj.n_chans

    #w = windows(data, subj, 500, 250, 500)  # fs = subj.sfreq # list of windows

    num_folds = 2
    skf = StratifiedKFold(
        n_splits=num_folds, shuffle=False,
        random_state=10)  # don't randomize trials to preserve structure

    trainsetlist, testsetlist = [], []
    inner_fold_acc, inner_fold_loss, inner_fold_CE = [], [], []

    subj_results = Results(
        subject, filename,
        num_folds)  #, class_names=["apple", "orange", "car", "bus"]
    subj_results.change_directory(direct)

    subj_results.get_acc_loss_df(
        hyp_params, 'Fold')  # empty dataframe headed with each HP set

    clf = Classification(hyp_params, parameters, data_params, "01", "shallow",
                         "words")  # classifier object

    print(f"Inner-fold training for Subject {subject} in progress...")
    for inner_ind, outer_index in skf.split(data, labels):
        inner_fold, outer_fold = data[inner_ind], data[outer_index]
        inner_labels, outer_labels = labels[inner_ind], labels[outer_index]
        subj_results.concat_y_true(outer_labels)

        trainsetlist.append(SignalAndTarget(
            inner_fold, inner_labels))  # used for outer-fold train/test
        testsetlist.append(SignalAndTarget(outer_fold, outer_labels))

        for train_idx, valid_idx in skf.split(inner_fold, inner_labels):
            X_Train, X_val = inner_fold[train_idx], inner_fold[valid_idx]
            y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx]
            train_set = SignalAndTarget(X_Train, y_train)
            val_set = SignalAndTarget(X_val, y_val)

            hyp_param_acc, hyp_param_loss = [], []
            hyp_param_acc, hyp_param_loss, hyp_param_CE = clf.train_inner(
                train_set, val_set, None, False)

            inner_fold_loss.append(hyp_param_loss)
            inner_fold_acc.append(hyp_param_acc)
            inner_fold_CE.append(hyp_param_CE)

    subj_results.fill_acc_loss_df(inner_fold_acc, inner_fold_loss,
                                  inner_fold_CE)

    subj_results.get_hp_means(
        hyp_params, "accuracy")  #needed to select inter-subject parameters

    subj_results.get_best_params("accuracy")
    clf.best_params = subj_results.best_params
    clf.set_best_params()
    print(f"Best parameters selected: {clf.best_params}")
    print(
        "///////-------------------------------------------------------///////"
    )
    print(
        f"Outer-fold training and testing for Subject {subject} in progress..."
    )
    scores, fold_models, predictions, probabilities, outer_cross_entropy = clf.train_outer(
        trainsetlist, testsetlist, False
    )  #accuracy score for each fold, combined predictions for each fold

    subj_results.outer_fold_accuracies = scores
    subj_results.y_pred = np.array(predictions)
    subj_results.y_probs = np.array(probabilities)
    subj_results.outer_fold_cross_entropies = outer_cross_entropy

    subj_results.train_loss, subj_results.valid_loss, subj_results.test_loss, subj_results.train_acc, subj_results.valid_acc, subj_results.test_acc = get_model_loss_and_acc(
        fold_models)

    subj_results.save_result()

    subj_results.subject_stats()
    print("")
    print(subj_results.subject_stats_df.head())
Example #21
model_path = config["Model"]["Path"]
print("All configuration is imported")

cnxn = pyodbc.connect('DRIVER=' + driver + ';SERVER=' + server +
                      ';PORT=1433;DATABASE=' + database + ';UID=' + username +
                      ';PWD=' + password)

print("Connection to SQL Server is established")

# Load SQL Table into Dataframe
# image_df = utils.load_image_table(cnxn)
df = utils.load_image_table(cnxn)
print("Photo Data is loaded")

# Change Column Name and Data Type
df = utils.format_data(df)

print("Photo Data is formatted")

# Extract insights from existing columns, e.g. weekday from unix epoch
df = utils.extract_data(df)

print("More insight is gained from Photo Data")

# Predict Score

# Apply delay function on Image Post Time and Image Original User Last Post Time

# best_photos_sorted
features = [
    "caption_length", "english_content_length", "english_content_ratio",
Example #22
NUM_TRAIN = 20000

np.set_printoptions(formatter={'float': '{:05.2f}'.format})

# This file does the testing without clearly time-dependent variables,
# such as wind direction, temperature, dewpoint, etc.
# 
# It would thus not make sense to /try/ to predict temperature, etc. 
# from our HMM classes (since we are ignoring them)

warnings.filterwarnings("ignore")
for DATASET in DATASETS:
	with open("data/" + DATASET + ".csv", 'rb') as rawcsv:

		our_csv = csv.reader(rawcsv, delimiter=',')
		data = utils.format_data(our_csv)[0]
		orig_data = data

		print("\n#####  " + DATASET + "  #####")
		print(data.shape)
		print

		# overall standard deviations of data
		std = np.std(data, axis=0)

		# HMM class estimates
		train = data[0:NUM_TRAIN].astype(int)
		test  = data[NUM_TRAIN:].astype(int)

		# naive weather prediction: tomorrow has the same weather as today
		deltas = np.zeros((len(test)-1, 21))
Example #23
def objective(trial):

    # Open data file
    f_in = h5py.File(DT_FL_IN, "r")
    dt_in = f_in[DT_DST_IN]

    f_out = h5py.File(DT_FL_OUT, "r")
    dt_out = f_out[DT_DST_OUT]

    WD = 2
    # Dummy y_data
    x_data, _ = format_data(dt_in, wd=WD, get_y=True)
    _, y_data = format_data(dt_out, wd=WD, get_y=True)
    x_data = np.squeeze(x_data)

    # Split data and get slices
    idxs = split(x_data.shape[0],
                 N_TRAIN,
                 N_VALID,
                 test_last=dt_in.attrs["idx"])
    slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)

    # Get data
    x_train = x_data[slc_trn[0]]
    y_train = y_data[slc_trn[0]]
    x_val = x_data[slc_vld[0]]
    y_val = y_data[slc_vld[0]]

    conv_shape = y_train.shape[1:3]
    # Strides cfg
    strd = [2, 2, 5, 5]

    # Limits and options
    epochs = 60
    # Filters
    flt_lm = [[4, 128], [4, 128], [4, 128]]
    d_lm = [1, 50]
    # Kernel
    k_lm = [3, 5]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1e-1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()
    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    d = inputs
    # Decoder
    n_layers = trial.suggest_int("n_layers", 1, 3)
    flt = trial.suggest_int("nl_flt", d_lm[0], d_lm[1])
    # Reduction from output
    red = np.prod(strd[:n_layers])
    # Decoder first shape
    lt_shp = (np.array(conv_shape) / red).astype(int)
    # Decoder dense size
    n_flat = np.prod(lt_shp) * flt
    # Format stride list
    strd = strd[::-1][-n_layers:]
    # Latent -> Decoder layer
    # Activation
    act_lt = trial.suggest_categorical("lt_activation", act_opts)
    # Regularization
    l2_lt = int(trial.suggest_loguniform("lt_l2", l2_lm[0], l2_lm[1]))
    l2_reg = regularizers.l2(l=l2_lt)
    # Flat input to the decoder
    d = layers.Dense(n_flat,
                     activation=act_lt,
                     kernel_regularizer=l2_reg,
                     name="l1_dense_decoder")(inputs)
    # Reshape to the output of the encoder
    d = layers.Reshape(list(lt_shp) + [flt])(d)
    # Generate the convolutional layers
    for i in range(n_layers):
        # Get number of filters
        flt = trial.suggest_int("n{}_flt".format(i), flt_lm[i][0],
                                flt_lm[i][1])
        # Get the kernel size
        k_sz = trial.suggest_categorical("d{}_kernel_size".format(i), k_lm)
        # Get the activation function
        act = trial.suggest_categorical("d{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("d{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Convolutional layer
        d = layers.Conv2DTranspose(
            flt,
            (k_sz, k_sz),
            strides=strd[i],
            activation=act,
            padding="same",
            kernel_regularizer=l2_reg,
            name="{}_decoder".format(i + 1),
        )(d)
        dp = 0
        # Dropout layers
        if dp > 0:
            d = layers.Dropout(dp, name="{}_dropout_decoder".format(i + 1))(d)

    decoded = layers.Conv2DTranspose(
        y_train.shape[3],
        (5, 5),
        activation="linear",
        padding="same",
        name="output_decoder",
    )(d)

    ae = Model(inputs, decoded, name="Decoder_nxt")

    # Early stopping, monitoring the loss on the validation dataset
    monitor = "val_loss_norm_error"
    patience = int(epochs * 0.3)
    es = EarlyStopping(monitor=monitor,
                       mode="min",
                       patience=patience,
                       restore_best_weights=True)

    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    ae.compile(optimizer=k_opt,
               loss=loss_norm_error,
               metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    ae.summary()
    hist = ae.fit(
        x_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_loss_norm_error"), es],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    ae.save(txt.format(RUN_VERSION, trial.number))
    return min(hist.history["val_loss_norm_error"])
Example #24
    prediction = result.mean(axis=0)
    uncertainty = result.std(axis=0)
    return prediction, uncertainty


# Load Data
print("Loading and preprocessing Data...\n")
clean_data = load_preprocess_data()

# Preprocess Data
X = clean_data.drop('state', axis=1)
y = pd.DataFrame(clean_data['state'].values)
y.columns = ['state']
X = X.iloc[:500000]
y = y.iloc[:500000]
X = format_data(X)
del clean_data

# Extract categories
cats = extract_categories(y['state'].values)
cats.sort()
print(type(cats))
NUM_CATS = len(cats)
print("categories: ", cats)
print("number of categories: ", NUM_CATS)

# Scale Data
min_max_scaler = MinMaxScaler()
nsamples, nx, ny = X.shape
d2_X = X.reshape((nsamples,nx*ny))
d2_X_scaled = min_max_scaler.fit_transform(d2_X)
Example #25
                       dtype=bool)
yvl_missing = np.array(validate_df.loc[:,
                                       'COVAR_y1_MISSING':'COVAR_y3_MISSING'],
                       dtype=bool)

# read data
train_df['train_flag'] = True
validate_df['train_flag'] = False
data = pd.concat((train_df, validate_df))

# remove temporary data
del train_df
del validate_df

# basic formatting
Xtr, ytr, Xvl, yvl = utils.format_data(data, preprocessing=False)
del data

#
# do preprocessing
#
scaler = decomposition.RandomizedPCA()
#scaler = decomposition.SparsePCA(n_components=max_pca_components)
#scaler = decomposition.PCA(n_components='mle')
print 'PCA max features to keep: %d' % (max_pca_components)
Xtr = scaler.fit_transform(
    Xtr
)  # fit only for train data (http://cs231n.github.io/neural-networks-2/#datapre)
Xvl = scaler.transform(Xvl)

#
Example #26
from sklearn.svm import LinearSVC
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.externals.joblib import Memory

from utils import format_data

# Using joblib allows to cache some of the results, in order to gain time on
# computation

mem = Memory(cachedir='.')


################################################################################
# Load the data
X, y = format_data()

################################################################################
# Split data into training set and testing set
print "Splitting the data"
X_train, X_test = X[0:3000], X[3000:6000]
y_train, y_test = y[0:3000], y[3000:6000]

################################################################################
# Train the SVM classification model
print "Training the classification model"
cs = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1]
csx = range(len(cs))
precisions = []
recalls = []
for c in cs:
Example #27
import plot
import utils

DATASETS = ["LGA", "SFO", "MDW", "ORD"]
# All final images are ORD
HMM_CLASSES = [2, 4, 6, 8, 10]
NUM_TRAIN = 20000

np.set_printoptions(formatter={'float': '{:05.2f}'.format})

warnings.filterwarnings("ignore")
for DATASET in DATASETS:
    with open("data/" + DATASET + ".csv", 'rb') as rawcsv:

        our_csv = csv.reader(rawcsv, delimiter=',')
        data, dates, wind = utils.format_data(our_csv)
        orig_data = np.array(data)

        print("\n#####  " + DATASET + "  #####")
        print(data.shape)
        print

        # overall standard deviations of data
        std = np.std(data, axis=0)

        # HMM class estimates
        train = data[0:NUM_TRAIN].astype(int)
        test = data[NUM_TRAIN:].astype(int)

        for ii in HMM_CLASSES:
            # Run Gaussian HMM
Example #28
def classifier_rbf():
    X, y = format_data()

    clf = SVC(C=10, gamma=0.002)
    clf = mem.cache(clf.fit)(X, y)
    return clf
Example #29
def network_model(subject_id, model_type, data_type, cropped, cuda, parameters, hyp_params):
	best_params = dict() # dictionary to store hyper-parameter values

	#####Parameters passed to function#####
	max_epochs  = parameters['max_epochs']
	max_increase_epochs = parameters['max_increase_epochs']
	batch_size = parameters['batch_size']

	#####Constant Parameters#####
	best_loss = 100.0 # instantiate starting point for loss
	iterator = BalancedBatchSizeIterator(batch_size=batch_size)
	stop_criterion = Or([MaxEpochs(max_epochs),
						 NoDecrease('valid_misclass', max_increase_epochs)])
	monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()]
	model_constraint = MaxNormDefaultConstraint()
	epoch = 4096

	#####Collect and format data#####
	if data_type == 'words':
		data, labels = format_data(data_type, subject_id, epoch)
		data = data[:,:,768:1280] # within-trial window selected for classification
	elif data_type == 'vowels':
		data, labels = format_data(data_type, subject_id, epoch)
		data = data[:,:,512:1024]
	elif data_type == 'all_classes':
		data, labels = format_data(data_type, subject_id, epoch)
		data = data[:,:,768:1280]
	
	x = lambda a: a * 1e6 # improves numerical stability
	data = x(data)
	
	data = normalize(data)
	data, labels = balanced_subsample(data, labels) # downsampling the data to ensure equal classes
	data, _, labels, _ = train_test_split(data, labels, test_size=0, random_state=42) # redundant shuffle of data/labels

	#####model inputs#####
	unique, counts = np.unique(labels, return_counts=True)
	n_classes = len(unique)
	n_chans   = int(data.shape[1])
	input_time_length = data.shape[2]

	#####k-fold nested cross-validation#####
	num_folds = 4
	skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=10)
	out_fold_num = 0 # outer-fold number
	
	cv_scores = []
	#####Outer-Fold#####
	for inner_ind, outer_index in skf.split(data, labels):
		inner_fold, outer_fold     = data[inner_ind], data[outer_index]
		inner_labels, outer_labels = labels[inner_ind], labels[outer_index]
		out_fold_num += 1
		 # list for storing cross-validated scores
		loss_with_params = dict()# for storing param values and losses
		in_fold_num = 0 # inner-fold number
		
		#####Inner-Fold#####
		for train_idx, valid_idx in skf.split(inner_fold, inner_labels):
			X_Train, X_val = inner_fold[train_idx], inner_fold[valid_idx]
			y_train, y_val = inner_labels[train_idx], inner_labels[valid_idx]
			in_fold_num += 1
			train_set = SignalAndTarget(X_Train, y_train)
			valid_set = SignalAndTarget(X_val, y_val)
			loss_with_params[f"Fold_{in_fold_num}"] = dict()
			
			####Nested cross-validation#####
			for drop_prob in hyp_params['drop_prob']:
				for loss_function in hyp_params['loss']:
					for i in range(len(hyp_params['lr_adam'])):
						model = None # ensure no duplication of models
						# model, learning-rate and optimizer setup according to model_type
						if model_type == 'shallow':
							model =  ShallowFBCSPNet(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
										 n_filters_time=80, filter_time_length=40, n_filters_spat=80, 
										 pool_time_length=75, pool_time_stride=25, final_conv_length='auto',
										 conv_nonlin=square, pool_mode='max', pool_nonlin=safe_log, 
										 split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,
										 drop_prob=drop_prob).create_network()
							lr = hyp_params['lr_ada'][i]
							optimizer = optim.Adadelta(model.parameters(), lr=lr, rho=0.9, weight_decay=0.1, eps=1e-8)
						elif model_type == 'deep':
							model = Deep4Net(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
										 final_conv_length='auto', n_filters_time=20, n_filters_spat=20, filter_time_length=10,
										 pool_time_length=3, pool_time_stride=3, n_filters_2=50, filter_length_2=15,
										 n_filters_3=100, filter_length_3=15, n_filters_4=400, filter_length_4=10,
										 first_nonlin=leaky_relu, first_pool_mode='max', first_pool_nonlin=safe_log, later_nonlin=leaky_relu,
										 later_pool_mode='max', later_pool_nonlin=safe_log, drop_prob=drop_prob, 
										 double_time_convs=False, split_first_layer=False, batch_norm=True, batch_norm_alpha=0.1,
										 stride_before_pool=False).create_network() #filter_length_4 changed from 15 to 10
							lr = hyp_params['lr_ada'][i]
							optimizer = optim.Adadelta(model.parameters(), lr=lr, weight_decay=0.1, eps=1e-8)
						elif model_type == 'eegnet':
							model = EEGNetv4(in_chans=n_chans, n_classes=n_classes, final_conv_length='auto', 
										 input_time_length=input_time_length, pool_mode='mean', F1=16, D=2, F2=32,
										 kernel_length=64, third_kernel_size=(8,4), drop_prob=drop_prob).create_network()
							lr = hyp_params['lr_adam'][i]
							optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=0, eps=1e-8, amsgrad=False)
						
						set_random_seeds(seed=20190629, cuda=cuda)
						
						if cuda:
							model.cuda()
							torch.backends.cudnn.deterministic = True
						model = torch.nn.DataParallel(model)
						log.info("%s model: ".format(str(model)))

						loss_function = loss_function
						model_loss_function = None

						#####Setup to run the selected model#####
						model_test = Experiment(model, train_set, valid_set, test_set=None, iterator=iterator,
												loss_function=loss_function, optimizer=optimizer,
												model_constraint=model_constraint, monitors=monitors,
												stop_criterion=stop_criterion, remember_best_column='valid_misclass',
												run_after_early_stop=True, model_loss_function=model_loss_function, cuda=cuda,
												data_type=data_type, subject_id=subject_id, model_type=model_type, 
												cropped=cropped, model_number=str(out_fold_num)) 

						model_test.run()
						model_loss = model_test.epochs_df['valid_loss'].astype('float')
						current_val_loss = current_loss(model_loss)
						loss_with_params[f"Fold_{in_fold_num}"][f"{drop_prob}/{loss_function}/{lr}"] = current_val_loss

		####Select and train optimized model#####
		df = pd.DataFrame(loss_with_params)
		df['mean'] = df.mean(axis=1) # compute mean loss across k-folds
		writer_df = f"results_folder\\results\\S{subject_id}\\{model_type}_parameters.xlsx"
		df.to_excel(writer_df)
		
		best_dp, best_loss, best_lr = df.loc[df['mean'].idxmin()].__dict__['_name'].split("/") # extract best param values
		if str(best_loss[10:13]) == 'nll':
			best_loss = F.nll_loss
		elif str(best_loss[10:13]) == 'cro':
			best_loss = F.cross_entropy
		
		print(f"Best parameters: dropout: {best_dp}, loss: {str(best_loss)[10:13]}, lr: {best_lr}")

		#####Train model on entire inner fold set#####
		torch.backends.cudnn.deterministic = True
		model = None
		#####Create outer-fold validation and test sets#####
		X_valid, X_test, y_valid, y_test = train_test_split(outer_fold, outer_labels, test_size=0.5, random_state=42, stratify=outer_labels)
		train_set = SignalAndTarget(inner_fold, inner_labels)
		valid_set = SignalAndTarget(X_valid, y_valid)
		test_set  = SignalAndTarget(X_test, y_test)


		if model_type == 'shallow':
			model =  ShallowFBCSPNet(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
						 n_filters_time=60, filter_time_length=5, n_filters_spat=40, 
						 pool_time_length=50, pool_time_stride=15, final_conv_length='auto',
						 conv_nonlin=relu6, pool_mode='mean', pool_nonlin=safe_log, 
						 split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,
						 drop_prob=0.1).create_network() #50 works better than 75
			
			optimizer = optim.Adadelta(model.parameters(), lr=2.0, rho=0.9, weight_decay=0.1, eps=1e-8) 
			
		elif model_type == 'deep':
			model = Deep4Net(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length,
						 final_conv_length='auto', n_filters_time=20, n_filters_spat=20, filter_time_length=5,
						 pool_time_length=3, pool_time_stride=3, n_filters_2=20, filter_length_2=5,
						 n_filters_3=40, filter_length_3=5, n_filters_4=1500, filter_length_4=10,
						 first_nonlin=leaky_relu, first_pool_mode='mean', first_pool_nonlin=safe_log, later_nonlin=leaky_relu,
						 later_pool_mode='mean', later_pool_nonlin=safe_log, drop_prob=0.1, 
						 double_time_convs=False, split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,
						 stride_before_pool=False).create_network()
			
			optimizer = AdamW(model.parameters(), lr=0.1, weight_decay=0)
		elif model_type == 'eegnet':
			model = EEGNetv4(in_chans=n_chans, n_classes=n_classes, final_conv_length='auto', 
						 input_time_length=input_time_length, pool_mode='mean', F1=16, D=2, F2=32,
						 kernel_length=64, third_kernel_size=(8,4), drop_prob=0.1).create_network()
			optimizer = optim.Adam(model.parameters(), lr=0.1, weight_decay=0, eps=1e-8, amsgrad=False) 
			

		if cuda:
			model.cuda()
			torch.backends.cudnn.deterministic = True
			#model = torch.nn.DataParallel(model)
		
		log.info("Optimized model")
		model_loss_function=None
		
		#####Setup to run the optimized model#####
		optimized_model = op_exp(model, train_set, valid_set, test_set=test_set, iterator=iterator,
								loss_function=best_loss, optimizer=optimizer,
								model_constraint=model_constraint, monitors=monitors,
								stop_criterion=stop_criterion, remember_best_column='valid_misclass',
								run_after_early_stop=True, model_loss_function=model_loss_function, cuda=cuda,
								data_type=data_type, subject_id=subject_id, model_type=model_type, 
								cropped=cropped, model_number=str(out_fold_num))
		optimized_model.run()

		log.info("Last 5 epochs")
		log.info("\n" + str(optimized_model.epochs_df.iloc[-5:]))
		
		writer = f"results_folder\\results\\S{subject_id}\\{data_type}_{model_type}_{str(out_fold_num)}.xlsx"
		optimized_model.epochs_df.iloc[-30:].to_excel(writer)

		accuracy = 1 - np.min(np.array(optimized_model.class_acc))
		cv_scores.append(accuracy) # k accuracy scores for this param set. 
		
	#####Print and store fold accuracies and mean accuracy#####
	
	print(f"Class Accuracy: {np.mean(np.array(cv_scores))}")
	results_df = pd.DataFrame(dict(cv_scores=cv_scores,
								   cv_mean=np.mean(np.array(cv_scores))))

	writer2 = f"results_folder\\results\\S{subject_id}\\{data_type}_{model_type}_cvscores.xlsx"
	results_df.to_excel(writer2)
	return optimized_model, np.mean(np.array(cv_scores))
Example #30
import csv
import numpy as np
from sklearn import manifold, decomposition, cluster

import plot
import utils

# Only looks at MDW cuz this is slow,
# and MDW offers enough interesting things to see (or lack thereof)
with open('data/MDW.csv', 'rb') as MDWcsv:
    # np.set_printoptions(threshold=np.inf, suppress=True)
    csv = csv.reader(MDWcsv, delimiter=',')
    data = utils.format_data(csv)[0]

    print("Using PCA, n=2")
    pca = decomposition.PCA(n_components=2)
    output = pca.fit_transform(data)
    plot.plot2d(tuple(output[::7].T), "humidity-pca", c=data[::7, 7], a=0.8)
    plot.plot2d(tuple(output[::7].T), "temperature-pca", c=data[::7, 1], a=0.8)

    print("Using PCA, n=3")
    pca = decomposition.PCA(n_components=3)
    output = pca.fit_transform(data)
    plot.plot3d_anim(tuple(output[::7].T),
                     "humidity-pca3d",
                     c=data[::7, 7],
                     a=0.8)
    plot.plot3d_anim(tuple(output[::7].T),
                     "temperature-pca3d",
                     c=data[::7, 1],
                     a=0.8)
Example #31
'''Training'''
import utils
import svmutil
from grid import find_parameters

TRAIN_DATA = 'data/training_data_libsvm'
TEST_DATA = 'data/testing_data_libsvm'
BARE_DATA = 'data/training.data'
MODEL_PATH = 'model/speech.model'

training_data, testing_data = utils.load_data(BARE_DATA, 20000)

utils.format_data(training_data, TRAIN_DATA)
utils.format_data(testing_data, TEST_DATA)
Example #32
def objective(trial):

    # Open data file
    f = h5py.File(DT_FL, "r")
    dt = f[DT_DST]

    # Format data for LSTM training
    x_data, y_data = format_data(dt, wd=WD, get_y=True)

    x_data = np.squeeze(x_data)
    # Split data and get slices
    idxs = split(x_data.shape[0], N_TRAIN, N_VALID)
    slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)

    # Get data
    x_train = x_data[slc_trn[0]]
    y_train = y_data[slc_trn[0]] - x_train
    x_val = x_data[slc_vld[0]]
    y_val = y_data[slc_vld[0]] - x_val

    # Limits and options
    # Filters
    # n_lstm = [[4, 128], [4, 128], [4, 128]]
    n_lstm = [[4, 196], [4, 196], [4, 196]]
    # Regularizer
    l2_lm = [1e-7, 1e-3]
    # Activation functions
    act_opts = ["relu", "elu", "tanh", "linear"]
    # Latent space cfg
    lt_sz = [5, 150]
    lt_dv = [0.3, 0.7]
    # Learning rate
    lm_lr = [1e-5, 1]

    # Clear tensorflow session
    tf.keras.backend.clear_session()
    # Input
    inputs = layers.Input(shape=x_train.shape[1:])
    p = inputs
    # Dense layers
    # n_lyr_dense = trial.suggest_int("n_lyr_dense", 0, 2)
    n_lyr_dense = trial.suggest_int("n_lyr_dense", 1, 3)
    for i in range(n_lyr_dense):
        # For the current layer
        # Get number of filters
        l = trial.suggest_int("n{}_dense".format(i), n_lstm[i][0],
                              n_lstm[i][1])
        # Get the activation function
        act = trial.suggest_categorical("d{}_activation".format(i), act_opts)
        # Regularization value
        l2 = trial.suggest_loguniform("d{}_l2".format(i), l2_lm[0], l2_lm[1])
        l2_reg = regularizers.l2(l=l2)
        # Set layer
        p = layers.Dense(
            l,
            activation=act,
            # kernel_regularizer=l2_reg,
            name="{}_dense".format(i + 1),
        )(p)
        # Dropout
        dp = trial.suggest_uniform("d{}_dropout".format(i), 0, 1)
        p = layers.Dropout(dp, name="{}_dropout_dense".format(i + 1))(p)
        bn = trial.suggest_categorical("d{}_batchnorm".format(i), [0, 1])
        if bn == 1:
            p = layers.BatchNormalization(name="{}_bnorm_dense".format(i +
                                                                       1))(p)

    out = layers.Dense(y_data.shape[1], activation="linear")(p)

    pred = Model(inputs, out, name="auto_encoder_add")

    # opt_opts = ["adam", "nadam", "adamax", "RMSprop"]
    # opt = trial.suggest_categorical("optimizer", opt_opts)
    opt = "adam"
    if opt == "adam":
        k_optf = optimizers.Adam
    elif opt == "nadam":
        k_optf = optimizers.Nadam
    elif opt == "adamax":
        k_optf = optimizers.Adamax
    elif opt == "RMSprop":
        k_optf = optimizers.RMSprop

    lr = trial.suggest_loguniform("lr", lm_lr[0], lm_lr[1])
    if lr > 0:
        k_opt = k_optf(learning_rate=lr)
    else:
        k_opt = k_optf()

    pred.compile(optimizer=k_opt, loss="mse", metrics=["mse", loss_norm_error])

    batch_size = int(trial.suggest_uniform("batch_sz", 2, 32))
    pred.summary()
    hist = pred.fit(
        x_train,
        y_train,
        epochs=100,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_val, y_val),
        callbacks=[KerasPruningCallback(trial, "val_mse")],
        verbose=1,
    )

    txt = PREFIX + SUFFIX
    pred.save(txt.format(RUN_VERSION, trial.number))
    return hist.history["val_mse"][-1]
Example #33

if __name__ == '__main__':

    logger = logging.getLogger('trainlogger')
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        fmt='%(levelname)s\t%(asctime)s\t%(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S')
    handler = logging.FileHandler('./logs/train.log', 'a')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    if sys.argv[1] == 'init':
        logger.info('init data')
        utils.format_data(config.YULIAO, config.CUT_WORDS)
        utils.make_train_test(config.CUT_WORDS)

        utils.gen_vocabulary_file(config.TRAIN_ENC_FILE,
                                  config.TRAIN_ENC_VOCABULARY, None)
        utils.gen_vocabulary_file(config.TRAIN_DEC_FILE,
                                  config.TRAIN_DEC_VOCABULARY, None)

        utils.convert_to_vec(config.TRAIN_ENC_FILE,
                             config.TRAIN_ENC_VOCABULARY, config.TRAIN_ENC_VEC)
        utils.convert_to_vec(config.TRAIN_DEC_FILE,
                             config.TRAIN_DEC_VOCABULARY, config.TRAIN_DEC_VEC)

        utils.convert_to_vec(config.TEST_ENC_FILE, config.TRAIN_ENC_VOCABULARY,
                             config.TEST_ENC_VEC)
        utils.convert_to_vec(config.TEST_DEC_FILE, config.TRAIN_DEC_VOCABULARY,
Example #34
dt_dst = "scaled_data"

# The percentage for the test is implicit
n_train = 0.8
n_valid = 0.1

# Select the variable to train
# 0: Temperature - 1: Pressure - 2: Velocity - None: all
var = 2

# %%
# Open data file
f = h5py.File(dt_fl, "r")
dt = f[dt_dst]

x_data, y_data = format_data(dt, wd=3, var=2, get_y=True, cont=True)

# Split data file
idxs = split(x_data.shape[0], n_train, n_valid)
slc_trn, slc_vld, slc_tst = slicer(x_data.shape, idxs)
# Slice data
x_train = x_data[slc_trn]
x_val = x_data[slc_vld]

slc_trn, slc_vld, slc_tst = slicer(y_data.shape, idxs)
y_train = y_data[slc_trn]
y_val = y_data[slc_vld]

# %%
# LSTM neural network settings
Example #35
    def __init__(self,
                 optfun,
                 X,
                 Y,
                 noise,
                 kernel,
                 bounds=None,
                 burnin=500,
                 resample=50,
                 n_init=1,
                 tol=1e-6,
                 sobol_seed=1991,
                 sampler=MDSliceSampler,
                 sampler_args={}):
        assert (isinstance(kernel, BaseKernel))

        self._dim = X.shape[1] if len(X.shape) > 1 else 1  # get dimension of input space
        self.optfun = optfun
        self.X = np.copy(format_data(X, dim=self._dim))
        self.Y = np.copy(format_data(Y, dim=1))
        self.noise = noise
        self.kernel = kernel
        self.burnin = burnin
        self.resample = resample
        self.tol = tol
        self._sobol_seed = sobol_seed

        if bounds is None:
            bounds = []
            for i in range(self._dim):
                bounds.append((0., 1.))
        assert (len(bounds) == self._dim)
        self.bounds = bounds

        if self.X.shape[0] == 0:
            X_new, Y_new = self._random_search(n_init)
            self.X, self.Y = np.concatenate(
                (self.X, X_new), axis=0), np.concatenate((self.Y, Y_new),
                                                         axis=0)

        self._nu = npr.randn(self.X.shape[0], 1)

        self._has_noise_prior = False
        if isinstance(noise, Parameter):
            self._has_noise_prior = True

        # get initial values of kernel hyperparameters
        kernel_parameters = kernel.get_valid_parameters()
        x0 = np.zeros(
            len(kernel_parameters) + (1 if self._has_noise_prior else 0))
        for i, par in enumerate(kernel_parameters):
            x0[i] = par.value
        if self._has_noise_prior:
            x0[-1] = self.noise.value

        self._recompute = True

        # get bounds on kernel parameters, see if you should marginalize
        bounds = []
        for par in kernel_parameters:
            bounds.append(par.prior.support)
        if self._has_noise_prior:
            bounds.append(self.noise.prior.support)

        if len(bounds) > 0:
            self.sampler = sampler(self._parameter_posterior,
                                   x0,
                                   bounds=bounds,
                                   log=True,
                                   burnin=burnin,
                                   **sampler_args)
            self.marginalize = True
        else:
            self.sampler = None
            self.marginalize = False
Example #36
def classifier():
    X, y = format_data()

    clf = LinearSVC(C=0.005)
    clf = mem.cache(clf.fit)(X, y)
    return clf