示例#1
0
def calibrate_sensor(sensor, measurements, verbose):
    """Calibrate `sensor` by least-squares fitting of its scale parameters.

    Parameters:
        sensor: key into the module-level PARAMETERS table.
        measurements: raw measurement records.
        verbose: when truthy, print progress information.

    Side effects: prints statistics, emits the fitted parameters as XML
    and plots the calibration results.
    """
    parameters = PARAMETERS[sensor]
    if verbose:
        # BUG FIX: the original used Python 2 print statements, which are
        # syntax errors on Python 3; converted to the print() function.
        print("found %d records" % len(measurements))

    # Low-pass filter the raw data to drop noisy records.
    flt_meas, flt_idx = utils.filter_meas(measurements,
                                          parameters.noise_window,
                                          parameters.noise_threshold)
    if verbose:
        print("remaining %d after low pass" % len(flt_meas))
    # Initial guess derived from the min/max envelope of the filtered data.
    p0 = utils.get_min_max_guess(flt_meas, parameters.sensor_ref)
    cp0, np0 = utils.scale_measurements(flt_meas, p0)
    print("initial guess : avg %f std %f" % (np0.mean(), np0.std()))

    def err_func(p, meas, y):
        # Residual between the reference value and the scaled measurements.
        # (Local renamed from `np` to avoid shadowing the numpy convention.)
        cp, scaled = utils.scale_measurements(meas, p)
        return y * scipy.ones(len(meas)) - scaled

    p1, success = scipy.optimize.leastsq(err_func,
                                         p0[:],
                                         args=(flt_meas,
                                               parameters.sensor_ref))
    cp1, np1 = utils.scale_measurements(flt_meas, p1)

    print("optimized guess : avg %f std %f" % (np1.mean(), np1.std()))

    utils.print_xml(p1, sensor, parameters.sensor_res)
    print("")

    utils.plot_results(measurements, flt_idx, flt_meas, cp0, np0, cp1, np1,
                       parameters.sensor_ref)
示例#2
0
def new_experiment(dataset_filename='../reddit-comments-may-2015/TipOfMyTongue_sub.db',
                   network_filename='TipOfMyTongue_sub_network_Dec_2020.txt'):
    """Run the full community-detection / clustering / evaluation pipeline.

    Parameters:
        dataset_filename: path to the sqlite comments database.
        network_filename: path the author network is written to / read from.

    BUG FIX: both parameters used to be unconditionally overwritten by
    hard-coded paths, making the arguments dead; they are now defaults.
    """
    graph_engineering.db_to_graph(dataset_filename, network_filename, parenting=False)

    print('Community detection...')
    topological_community,used_authors,numClusters = graph_engineering.community_detection(network_filename)
    print('Used authors : ' + str(used_authors))

    # Feature Extraction
    print('Feature Extraction....')
    feature_file_list = feature_engineering.extract_features(dataset_filename,used_authors)

    # Cluster communities
    # TODO different community detection algorithms
    # TODO number of clusters based on how many communities
    print('Cluster communities...')
    clusters = []
    clusters.append(feature_to_cluster(feature_file_list[1:8], numClusters))

    # Evaluation
    print('Evaluations.....')
    cluster_names = ['K-means']
    for i, cluster in enumerate(clusters):
        evaluations = utils.evaluate_cluster_to_community(topological_community, cluster, 5)
        utils.plot_results(evaluations,cluster_names[i], "results/" + os.path.basename(dataset_filename)[:-3] + "_result_" + cluster_names[i].replace(" ", "_") + ".png")
示例#3
0
def main(algorithm, data, cl_labels, min_k, max_k, max_iterations, epsilon):
    """Run a clustering algorithm for k in [min_k, max_k] and plot metrics.

    Parameters:
        algorithm: 'kmeans', 'bisecting_kmeans' or 'fuzzy_cmeans'.
        data: feature matrix to cluster.
        cl_labels: reference labels for the external validation metrics.
        min_k, max_k: inclusive range of cluster counts to evaluate.
        max_iterations, epsilon: only used by fuzzy c-means.
    """
    results = []
    silhouette, chs, ssws, ssbs, ars, hom, comp = [], [], [], [], [], [], []
    membership, centroids, labels = [], [], []

    for c in range(min_k, max_k + 1):
        if algorithm == 'kmeans':
            labels, centroids = kmeans.kmeans(data, c)
        elif algorithm == 'bisecting_kmeans':
            labels, centroids = bisecting_kmeans.bisecting_kmeans(data, c)
        elif algorithm == 'fuzzy_cmeans':
            membership, centroids = fuzzyCmeans.execute(data, max_iterations, c, epsilon)
            labels = fuzzyCmeans.get_labels(len(data), membership)

        # Internal validation metrics (no ground truth needed).
        silhouette.append((c, metrics.silhouette_score(data, labels, metric='euclidean')))
        chs.append((c, metrics.calinski_harabaz_score(data, labels)))
        ssws.append((c, utils.get_ssw(data, centroids, labels)))
        ssbs.append((c, utils.get_ssb(centroids)))
        # External validation metrics against the provided labels.
        ars.append((c, metrics.adjusted_rand_score(cl_labels, labels)))
        hom.append((c, metrics.homogeneity_score(cl_labels, labels)))
        comp.append((c, metrics.completeness_score(cl_labels, labels)))

    # BUG FIX: `zip(*pairs)[0]` is Python-2-only — zip objects are not
    # subscriptable on Python 3.  Unpack the (k, value) pairs instead.
    for title, pairs, subplot, color in (
            ("Silhouette", silhouette, 333, "blue"),
            ("Calinski-Harabaz Index", chs, 334, "blue"),
            ("Intra cluster Variance", ssws, 331, "blue"),
            ("Inter cluster Variance", ssbs, 332, "blue"),
            ("Adjusted Rand Index", ars, 335, "orange"),
            ("Homogeneity", hom, 336, "orange"),
            ("Completeness", comp, 337, "orange")):
        ks, values = zip(*pairs)
        results.append((title, "", ks, "", values, subplot, color))

    print(labels)
    utils.plot_results(results, algorithm)
示例#4
0
 def train(self):
     """Run the full training loop over config["num_epoch"] epochs.

     Trains on MNIST mini-batches with a decaying learning rate and a
     scheduled momentum, periodically evaluates on the test set, then
     saves and plots the misclassification curves via the helper module U.
     """
     self.training_miss_classifications = []
     self.testing_miss_classifications = []
     # (Re-)initialize the network variables before training.
     self.session.run(self.initialize)
     lr = self.config[self.network_type]["initial_learning_rate"]
     x_test, y_test = self.mnist.test.images, self.mnist.test.labels
     for epoch in range(self.config["num_epoch"]):
         miss_classes = []
         # Momentum follows an epoch-based schedule from the helper module.
         mu = U.get_momentum(epoch)
         for itr in range(self.num_itr):
             x_train, y_train = self.mnist.train.next_batch(self.config["batch_size"])
             if self.config[self.network_type]["jitter_images"]:
                 # Optional data augmentation: jitter the input images.
                 x_train = U.jitter_images(x_train)
             _, miss_class, logits_val = self.session.run([self.train_op, self.missclassification_error, self.logits],
                                           feed_dict={self.x: x_train,
                                                      self.y: y_train,
                                                      self.keep_prob_hidden_unit: self.config[self.network_type]["keep_prob_hidden_unit"],
                                                      self.keep_prob_visible_unit: self.config[self.network_type]["keep_prob_visible_unit"],
                                                      self.learning_rate: lr,
                                                      self.momentum: mu,
                                                      self.max_norm: self.config["max_norm_val"]})
             miss_classes.append(miss_class)
         # Multiplicative learning-rate decay once per epoch.
         lr *= self.config["learning_rate_decay"]
         if (epoch + 1) % self.config["show_every"] == 0:
             # NOTE(review): the keep_prob placeholders are not fed here --
             # presumably they default to 1.0 (no dropout at test time);
             # confirm in the graph-construction code.
             test_miss_class = self.session.run(self.missclassification_error,
                                                feed_dict={self.x: x_test,
                                                           self.y: y_test})
             print("epoc: {0}, train_miss_class: {1:0.0f}, test_miss_class: {2:0.0f}"
                   .format(epoch, np.sum(miss_classes), test_miss_class))
             self.training_miss_classifications.append(np.sum(miss_classes))
             self.testing_miss_classifications.append(test_miss_class)
     U.save_data(self.training_miss_classifications, self.testing_miss_classifications, self.network_type)
     U.plot_results(self.training_miss_classifications, self.testing_miss_classifications, self.network_type)
示例#5
0
def main(verbose=False, plot=False, save=False, random_ar_param=True):
    """Run one AR-Net train/test cycle on randomly generated data.

    Loads configuration and data, runs training, then optionally prints
    the statistics and renders loss/weight/result plots.
    """
    # Configuration dicts; could be loaded from JSON instead.
    cfg_data, cfg_model, cfg_train = load_config(verbose, random_ar_param)
    # Randomly generated data; a specific dataset could be loaded instead.
    dataset = load_data(cfg_data, verbose, plot)
    # Training + testing.
    results_dar, stats_dar = run_training(dataset, cfg_model, cfg_train,
                                          verbose)

    if verbose:
        print(stats_dar)

    if not plot:
        return

    utils.plot_loss_curve(losses=results_dar["losses"],
                          test_loss=results_dar["test_mse"],
                          epoch_losses=results_dar["epoch_losses"],
                          show=False,
                          save=save)
    utils.plot_weights(cfg_model["ar"],
                       results_dar["weights"],
                       dataset["ar"],
                       model_name="AR-Net",
                       save=save)
    utils.plot_results(results_dar, model_name="AR-Net", save=save)
示例#6
0
    def SGD(self, training_data, epochs, mini_batch_size, learning_rate, test_data=None, full_batch=False):
        """Train the network with (mini-batch) stochastic gradient descent.

        Shuffles the training data each epoch, splits it into mini-batches
        and updates the parameters batch by batch.  When `test_data` is
        given, the network is evaluated after every epoch and the per-epoch
        results are plotted at the end.  `full_batch` selects the
        full-matrix update variant for each mini-batch.
        """
        if test_data:
            # https://github.com/MichalDanielDobrzanski/DeepLearningPython35/blob/ea229ac6234b7f3373f351f0b18616ca47edb8a1/network.py#L62
            # test_data = list(test_data)
            n_test = len(test_data)
            test_results = []

        # https://github.com/MichalDanielDobrzanski/DeepLearningPython35/blob/ea229ac6234b7f3373f351f0b18616ca47edb8a1/network.py#L58
        # training_data = list(training_data)
        n = len(training_data)
        for epoch in range(epochs):
            random.shuffle(training_data)
            batches = [training_data[start: start + mini_batch_size]
                       for start in range(0, n, mini_batch_size)]
            # Each batch is a list of (x, y) tuples of arrays.
            for batch in batches:
                if full_batch is False:
                    self.update_mini_batch(batch, learning_rate)
                else:
                    # Full-matrix variant over the whole mini-batch.
                    self.update_full_batch(batch, learning_rate)
            if test_data:
                evaluation = self.evaluate(test_data)
                test_results.append(evaluation)
                print("Epoch {}: {} / {}".format(epoch, evaluation, n_test))
            else:
                print("Epoch {} complete".format(epoch))
        if test_data:
            plot_results(test_results)
示例#7
0
def train(lr=0.0075, nb_epoch=10, batch_size=256, verbose=1):
    """Train a merged MLP + LSTM classifier and plot its results.

    Builds the two partial networks, concatenates their outputs, adds a
    10-way softmax head, fits on the prepared data, prints the test
    score/accuracy and plots the training history.
    """
    X_train, y_train, X_test, y_test = build_training_data()

    lstm = build_partial_lstm_model()
    mlp = build_partial_mlp_model()

    model = Sequential()
    # To SUM you'll have to match the outputs of the partial networks to be the same size, aka 64 as it is now
    # Also to be able to SUM/MEAN etc. we need to oversize the LSTM a bit to match the output of the MLP so if
    # the LSTM overfits a bit at the end, now you know why
    # model.add(Merge([mlp, lstm], mode='sum'))
    # Concat will work with different sizes
    model.add(Merge([mlp, lstm], mode='concat'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=RMSprop(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Both branches receive the same input, hence the duplicated arrays.
    fit_kwargs = dict(nb_epoch=nb_epoch,
                      batch_size=batch_size,
                      validation_data=([X_test, X_test], y_test),
                      callbacks=callbacks(),
                      verbose=verbose)
    history = model.fit([X_train, X_train], y_train, **fit_kwargs)
    score = model.evaluate([X_test, X_test], y_test, verbose=0)

    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    plot_results(history, score[1], 'MLP w/ LSTM')
示例#8
0
def test(model, x_test, y_test, opt):
    """Evaluate a recurrent model step-by-step on `x_test` and report MSE.

    Parameters:
        model: the trained model to evaluate; when None, the checkpoint
            'model/model.pkl' is loaded instead.  (BUG FIX: the argument
            used to be unconditionally overwritten by the checkpoint,
            making the parameter dead.)
        x_test: inputs, shape (batch, seq_len, features) -- inferred from
            the indexing below; confirm against the caller.
        y_test: targets, denormalized the same way as the predictions.
        opt: options object providing max_data, min_data and test_len.
    """
    if model is None:
        model = torch.load('model/model.pkl')
    model.eval()
    pred_dat = []

    h, c = model.init_state()
    seq_len = x_test.shape[1]

    for i in range(0, seq_len):
        x = ToVariable(x_test[:, i, :])
        x = x.view(-1, 1, 1)
        pre_out, h, c = model(x, h, c)
        # Detach the recurrent state so history does not accumulate.
        h = h.data
        c = c.data
        if use_cuda:
            pred_dat.append(pre_out.data.cpu().numpy())
        else:
            pred_dat.append(pre_out.data.numpy())

    pred_dat = np.array(pred_dat)

    # (seq, batch, feat) -> (batch, seq, feat)
    pred_dat = pred_dat.transpose(1, 0, 2)
    # Presumably undoes a [-1, 1] min/max scaling back to the original
    # data range -- confirm against the training-side normalization.
    pred_dat = (pred_dat[:, :, 0] * (opt.max_data - opt.min_data) +
                (opt.max_data + opt.min_data)) / 2
    y_test = (y_test[:, :, 0] * (opt.max_data - opt.min_data) +
              (opt.max_data + opt.min_data)) / 2

    error = np.sum((pred_dat[:, -opt.test_len:] - y_test[:, -opt.test_len:])**
                   2) / (opt.test_len * pred_dat.shape[0])
    print('The mean square error is: %f' % error)

    plot_results(pred_dat[0, -opt.test_len:], y_test[0, -opt.test_len:])
示例#9
0
    def process_results(self,
                        true: list,
                        predicted: list,
                        name=None,
                        **plot_args):
        """Compute per-output error metrics and plot true vs. predicted.

        Parameters:
            true: list of observation arrays, one per entry in self.out_cols.
            predicted: list of prediction arrays aligned with `true`.
            name: optional filename prefix for the plots (defaults to '').
            **plot_args: forwarded to plot_results.
        """
        # BUG FIX: `name` defaults to None and `None + out_name` raised a
        # TypeError; fall back to an empty prefix instead.
        prefix = name if name is not None else ''

        errs = dict()
        for out, out_name in enumerate(self.out_cols):

            t = true[out]
            p = predicted[out]

            # Drop positions with NaN observations so metrics use valid
            # pairs only.
            if np.isnan(t).sum() > 0:
                mask = np.invert(np.isnan(t))
                t = t[mask]
                p = p[mask]

            errors = FindErrors(t, p)
            errs[out_name + '_errors'] = errors.calculate_all()
            errs[out_name + '_stats'] = errors.stats()

            plot_results(t,
                         p,
                         name=os.path.join(self.path, prefix + out_name),
                         **plot_args)

        save_config_file(self.path, errors=errs, name=name)

        return
示例#10
0
File: __init__.py  Project: OpenUAS/wasp
def calibrate_sensor(sensor, measurements, verbose):
    """Least-squares calibration of `sensor` scale parameters.

    Filters the raw measurements, fits scale parameters starting from a
    min/max guess, prints statistics, emits the result as XML and plots it.

    BUG FIX: converted Python 2 print statements (syntax errors on
    Python 3) to the print() function, and fixed the misindented verbose
    print.
    """
    parameters = PARAMETERS[sensor]
    if verbose:
        print("found %d records" % len(measurements))

    flt_meas, flt_idx = utils.filter_meas(measurements, parameters.noise_window, parameters.noise_threshold)
    if verbose:
        print("remaining %d after low pass" % len(flt_meas))
    p0 = utils.get_min_max_guess(flt_meas, parameters.sensor_ref)
    cp0, np0 = utils.scale_measurements(flt_meas, p0)
    print("initial guess : avg %f std %f" % (np0.mean(), np0.std()))

    def err_func(p, meas, y):
        # Residual of the scaled measurements against the reference value.
        cp, scaled = utils.scale_measurements(meas, p)
        return y * scipy.ones(len(meas)) - scaled

    p1, success = scipy.optimize.leastsq(err_func, p0[:], args=(flt_meas, parameters.sensor_ref))
    cp1, np1 = utils.scale_measurements(flt_meas, p1)

    print("optimized guess : avg %f std %f" % (np1.mean(), np1.std()))

    utils.print_xml(p1, sensor, parameters.sensor_res)
    print("")

    utils.plot_results(measurements, flt_idx, flt_meas, cp0, np0, cp1, np1, parameters.sensor_ref)
    def train(self, num_epochs, model, saved_dir, device, criterion, optimizer,
              val_every):
        """Train `model` on self.train_loader, validating every `val_every`
        epochs and saving the best checkpoint.

        Parameters:
            num_epochs: total number of training epochs.
            model: the network to train (must return (outputs, extra)).
            saved_dir: directory checkpoints are written to.
            device: torch device the tensors are moved to.
            criterion: loss; defaults to BCELoss when None.
            optimizer: defaults to Adam(lr=1e-5) when None.
            val_every: validation (and checkpoint) interval in epochs.
        """
        if criterion is None:
            criterion = torch.nn.BCELoss()
        if optimizer is None:
            optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)
            # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

        # Sentinel: any real validation loss is expected to be below this.
        best_loss = 9999
        avg_val_loss_list = []
        avg_train_loss_list = []

        for epoch in range(num_epochs):
            temp_epoch_loss = []
            for step, (sequence, target) in enumerate(self.train_loader):
                # NOTE(review): the next two self-assignments are no-ops.
                sequence = sequence
                target = target
                sequence, target = sequence.to(device), target.to(device)

                outputs, _ = model(sequence)
                loss = criterion(outputs, target)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Progress log every 25 steps.
                if (step + 1) % 25 == 0:
                    print("Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}".format(
                        epoch + 1, num_epochs, step + 1,
                        len(self.train_loader), loss.item()))

                temp_epoch_loss.append(loss.item())

            avg_train_loss = sum(temp_epoch_loss) / len(temp_epoch_loss)

            if (epoch + 1) % val_every == 0:  # Compare and save the best model
                avg_loss = self.validation(epoch + 1, model, self.val_loader,
                                           criterion, device)

                if avg_loss < best_loss:
                    print("Best performance at epoch: {}".format(epoch + 1))
                    print("Save model in", saved_dir)
                    best_loss = avg_loss
                    #                 save_model(model, optimizer, epoch, best_loss, saved_dir)

                    # NOTE(review): the two save_model calls below pass
                    # different argument types in the 4th/5th positions
                    # (scalars vs. the whole loss lists) -- confirm against
                    # save_model's signature; this looks like a bug.
                    if len(avg_val_loss_list) == 0:
                        save_model(model, optimizer, epoch, avg_train_loss,
                                   best_loss, saved_dir)

                    else:
                        save_model(model, optimizer, epoch,
                                   avg_train_loss_list, avg_val_loss_list,
                                   saved_dir)

                avg_train_loss_list.append(avg_train_loss)
                avg_val_loss_list.append(avg_loss)

        # NOTE(review): the x-axis has num_epochs points but the loss lists
        # only gain an entry every val_every epochs -- lengths mismatch
        # unless val_every == 1; confirm plot_results handles this.
        plot_results(np.arange(0, num_epochs), avg_train_loss_list,
                     avg_val_loss_list)
def exercise_3():
    """Benchmark three classifiers on three datasets with 10-fold CV.

    For each dataset/classifier pair the average fit time, prediction time
    and accuracy over the folds are collected; fit times and accuracies
    are then plotted per dataset.
    """
    folder = './data/'
    ext = '.csv'
    file_names = ['balance', 'phoneme', 'sonar']

    classifiers = [
        (KNeighborsClassifier(n_neighbors=5), 'k-NN'),
        (SVC(kernel="linear", C=0.025), 'SVC'),
        (DecisionTreeClassifier(max_depth=5), 'Decision Tree'),
    ]

    data = {}
    for fn in file_names:
        X, y, _ = prepare_data_from_file(folder + fn + ext)
        data[fn] = {}

        for clf, clf_name in classifiers:
            kf = KFold(n_splits=10, shuffle=True)

            fit_times = []
            predict_times = []
            accuracies = []
            for train_index, test_index in kf.split(X):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]

                t0 = time.time()
                clf.fit(X_train, y_train)
                fit_times.append(time.time() - t0)

                t0 = time.time()
                y_pred = clf.predict(X_test)
                predict_times.append(time.time() - t0)

                accuracies.append(accuracy_score(y_test, y_pred))

            # [avg fit time, avg predict time, avg accuracy]
            data[fn][clf_name] = [
                np.mean(fit_times),
                np.mean(predict_times),
                np.mean(accuracies),
            ]

    for fn, clfs in data.items():
        classifier_names = list(clfs.keys())
        values = list(clfs.values())

        learning_times = [v[0] for v in values]
        scores = [v[2] for v in values]

        plot_results(learning_times, scores, classifier_names, fn)
def set_annotations_and_plot(file_name, anndf, likelohood_column, plot):
    """Attach annotations from `anndf.Aux` to a results CSV and optionally plot.

    Reads `file_name` (timestamp-indexed), adds an 'Annotation' column,
    writes the file back, and plots when `plot` is truthy.  Returns the
    annotated DataFrame.

    NOTE(review): the parameter name `likelohood_column` is a typo for
    "likelihood_column"; kept as-is for caller compatibility.
    """
    print('Reading results...')
    results = pd.read_csv(file_name, parse_dates=True, index_col='timestamp')
    results['Annotation'] = anndf.Aux
    print('Writing annotated results...')
    results.to_csv(file_name)
    if plot:
        utils.plot_results(results, 'Resp', 'anomaly_score',
                           likelohood_column, '.*[HOXC].*')
    return results
示例#14
0
def plot_hogwild():
    """Train hogwild for several values of K and plot the averaged curves.

    Each K is trained n_runs times with different seeds; the per-run
    loggers are averaged via AvgLogger before plotting.
    """
    x_train, y_train, x_test, y_test = load_processed_data(dir_data)

    # Hyper-parameters for the K sweep.
    n_runs = 3
    n_workers = 8
    T = 1000000
    alpha = 0.33
    beta = 0.37
    theta = 0.2

    results = []
    for K in [3, 10, 50]:
        run_logs = [
            train_hogwild(a=x_train,
                          b=y_train,
                          a_test=x_test,
                          b_test=y_test,
                          T=T,
                          alpha=alpha,
                          beta=beta,
                          K=K,
                          theta=theta,
                          n_processes=n_workers,
                          sequential=False,
                          seed=s)[1]
            for s in range(n_runs)
        ]
        results.append(AvgLogger(run_logs))

    # (An earlier variant of this experiment compared hogwild against
    # sequential hogwild and plain SGD at fixed K=3; see version control
    # history for that setup.)

    plot_results(
        results,
        add_to_title=rf" ($\alpha={alpha}, \beta={beta}, \theta={theta}$, n_runs={n_runs})")
示例#15
0
    def process_results(self, true, predicted, name=None):
        """Print standard error metrics and plot true vs. predicted values.

        Parameters:
            true, predicted: observation/prediction arrays.
            name: optional filename prefix for the plot (defaults to '').
        """
        if np.isnan(true).sum() > 0:
            # Keep only positions where the observation is valid.
            mask = np.invert(np.isnan(true.reshape(-1,)))
            true = true[mask]
            predicted = predicted[mask]

        errors = FindErrors(true, predicted)
        for er in ['mse', 'rmse', 'r2', 'nse', 'kge', 'rsr', 'percent_bias']:
            print(er, getattr(errors, er)())

        # BUG FIX: with the default name=None, os.path.join(self.path, None)
        # raised a TypeError; fall back to an empty prefix.
        plot_results(true, predicted,
                     name=os.path.join(self.path,
                                       name if name is not None else ''))
        return
    def generate_markov_model(self):
        """Fit a 3-state Gaussian mixture on training returns/features and
        label the test set with the predicted state.

        Side effects: sets self.model, self.result_df, adds 'state' and
        'next_day_change' columns to self.test, dumps self.test to
        'test.csv', prints diagnostics and plots the results.

        BUG FIX: removed a leftover `import pdb; pdb.set_trace()` that
        halted execution at an interactive breakpoint.
        """
        # use percent return and try to minimize variance and maximize return
        # TODO: generate model every day
        self.model = mix.GaussianMixture(
            n_components=3,
            covariance_type="full",  # "tied" is the alternative considered
            random_state=7,
            n_init=60)
        if 'date' in self.train.columns:
            self.train = self.train.set_index('date')
        self.model.fit(self.train[['return'] + self.features])
        if 'date' in self.test.columns:
            self.test = self.test.set_index('date')
        # TODO rename state with english text
        self.test['state'] = self.model.predict(self.test[['return'] +
                                                          self.features])

        # get next day percent change
        self.test['next_day_change'] = self.test['close'].shift(
            -1) / self.test['close'] - 1
        print(self.test)
        self.test.to_csv('test.csv')

        # Rank the states by mean return so they can be mapped to actions.
        results = []
        for i in range(self.model.n_components):
            results.append([
                i, self.model.means_[i][0],
                np.diag(self.model.covariances_[i])[0]
            ])

        result_df = pd.DataFrame(results, columns=['state', 'mean', 'var'])
        result_df = result_df.set_index('state').sort_values(by=['mean'])

        # Lowest mean return -> 'sell', highest -> 'strong_buy'.
        result_df['state_names'] = ['sell', 'buy', 'strong_buy']

        self.result_df = result_df

        print(self.result_df)
        for i in self.result_df.index:
            group = self.test[self.test['state'] == i]['next_day_change']
            print(i, group.mean(), group.std())

        self.test['close'] = self.test['close'].shift(-1)
        plot_results(self.test.reset_index(), self.name, result_df.index)
        """
示例#17
0
def train(train_sets: tuple,
          test_sets: tuple,
          input_shape: tuple = (1, 128, 128, 1),
          model_version="1.0.0",
          epochs: int = 100,
          classes: int = 2,
          batch_size: int = 1,
          verbose=1,
          out_dir: str = "saved_models"):
    """Train a versioned model and save logs, checkpoints and the final file.

    Parameters:
        train_sets (tuple): (train images, train labels) as np.arrays.
        test_sets (tuple): (test images, test labels) as np.arrays.
        input_shape (tuple): model input shape of the form (1, ..., ...).
        model_version (str): model version in d.d.d format.
        epochs (int): number of training epochs.
        classes (int): number of target classes.
        batch_size (int): batch size.
        verbose: whether to show per-epoch progress.
        out_dir (str): directory model artifacts are written to.
    """
    (x_train, y_train), (x_test, y_test) = train_sets, test_sets
    y_train = keras.utils.to_categorical(y_train, classes)
    y_test = keras.utils.to_categorical(y_test, classes)

    m = get_model(model_version)
    if not m:
        # Unknown model version: nothing to train.
        return

    model = m.build_model(input_shape)
    model.compile(loss=BinaryCrossentropy(),
                  optimizer=RMSprop(learning_rate=0.0001),
                  metrics=['accuracy'])

    saver = ModelSaver(out_dir)
    csv_logger = CSVLogger(
        "%s/%s/log.csv" %
        (out_dir, datetime.datetime.now().date().strftime("%Y_%m_%d")),
        append=True,
        separator=',')

    history = model.fit(x_train,
                        y_train,
                        batch_size=batch_size,
                        epochs=epochs,
                        verbose=verbose,
                        validation_data=(x_test, y_test),
                        callbacks=[saver, csv_logger])

    model.save("%s/%s/final.hd5" %
               (out_dir, datetime.datetime.now().date().strftime("%Y_%m_%d")))
    print("Model saved in %s as final.hd5" % out_dir)
    plot_results(history, epochs, out_dir)
    def get_results_with_pca(self):
        """Summarize the mixture states and plot the test-set results.

        Prints a per-state (mean, var) table sorted by mean return, prints
        the realized next-day change statistics per state, then plots.
        """
        state_stats = [
            [i, self.model.means_[i][0], np.diag(self.model.covariances_[i])[0]]
            for i in range(self.model.n_components)
        ]

        result_df = pd.DataFrame(state_stats, columns=['state', 'mean', 'var'])
        result_df = result_df.set_index('state').sort_values(by=['mean'])
        print(result_df)

        # Next-day percent change of the close price.
        self.test['next_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
        for state in result_df.index:
            this_group = self.test.loc[self.test['state'] == state, 'next_change']
            print(state, float(this_group.mean()), float(this_group.std()))

        plot_results(self.test, self.name, result_df.index)
    def get_results(self):
        """Summarize the mixture states and plot the test-set results.

        NOTE(review): near-duplicate of get_results_with_pca; consider
        merging the two once their intended difference is clarified.
        """
        results = []
        for i in range(self.model.n_components):
            # Per-state [index, mean of first feature, variance of first feature].
            results.append([ i, self.model.means_[i][0], np.diag(self.model.covariances_[i])[0] ])
        
        result_df = pd.DataFrame(results, columns = ['state','mean', 'var'])
        # Sort states by mean return for reporting.
        result_df = result_df.set_index('state').sort_values(by=['mean'])
        print(result_df)
        # Next-day percent change of the close price.
        self.test['next_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
        #self.test[ ['date', 'state', 'close', 'next_change'] ].to_csv('test.csv')
        for state in result_df.index:
            this_group = self.test.loc[self.test['state']==state, 'next_change']
            print(state, float(this_group.mean()), float(this_group.std()))

        plot_results(self.test, self.name, result_df.index)

        # NOTE(review): the triple-quoted string below is dead code from an
        # older implementation, kept as a no-op string literal; it should be
        # deleted once confirmed obsolete.
        """
        # get next day percent change
        self.test['next_day_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
        #self.test['close'] = self.test['close'].shift(-1)
        print(self.test)
        import pdb; pdb.set_trace()
        self.test.to_csv('test.csv')
        # find the best state numbers
        results = []
        for i in range(self.model.n_components):
            results.append([ i, self.model.means_[i][0], np.diag(self.model.covariances_[i])[0] ])
        
        result_df = pd.DataFrame(results, columns = ['state','mean', 'var'])
        result_df = result_df.set_index('state').sort_values(by=['mean'])
        
        result_df['state_names'] = ['sell','buy','strong_buy']

        self.result_df = result_df
        
        print(self.result_df)
        for i in self.result_df.index:
            group = self.test[self.test['state']==i]['next_day_change']
            print(i, group.mean(), group.std())
        #for g, group in self.test.groupby(by='state'):
        #    print(g, group['next_day_change'].mean(), group['next_day_change'].std())
        
        states_used = result_df.index
        self.test['close'] = self.test['close'].shift(-1)
        plot_results(self.test.reset_index(), self.name, result_df.index)

        """
        """
示例#20
0
    def showResults(self):
        """Open result plots once the solver has finished (progress == 100)."""
        solver = self.solver
        if solver is None or solver.progress < 100:
            return

        self.setPlotOptions()
        # Only persist parameters when plots will actually be opened.
        self.saveParameters()

        plot_kwargs = dict(
            pixels=solver.pixels,
            shift_x=solver.shift_x,
            shift_y=solver.shift_y,
            shift_p=solver.shift_p,
            shift_x_y_error=solver.shift_x_y_error,
            box_shift=solver.box_shift,
            fps=solver.fps,
            res=solver.res,
            input_path=self.fileName,
            output_basepath=self.output_basepath,
            plots_dict=self.plots_dict,
            boxes_dict=self.boxes_dict,
            chop_duration=float(self.lineEdit_chop_sec.text()),
            start_frame=solver.start_frame)
        self.opened_plots = utils.plot_results(**plot_kwargs)

        print("%d plots shown." % (len(self.opened_plots)))
示例#21
0
def eval(epoch, model, eval_loader, device, writer, height, width, batch_size):
    """Evaluate an autoencoder-style model for one epoch and log to TensorBoard.

    Accumulates inputs, labels, latent variables and reconstructions over
    the whole loader, logs a results figure and a projection, and returns
    the average loss per batch.

    NOTE(review): this function shadows the builtin `eval`; renaming would
    break callers, so it is kept.
    """
    model.eval()
    running_loss = 0.0
    # Epoch-wide accumulators, extended batch by batch on the CPU.
    inputs_epoch, labels_epoch, latent_variables_epoch, reconstructions_epoch = \
        torch.Tensor(), torch.LongTensor(), torch.Tensor(), torch.Tensor()
    with torch.no_grad():
        for data in eval_loader:
            inputs, labels = data[0].to(device), data[1].to(device)
            reconstructions, latent_variables = model(inputs)
            # Loss is computed by the model from the inputs alone and
            # normalized per batch element.
            loss = model.loss_function(inputs) / batch_size
            running_loss += loss.item()
            inputs_epoch = torch.cat(
                [inputs_epoch, inputs.cpu().detach()], dim=0)
            labels_epoch = torch.cat(
                [labels_epoch, labels.cpu().detach()], dim=0)
            latent_variables_epoch = torch.cat(
                [latent_variables_epoch,
                 latent_variables.cpu().detach()],
                dim=0)
            reconstructions_epoch = torch.cat(
                [reconstructions_epoch,
                 reconstructions.cpu().detach()], dim=0)
    # NOTE(review): the figure uses the loop variables from the LAST batch
    # only (and raises NameError if the loader is empty) -- confirm this is
    # intended rather than using the *_epoch accumulators.
    fig = plot_results(model, inputs, reconstructions, latent_variables,
                       height, width)
    writer.add_figure('results', fig, global_step=epoch)
    project_results(latent_variables_epoch, inputs_epoch, labels_epoch, writer,
                    epoch, height, width)
    eval_loss = running_loss / len(eval_loader)
    return eval_loss
示例#22
0
    def __init__(self, model_name, accum_rate, decay_rate, trade_states,
                 index):
        """Load the trade history for `model_name` from sqlite and plot it.

        Parameters:
            model_name: key into the 'trades' table of markov_models.db.
            accum_rate, decay_rate: position sizing parameters.
            trade_states: states that trigger trades.
            index: identifier for this strategy instance.
        """
        self.index = index
        self.model_name = model_name

        # Starting capital.
        self.bank_value = 10000

        self.accum_rate = accum_rate
        self.decay_rate = decay_rate
        self.trade_states = trade_states

        self.current_accum = self.accum_rate
        self.held_shares = {}
        self.held_shares['TQQQ'] = {'num_shares': 0}

        conn = sqlite3.connect('markov_models.db')
        sql = 'select * from trades where name = "%s"' % model_name
        self.trade_days = pd.read_sql(sql, conn)
        print(self.trade_days)
        plot_results(self.trade_days, model_name)
        # NOTE(review): this early return makes everything below unreachable
        # -- it looks like a debugging short-circuit that was left in.
        # Either remove the return or delete the dead code once confirmed.
        return

        # get TQQQ
        self.tqqq = yfinance.Ticker('TQQQ').history(
            period='5y', auto_adjust=False).reset_index()
        self.tqqq.columns = map(str.lower, self.tqqq.columns)

        self.qqq = yfinance.Ticker('QQQ').history(
            period='5y', auto_adjust=False).reset_index()
        self.qqq.columns = map(str.lower, self.qqq.columns)

        # get TQQQ performance

        self.tqqq = self.tqqq[
            (self.tqqq['date'] >= self.trade_days.head(1)['date'].values[0])
            & (self.tqqq['date'] <= self.trade_days.tail(1)['date'].values[0])]
        self.tqqq_start_price = float(self.tqqq.head(1)['close'])
        self.tqqq_performance = float(self.tqqq.tail(1)['close']) / float(
            self.tqqq.head(1)['close'])

        self.qqq = self.qqq[
            (self.qqq['date'] >= self.trade_days.head(1)['date'].values[0])
            & (self.qqq['date'] <= self.trade_days.tail(1)['date'].values[0])]
        self.qqq_start_price = float(self.qqq.head(1)['close'])

        # start trading
        self.run_trades()
示例#23
0
def run_different_q_gammas(env_name):
    """Sweep the discount factor for Q-learning on *env_name*.

    Runs Q-learning once per gamma (epsilon=0, learning rate=0.6), then
    saves a mean-score plot under scores/ and a timing plot under times/.
    """
    gammas = [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]

    overall_scores = []
    times = []
    for g in gammas:
        score_list, elapsed = run_q_learning(env_name, 0, 0.6, g)
        overall_scores.append(score_list)
        times.append([elapsed])

    # Score plot.
    plot_results(overall_scores, "Mean Score of Policy", gammas, "Gamma Value",
                 "Mean Score vs Gamma for {}, QLearning".format(env_name),
                 "scores/{}-QLearning-Gamma.png".format(env_name))

    # Timing plot.
    plot_results(times, "Time Taken (s)", gammas, "Gamma Value",
                 "Time Taken (s) vs Gamma for {}, QLearning".format(env_name),
                 "times/{}-QLearning-Gamma.png".format(env_name))
示例#24
0
def run_different_epsilons(env_name):
    """Sweep the exploration rate for Q-learning on *env_name*.

    Trains once per epsilon, then saves a mean-score plot under scores/
    and a wall-clock-time plot under times/.
    """
    epsilons = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05]

    overall_scores = []
    times = []
    for eps in epsilons:
        score_list, elapsed = run_q_learning(env_name, eps)
        overall_scores.append(score_list)
        times.append([elapsed])

    # Score plot.
    plot_results(overall_scores, "Mean Score of Policy", epsilons,
                 "Epsilon Value",
                 "Mean Score vs Epsilon for {}, QLearning".format(env_name),
                 "scores/{}-QLearning-Epsilon.png".format(env_name))

    # Timing plot.
    plot_results(times, "Time Taken (s)", epsilons, "Epsilon Value",
                 "Time Taken (s) vs Epsilon for {}, QLearning".format(env_name),
                 "times/{}-QLearning-Epsilon.png".format(env_name))
示例#25
0
def run_bench(api, version):
    """Run an IOR benchmark for *api* against pdwfs *version* (Python 2).

    Builds the IOR script, executes it via bash, parses the disk and pdwfs
    results, renames the raw output files to api/version-specific names,
    and appends result plots to /output/bench.html.
    """

    bench_title = api + " IOR benchmark - pdwfs " + version + " - " + str(
        datetime.utcnow()) + " UTC"
    print "Running:", bench_title

    # IOR parameters, passed as strings to the generated script.
    read = "1"  # 1: perform read benchmark
    numTasks = "2"  # number of parallel processes
    filePerProc = "0"  # 1: write one file per processes
    collective = "1"  # 1: enable collective IO operations (MPIIO, HDF5 only)
    segmentCount = "1"  # see previous schematic
    transferSize = [
        "512k", "1m", "3m", "5m", "7m", "10m", "25m", "35m", "50m", "60m",
        "75m", "85m", "100m", "115m", "125m", "150m", "175m", "200m", "225m",
        "250m"
    ]
    utils.build_ior_script(api, read, numTasks, filePerProc, collective,
                           segmentCount, transferSize)

    # NOTE(review): ``bench_script`` is not defined in this function —
    # presumably a module-level template string; confirm it exists.
    with open("run/bench.sh", "w") as f:
        f.write(bench_script)

    subprocess.check_call(["bash", "run/bench.sh"])

    print "    Parsing and saving the results in a plot"
    df_disk = utils.parse_ior_results("/output/ior_disk.out")
    df_pdwfs = utils.parse_ior_results("/output/ior_pdwfs.out")

    # Preserve the raw outputs under api/version-specific names.
    os.rename("/output/ior_disk.out", "/output/ior_" + api + "_disk.out")
    os.rename("/output/ior_pdwfs.out",
              "/output/ior_" + api + "_pdwfs-" + version + ".out")

    # Headless backend: figures are only saved to disk, never shown.
    matplotlib.use('Agg')

    for readOrWrite in ["write", "read"]:
        filename = readOrWrite + "_ior_" + api + "_pdwfs-" + version + ".png"
        utils.plot_results(readOrWrite,
                           df_disk[df_disk["Operation"] == readOrWrite],
                           df_pdwfs[df_pdwfs["Operation"] == readOrWrite],
                           title=bench_title,
                           filename="/output/" + filename)
        with open("/output/bench.html", "a") as f:
            f.write("<img src=" + filename + ">\n")
示例#26
0
 def get_results(self):
     # NOTE(review): the triple-quoted block below is disabled code parked
     # in the docstring position — it is never executed.
     """
     results = []
     for i in range(self.model.n_components):
         results.append([ i, self.model.means_[i][0], np.diag(self.model.covariances_[i])[0] ])
     
     result_df = pd.DataFrame(results, columns = ['state','mean', 'var'])
     result_df = result_df.set_index('state').sort_values(by=['mean'])
     """
     #print('===')
     #print(result_df)
     # Next-period fractional price change for each row of the test frame.
     self.test['next_change'] = self.test['close'].shift(-1) / self.test['close'] - 1
     
     # Print mean/std of the next-period change grouped by hidden state.
     for state in self.test['state'].unique():
         this_group = self.test.loc[self.test['state']==state, 'next_change']
         print(state, float(this_group.mean()), float(this_group.std()))
     print('===')
     # Plotting is best-effort: failures are printed and otherwise ignored.
     try:
         plot_results(self.test, self.name)
     except Exception as e:
         print(e)
def test(model, x_test, y_test, data_df_combined_clean):
    """Evaluate the saved LSTM on *x_test* and plot the predictions.

    NOTE(review): the *model* argument is immediately overwritten by the
    checkpoint loaded from 'model/model.pkl', so the parameter is
    effectively ignored — confirm this is intended.
    """
    model = torch.load('model/model.pkl')
    model.eval()
    x_test = ToVariable(x_test).double()
    # Fresh hidden/cell states for the three-layer LSTM, sized to the batch.
    h1, c1, h2, c2, h3, c3 = model.init_state(x_test.shape[0])
    seq_len = x_test.shape[1]

    pred_dat, h1, c1, h2, c2, h3, c3 = model(x_test, h1, c1, h2, c2, h3, c3)

    pred_dat = np.array(pred_dat.detach().numpy())

    #De-standardize predictions
    # Undo z-scoring using the stats of the last column of the cleaned frame.
    preds_unstd = pred_dat * data_df_combined_clean.iloc[:, -1].std(
    ) + data_df_combined_clean.iloc[:, -1].mean()
    y_test_unstd = y_test * data_df_combined_clean.iloc[:, -1].std(
    ) + data_df_combined_clean.iloc[:, -1].mean()

    # Root error over the final timestep (the message says "mean square
    # error" but the sqrt makes this a root error).
    mrse = np.sqrt(
        ((preds_unstd[:, -1, :] - y_test_unstd[:, -1, :])**2)).mean(axis=0)
    print('The mean square error is: %f' % mrse)

    plot_results(preds_unstd[:, -1, :], y_test_unstd[:, -1, :])
示例#28
0
def train(lr=0.0075, nb_epoch=10, batch_size=256, verbose=1):
    """Train the MLP classifier and plot its learning curves.

    Builds the dataset and the partial model, appends a 10-way softmax
    head, fits with RMSprop on categorical cross-entropy, then reports
    test loss/accuracy and hands the history to ``plot_results``.
    """
    X_train, y_train, X_test, y_test = build_training_data()

    # Complete the network with its output layer, then compile.
    model = build_partial_mlp_model()
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer=RMSprop(lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    history = model.fit(X_train, y_train,
                        nb_epoch=nb_epoch,
                        batch_size=batch_size,
                        validation_data=(X_test, y_test),
                        callbacks=callbacks(),
                        verbose=verbose)

    # Final held-out evaluation: [loss, accuracy].
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])

    plot_results(history, score[1], 'mlp')
示例#29
0
def run_different_learn_rates(env_name):
    """Sweep the learning rate for Q-learning on *env_name*.

    Trains once per learning rate (epsilon=0), then saves a mean-score
    plot under scores/ and a wall-clock-time plot under times/.
    """
    learning_rates = [
        0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95
    ]

    overall_scores = []
    times = []
    for alpha in learning_rates:
        score_list, elapsed = run_q_learning(env_name, 0, alpha)
        overall_scores.append(score_list)
        times.append([elapsed])

    # Score plot.
    plot_results(overall_scores, "Mean Score of Policy", learning_rates,
                 "Learning Rate Value",
                 "Mean Score vs Learning Rate for {}, QLearning".format(
                     env_name),
                 "scores/{}-QLearning-Learning-Rate.png".format(env_name))

    # Timing plot.
    plot_results(times, "Time Taken (s)", learning_rates,
                 "Learning Rate Value",
                 "Time Taken (s) vs Learning Rate for {}, QLearning".format(
                     env_name),
                 "times/{}-QLearning-Learning-Rate.png".format(env_name))
示例#30
0
def run_different_gammas(method, method_name, env_name):
    """Sweep the discount factor for an arbitrary solver on *env_name*.

    *method* is called as ``method(env_name, gamma)`` and must return
    ``(scores, time)``.  Saves a mean-score plot under scores/ and a
    wall-clock-time plot under times/, labelled with *method_name*.
    """
    gammas = [
        0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7,
        0.75, 0.8, 0.85, 0.9, 0.95, 1
    ]

    overall_scores = []
    times = []
    for g in gammas:
        score_list, elapsed = method(env_name, g)
        overall_scores.append(score_list)
        times.append([elapsed])

    # Score plot.
    plot_results(overall_scores, "Mean Score of Policy", gammas, "Gamma Value",
                 "Mean Score vs Gamma for {}, {}".format(env_name, method_name),
                 "scores/{}-{}.png".format(env_name, method_name))

    # Timing plot.
    plot_results(times, "Time Taken (s)", gammas, "Gamma Value",
                 "Time Taken (s) vs Gamma for {}, {}".format(env_name,
                                                             method_name),
                 "times/{}-{}.png".format(env_name, method_name))
def train_vae(vae,
              encoder,
              decoder,
              x,
              y,
              x_train,
              x_test,
              y_test,
              lable_color_dict,
              group,
              additional=False):
    """Fit the VAE for 50 epochs and plot the resulting latent space.

    When *additional* is true, training resumes from the previously
    saved weights for *group* instead of starting fresh.  Weights are
    checkpointed back to the same file after training.
    """
    weights_file = 'vae_' + group + '.h5'

    # Optionally warm-start from the last checkpoint for this group.
    if additional:
        vae.load_weights(weights_file)

    vae.fit(x_train, epochs=50, batch_size=10, validation_data=(x_test, None))
    vae.save_weights(weights_file, overwrite=True)

    # Visualize the trained encoder/decoder over the full dataset.
    plot_results((encoder, decoder),
                 (x, y),
                 lable_color_dict,
                 model_name=weights_file)
示例#32
0
def evaluate(opt,
             model,
             data_loader,
             logger,
             error_threshold=0.05,
             limit=None,
             vis=None):
    '''
  Loop through the dataset and calculate evaluation metrics.

  Args:
    opt: options object; this function reads ``initialization``,
      ``n_evaluation_steps`` and ``log_every``.
    model: iterator model under test; may carry a ``compare_model``.
    data_loader: yields dicts with keys 'bc', 'final', 'x' and
      optionally 'f'.
    logger: object exposing a ``print`` method for run logging.
    error_threshold: threshold forwarded to ``utils.Metrics``.
    limit: optional cap on the number of batches evaluated.
    vis: optional visualizer receiving error-curve and result images.

  Returns:
    (results, images): aggregated metric dict and the collected plots.
  '''
    if model.compare_model is not None:
        logger.print('Comparison: {} ({}), {} ({})'.format(\
                         model.iterator.name(), model.iterator.n_operations,
                         model.compare_model.name(), model.compare_model.n_operations))
    logger.print('Initialization: {}'.format(opt.initialization))
    logger.print('Error threshold: {}'.format(error_threshold))

    metric = utils.Metrics(scale=1, error_threshold=error_threshold)
    images = {'error_curves': [], 'results': []}

    for step, data in enumerate(data_loader):
        bc, gt, x = data['bc'], data['final'], data['x']
        f = None if 'f' not in data else data['f']
        if opt.initialization != 'random':
            # Test time: do not change data if 'random'
            x = utils.initialize(x, bc, opt.initialization)
        results, x = model.evaluate(x, gt, bc, f, opt.n_evaluation_steps)
        # Update metric
        metric.update(results)

        # Periodically snapshot error curves and qualitative results.
        if step % opt.log_every == 0:
            img = utils.plot_error_curves(results, num=4)
            if vis is not None:
                vis.add_image({'errors_avg_init': img}, step)
            images['error_curves'].append(img)
            img = utils.plot_results({'x': x, 'gt': gt})
            if vis is not None:
                vis.add_image({'results': img}, step)
            images['results'].append(img)
        if (step + 1) % opt.log_every == 0:
            print('Step {}'.format(step + 1))
        if limit is not None and (step + 1) == limit:
            break

    # Get results
    results = metric.get_results()
    for key in results:
        logger.print('{}: {}'.format(key, results[key]))
    metric.reset()
    return results, images
示例#33
0
def fit_independent(rng=(-5, 5)):
    """Analytic linear fit assuming independent Gaussian uncertainties.

    Loads the data and the true parameters, computes the closed-form
    posterior over (slope, intercept) for weighted least squares, draws
    samples from it, and saves a comparison plot to figures/.

    NOTE(review): *rng* is currently unused by this function.
    """
    x, y, yerr = load_data("line_data.txt")
    true_m, true_b, _, _ = load_data("line_true_params.txt")

    # Design matrix with columns [x, 1].
    A = np.vander(x, 2)
    AT = A.T

    # Closed-form Gaussian posterior for weighted linear least squares.
    cov = np.linalg.inv(np.dot(AT, A / yerr[:, None] ** 2))
    mu = np.dot(cov, np.dot(AT, y / yerr ** 2))

    # Overlay posterior samples on the data and the true line.
    posterior_samples = np.random.multivariate_normal(mu, cov, 1000)
    fig = plot_results(x, y, yerr, posterior_samples,
                       truth=(true_m, true_b))
    fig.gca().set_title("assuming independent uncertainties")
    fig.savefig(os.path.join("figures", "line_independent.png"))
示例#34
0
文件: fit_emcee.py 项目: amoliu/gp
def fit_emcee(rng=(-5, 5)):
    """Sample the line-fit posterior with emcee and save a results plot.

    NOTE(review): ``x``, ``y``, ``yerr``, ``lnprob``, ``true_m`` and
    ``true_b`` are not defined in this function — presumably module-level
    globals set by earlier code; confirm.  ``xrange`` also pins this to
    Python 2.  *rng* is unused here.
    """
    # Initialize the walkers.
    ndim, nwalkers = 4, 32
    pos = [np.random.randn(ndim) for i in xrange(nwalkers)]

    # Initialize the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, args=(x, y, yerr))

    # Run a burn-in, then discard it so production starts fresh.
    print("Running burn-in")
    pos, lp, state = sampler.run_mcmc(pos, 1000)
    sampler.reset()

    # Run the production chain.
    print("Running production")
    sampler.run_mcmc(pos, 500)
    print("Done")

    fig = plot_results(x, y, yerr, sampler.flatchain, truth=(true_m, true_b))
    fig.savefig(os.path.join("figures", "line_emcee.png"))
# Post-process a fitted DBSCAN model: mark core samples, count clusters,
# report metrics, attach zones to the POI dataset, and optionally plot.
# NOTE(review): ``db``, ``X``, ``labels_true``, ``poi_dataset`` and the
# settings module ``s`` come from earlier in the script — confirm they are
# in scope at this point.
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Number of clusters in labels, ignoring noise if present.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

##############################################################################
# Print machine learning metrics
utils.print_dbscan_metrics(X, n_clusters_, labels_true, labels)

##############################################################################
# TODO - Xform Back, compute zone size and centroid
poi_result_set = utils.add_zoas_to_poi_dataset(labels, poi_dataset)

##############################################################################
# Output Results
utils.output_results(poi_result_set, screen=s.ZOA_SUMMARY_TO_SCREEN, outfile=s.OUTPUT_FILE)

##############################################################################
# Plot result using X_prime a transpose of [[lat, lng]] to [[x=lng, y=lat]]
# If mode is proxy, lookup coordinates
# X_pr
if s.MATPLOT_ZOA_CLUSTERS:
    if s.MODE == "proxy":
        X_prime = gadm.lat_lng_tpose2(X, poi_dataset)
    else:
        X_prime = gadm.lat_lng_tpose(X)
    utils.plot_results(labels, X_prime, core_samples_mask)
def main(cwd, do_amgng, amgng_file, ma_window, ma_recalc_delay,
         do_cla, cla_file, buffer_len, plot):
    """Train AMGNG and/or CLA anomaly detectors on ECG-style CSV data.

    For each enabled model, trains it on the input file under *cwd*,
    annotates the output with ``anndf.Type``, writes it back to disk,
    optionally plots the result, and reports the time taken.

    NOTE(review): ``anndf`` is not defined in this function — presumably a
    module-level annotations DataFrame; confirm it is loaded before this
    runs.

    Returns:
      (amgng_df, cla_df): the annotated result frames, or None for any
      model that was not run.
    """
    # Log the call's parameter values for reproducibility.
    values = inspect.getargvalues(inspect.currentframe())[3]
    print('using parameters: {}'.format(values))

    amgng_df = None
    if do_amgng:
        from mgng.amgng import main as amgng_main
        print('Training AMGNG model...')
        out_file = os.path.join(cwd, 'out_amgng_{}'.format(amgng_file))
        full_path = os.path.join(cwd, amgng_file)
        start = datetime.now()
        # skip_rows=[1,2] skips the NuPIC-style type/flag header rows.
        amgng_main(input_file=full_path, output_file=out_file,
                   buffer_len=buffer_len, index_col='timestamp',
                   skip_rows=[1,2], ma_window=ma_window,
                   ma_recalc_delay=ma_recalc_delay)
        amgng_time = datetime.now() - start

        print('Reading results...')
        amgng_df = pd.read_csv(out_file, parse_dates=True,
                               index_col='timestamp')
        # Attach ground-truth annotations, then persist the merged frame.
        amgng_df['Annotation'] = anndf.Type
        print('Writing annotated results...')
        amgng_df.to_csv(out_file)
        if plot:
            utils.plot_results(amgng_df, ['narma30-1000_samples'],
                               'anomaly_score', 'anomaly_density', '[rs]')
        print('Time taken: amgng={}'.format(amgng_time))

    cla_df = None
    if do_cla:
        from cla.swarm import swarm
        from cla.cla import main as cla_main
        out_file = os.path.join(cwd, 'out_cla_{}'.format(cla_file))
        print('Training CLA model...')
        full_path = os.path.join(cwd, cla_file)
        # Swarm configuration for NuPIC: temporal anomaly detection on the
        # ECG1 field, one-step-ahead prediction.
        SWARM_DESCRIPTION = {
            'includedFields': [
                {
                    'fieldName': 'timestamp',
                    'fieldType': 'datetime',
                },
                {
                    'fieldName': 'ECG1',
                    'fieldType': 'float',
                },
            ],
            'streamDef': {
                'info': 'chfdbchf13 ECG1',
                'version': 1,
                'streams': [
                    {
                        'info': 'chfdbchf13',
                        'source': full_path,
                        'columns': ['*']
                    }
                ]
            },
            'inferenceType': 'TemporalAnomaly',
            'inferenceArgs': {
                'predictionSteps': [1],
                'predictedField': 'ECG1'
            },
            'iterationCount': buffer_len,
            'swarmSize': 'large'
        }
        # Swarm first (hyperparameter search), then run the CLA model.
        start = datetime.now()
        swarm(cwd=cwd, input_file=cla_file,
              swarm_description=SWARM_DESCRIPTION)
        swarm_time = datetime.now() - start
        start = datetime.now()
        cla_main(cwd=cwd, input_file=full_path, output_name=out_file, plot=False,
                 predicted_field='ECG1')
        cla_time = datetime.now() - start

        print('Reading results...')
        cla_df = pd.read_csv(out_file, parse_dates=True, index_col='timestamp')
        # Attach ground-truth annotations, then persist the merged frame.
        cla_df['Annotation'] = anndf.Type
        print('Writing annotated results...')
        cla_df.to_csv(out_file)
        if plot:
            utils.plot_results(cla_df, ['ECG1'], 'anomaly_score',
                               'anomaly_likelihood', '[rs]')
        print('Time taken: swarm={}, cla={}'.format(swarm_time, cla_time))
    return amgng_df, cla_df