Example #1
    def test_should_return_number_of_instances(self):
        instances = '7'

        response = '{ "pagination": { "offset": 100000000, "total": 7, "page_size": 10 }, "pipelines": []}'

        pipeline = Pipeline()

        self.assertEqual(pipeline.get_instance(response), instances)
Example #2
    def test_should_return_pipeline_status_passed(self):
        path = os.getcwd()
        response_file_name = 'instance_pipeline_passed.json'
        response_file = path + '/test/resources/' + response_file_name

        with open(response_file, 'r') as response_source:
            response = response_source.read()

        pipeline = Pipeline()

        self.assertEqual(pipeline.get_status(response), 'Passed')
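Both tests exercise a Pipeline class that parses GoCD-style JSON payloads. A minimal sketch of get_instance and get_status, assuming the history payload carries a pagination.total field (as in Example #1) and the instance payload exposes a stage result such as 'Passed' (the exact key path used by get_status is an assumption, not taken from this project):

import json


class Pipeline:

    def get_instance(self, response):
        # Total number of pipeline instances from the history payload,
        # returned as a string (see the assertion in Example #1).
        return str(json.loads(response)['pagination']['total'])

    def get_status(self, response):
        # Result of the last stage in the instance payload, e.g. 'Passed'.
        # Assumed response shape; the real fixture file may differ.
        return json.loads(response)['stages'][-1]['result']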
Example #3
def tuning(hp):
    import tensorflow as tf
    pipeline = Pipeline(tfrecords_filenames=hp["tfrecords_filenames"])
    train_dataset = pipeline.get_train_data(int(hp["batch_size"]))
    val_dataset = pipeline.get_val_data(int(hp["batch_size"]))
    model = keras_model.create_model(
        learning_rate=float(hp["lr"]),
        dense_1=int(hp["dense_1"]),
        dense_2=int(hp["dense_2"]))
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        "model.h5", monitor='loss', save_best_only=True, save_freq=2)
    callbacks = [checkpoint_callback, TuneReporterCallback()]
    model.fit(
        train_dataset, validation_data=val_dataset,
        verbose=1,
        epochs=int(hp["epochs"]),
        callbacks=callbacks)
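The tuning function above is shaped like a Ray Tune trainable (metrics are reported through TuneReporterCallback). Purely as an illustration, a launch might look like the sketch below, assuming Ray Tune's classic tune.run API and a search space built from the keys tuning reads out of hp; the file path, ranges, and metric name are placeholders, not values from this project:

from ray import tune

hyperparameter_space = {
    "tfrecords_filenames": "data/data.tfrecord",  # placeholder path
    "batch_size": tune.choice([32, 64]),
    "lr": tune.loguniform(1e-4, 1e-2),
    "dense_1": tune.randint(32, 129),
    "dense_2": tune.randint(16, 65),
    "epochs": 10,
}

analysis = tune.run(tuning, config=hyperparameter_space, num_samples=4)
# The metric name depends on what TuneReporterCallback actually reports.
print(analysis.get_best_config(metric="val_loss", mode="min"))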
Example #4
    def task_retrain_model():

        try:
            logging.info("Query data from database")
            sql2tfrecord = PostgreSQL2Tfrecord()
            data = sql2tfrecord.query_db()
            formatted_data = sql2tfrecord.format_data(data)
            del data
            gc.collect()

            sql2tfrecord.write2tfrecord(
                data=formatted_data,
                filename=os.path.join(
                    package_dir, "..", "data", "data.tfrecord"))

        except Exception as e:
            logging.error("Error in process data to tfrecord: {}".format(e))
            return 0
        logging.critical("Writed data to tfrecord")

        try:
            logging.info("Start initializing training process")
            pipeline = Pipeline(
                tfrecords_filenames=os.path.join(
                    package_dir, "..", "data", "data.tfrecord"))
            train_keras_model = TrainKerasModel(pipeline=pipeline)
            hyperparameter_space.update({
                "tfrecords_filenames": os.path.join(
                    package_dir,
                    "..",
                    "data",
                    "data.tfrecord")
            })
        except Exception as e:
            logging.error(
                "Error in initializing training process: {}".format(e))
            return 0
        try:
            logging.info("Start Searching Best Model")
            best_model = train_keras_model.get_best_model(
                hyperparameter_space=hyperparameter_space,
                num_samples=num_samples)
        except Exception as e:
            logging.error("Error in searching best model: {}".format(e))
            return 0

        try:
            logging.info("Start saving model")
            result = train_keras_model.save_model(
                model=best_model,
                filename=os.path.join(
                    models_dir, str(int(time.time()))))
        except Exception as e:
            logging.error("Error in saving model: {}".format(e))
            return 0

        logging.info("Retraining finished. Training result: {}".format(result))
Example #5
    def _setup_pipelines(self):
        self.pipelines = {}
        print(self.configuration)
        for pipeline_group in self.configuration.cruise.pipelines:
            for pipeline in pipeline_group.pipeline:
                pipeline_name = pipeline.get_attribute("name")
                stages = []
                for stage in pipeline.stage:
                    stages.append(stage.get_attribute("name"))
                self.pipelines[pipeline_name] = Pipeline(pipeline_name, stages)
Example #6
def run_video(input_filepath, detector_config, output_filepath=None):
    """
    Args:
        input_filepath: input video filepath. Set to "0" for the webcam, or another device number.
        detector_config: path to the detector config file.
        output_filepath: filepath to save the result video; set to None to disable
                saving to disk. Default: None.
    """
    if input_filepath == "0":
        input_filepath = 0
    video_capture = cv2.VideoCapture(input_filepath)

    # exit if video not opened
    if not video_capture.isOpened():
        logger.error('Cannot open video')
        sys.exit()

    # The default frame resolution (system dependent) is obtained from the
    # capture device and converted from float to int.
    frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Define the codec and create the VideoWriter object; output is written to output_filepath.
    video_out = None
    if output_filepath is not None:
        video_out = cv2.VideoWriter(output_filepath,
                                    cv2.VideoWriter_fourcc(*'MPEG'), 20.,
                                    (frame_width, frame_height))

    # init detector
    device = init_device()
    detector = setup_detector(detector_config, device)

    # init tracker
    tracker = setup_tracker()

    # init detection pipeline
    # TODO: pass image_size config
    pipeline = Pipeline(detector=detector,
                        tracker=tracker,
                        resize_image_size=(300, 300))

    # run processing
    process(video_capture, pipeline, video_out=video_out, headless=False)

    # When everything is done, release the capture
    video_capture.release()
    cv2.destroyAllWindows()

    # shutdown device
    detector.close()
    device.close()
    device.destroy()
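A hypothetical invocation of run_video (the paths below are placeholders, not files from this project):

if __name__ == "__main__":
    # Process a file on disk and save the annotated result.
    run_video("videos/input.mp4", "configs/detector.json",
              output_filepath="videos/output.avi")

    # Or read from the default webcam without writing anything to disk.
    # run_video("0", "configs/detector.json")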
Example #7
def main():
    directory_path, text_path, quotes_path, speech_path, output_path = \
        _get_data_from_cmd()

    quotes_rules = _read_csv(quotes_path, ';')
    speech_rules = _read_csv(speech_path, ';')

    quotes_adapter = QuotesAdapter(quotes_rules)
    speech_detector = SpeechDetector(speech_rules)
    character_detector = CharacterDetector()
    pipeline = Pipeline(quotes_adapter, speech_detector, character_detector)

    if directory_path is None:
        list_of_textfiles = [text_path]
    else:
        list_of_textfiles = _get_list_of_text_files(directory_path)

    for textpath in list_of_textfiles:
        text = _read_file(textpath)
        text = pipeline.apply_to(text)
        _write_to_file(output_path, directory_path, textpath, text)
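For context, a minimal sketch of the _read_csv helper used above; this is an assumption about its behavior (the project's own helper may return a different structure), reading rule rows with the given delimiter:

import csv


def _read_csv(path, delimiter):
    # Return the rule file as a list of rows, each row a list of columns.
    with open(path, newline='', encoding='utf-8') as csv_file:
        return list(csv.reader(csv_file, delimiter=delimiter))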
Example #8
        print(f"OOB score = {rf.oob_score_}")
        oob_diff.append(rf.score(X_train, y_train) - rf.oob_score_)
        oob.append(rf.oob_score_)

    ax.plot(oob_diff, color='red')
    ax.plot(oob, color='blue')
    ax.set_title("Reducing OOB Error by limiting max_depth")
    plt.savefig('images/oob.png')


if __name__ == '__main__':
    # energy_df = pd.read_csv('data/energy_dataset.csv',index_col=0, parse_dates=[0])
    # weather_df = pd.read_csv('data/weather_features.csv',index_col=0, parse_dates=[0])
    print("Loading Data")
    # read in files from s3 bucket
    energy = Pipeline('s3://ajzcap2/energy_dataset.csv')
    weather = Pipeline('s3://ajzcap2/weather_features.csv')

    # make the index a datetime object
    energy.my_reset_index()
    weather.my_reset_index()

    # Drop columns
    weather_drop_cols = [
        'weather_icon', 'weather_main', 'weather_id', 'temp_min', 'temp_max'
    ]
    energy_drop_cols = [
        'generation fossil coal-derived gas', 'generation fossil oil shale',
        'generation fossil peat', 'generation geothermal', 'generation marine',
        'generation hydro pumped storage aggregated',
        'forecast wind offshore eday ahead', 'generation wind offshore',
Example #9
from src.pipeline import Pipeline
from src.train import TrainKerasModel
from tests.resources.test_data import (
    test_hyperparameter_space, test_simple_train_hyperparameters)
import os
import tensorflow as tf
import shutil
package_dir = os.path.dirname(os.path.abspath(__file__))
pipeline = Pipeline(
    tfrecords_filenames=os.path.join(
        package_dir, "resources", "test_data.tfrecord"))
train_keras_model = TrainKerasModel(pipeline=pipeline)


class TestTrainKerasModel:

    def test_simple_train(self):
        model = train_keras_model.simple_train(
            test_simple_train_hyperparameters)
        assert model is not None

    def test_get_best_model(self):
        test_hyperparameter_space.update(
            {"tfrecords_filenames": os.path.join(
                package_dir, "resources", "test_data.tfrecord")})
        tuned_model = train_keras_model.get_best_model(
            hyperparameter_space=test_hyperparameter_space,
            num_samples=1)
        print(tuned_model)
        assert tuned_model is not None
Example #10
from src.pipeline import Pipeline
import OpenGL.GL as gl
from src.shape import Shape
import numpy

print("Numpy: {:s}".format(numpy.__version__))

# fg = FrameGrabber()
shape = Shape()
pipeline = Pipeline(shape, None)
pipeline.loadShaderFile('shaders/diffuse.vert', gl.GL_VERTEX_SHADER)
pipeline.loadShaderFile('shaders/diffuse.frag', gl.GL_FRAGMENT_SHADER)
pipeline.initGl()
pipeline.sendData()


pipeline.run()

# fg.finish()
Example #11
# matplotlib.use("Agg")
from sklearn.model_selection import train_test_split, KFold
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.linear_model import Lasso, LassoCV, Ridge, LinearRegression
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline as SKPipe
from sklearn.inspection import permutation_importance, plot_partial_dependence
from sklearn.model_selection import GridSearchCV
from src.pipeline import Pipeline
from src.helpers import (plot_corr_matrix, scree_plot, plot_num_estimators_mse,
                         gridsearch, pdplots, compare_default_models,
                         pca_with_scree, feat_imp_plots, plot_oob_error)
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from statsmodels.stats.outliers_influence import variance_inflation_factor

if __name__ == '__main__':
    df = Pipeline('s3://ajzcap3/spain_data.csv')
    us = Pipeline('data/us_data.csv')

    # make the Spanish data look more like the US data
    drop_cols = [
        'Valencia_wind_speed', 'Madrid_temp', 'Madrid_wind_speed',
        'Seville_temp', 'Bilbao_temp', 'Bilbao_wind_speed', ' Barcelona_temp'
    ]

    for i in drop_cols:
        df.df.drop(i, inplace=True, axis=1)

    # Combine hydro
    df.df['conventional hydro'] = df.df[
        'generation hydro run-of-river and poundage'] + df.df[
            'generation hydro water reservoir']
Example #12
    def train(self):
        # TODO not relevant for paper but important

        X = self.load()

        x_train, x_test, y_train, y_test, indices_train, indices_test = \
            train_test_split(
                X['data'], X['target'], range(0, len(X['data'])), test_size=0.2, random_state=42)
        print('data loaded')

        # order of labels in `target_names` can be different from `categories`
        target_names = X['target_names']

        def size_mb(docs):
            return sum(len(s.encode('utf-8')) for s in docs) / 1e6

        data_train_size_mb = size_mb(x_train)
        data_test_size_mb = size_mb(x_test)

        print("%d documents - %0.3fMB (training set)" %
              (len(x_train), data_train_size_mb))
        print("%d documents - %0.3fMB (test set)" %
              (len(x_test), data_test_size_mb))
        print("%d categories" % len(target_names))
        print()

        print(
            "Extracting features from the training data using a sparse vectorizer"
        )
        t0 = time()
        vectorizer = TfidfVectorizer(
            sublinear_tf=True, max_df=0.5, stop_words='english')
        X_train = vectorizer.fit_transform(x_train)
        duration = time() - t0
        print("done in %fs at %0.3fMB/s" %
              (duration, data_train_size_mb / duration))
        print("n_samples: %d, n_features: %d" % X_train.shape)
        print()

        print(
            "Extracting features from the test data using the same vectorizer")
        t0 = time()
        X_test = vectorizer.transform(x_test)
        duration = time() - t0
        print("done in %fs at %0.3fMB/s" %
              (duration, data_test_size_mb / duration))
        print("n_samples: %d, n_features: %d" % X_test.shape)
        print()

        # mapping from integer feature name to original token string
        feature_names = np.asarray(vectorizer.get_feature_names())

        def trim(s):
            """Trim string to fit on terminal (assuming 80-column display)"""
            return s if len(s) <= 80 else s[:77] + "..."

        # #############################################################################
        # Benchmark classifiers
        def benchmark(clf):
            print('_' * 80)
            print("Training: ")
            print(clf)
            t0 = time()
            clf.fit(X_train, y_train)
            train_time = time() - t0
            print("train time: %0.3fs" % train_time)

            t0 = time()
            pred = clf.predict(X_test)
            test_time = time() - t0
            print("test time:  %0.3fs" % test_time)

            score = metrics.accuracy_score(y_test, pred)
            print("accuracy:   %0.3f" % score)

            if hasattr(clf, 'coef_'):
                print("dimensionality: %d" % clf.coef_.shape[1])
                print("density: %f" % density(clf.coef_))

                if feature_names is not None:
                    print("top 10 keywords per class:")
                    for i, label in enumerate(target_names):
                        top10 = np.argsort(clf.coef_[i])[-10:]
                        print(
                            trim("%s: %s" %
                                 (label, " ".join(feature_names[top10]))))
                print()

            print("classification report:")
            print(
                metrics.classification_report(y_test,
                                              pred,
                                              target_names=target_names))

            print("confusion matrix:")
            print(metrics.confusion_matrix(y_test, pred))

            print()
            clf_descr = str(clf).split('(')[0]
            return clf_descr, score, train_time, test_time

        results = []
        for clf, name in (
                (RidgeClassifier(tol=1e-2, solver="lsqr"), "Ridge Classifier"),
                (Perceptron(max_iter=50), "Perceptron"),
                (PassiveAggressiveClassifier(max_iter=50), "Passive-Aggressive"),
                (KNeighborsClassifier(n_neighbors=10), "kNN"),
                (RandomForestClassifier(n_estimators=100), "Random forest")):
            print('=' * 80)
            print(name)
            results.append(benchmark(clf))

        for penalty in ["l2", "l1"]:
            print('=' * 80)
            print("%s penalty" % penalty.upper())
            # Train Liblinear model
            results.append(
                benchmark(LinearSVC(penalty=penalty, dual=False, tol=1e-3)))

            # Train SGD model
            results.append(
                benchmark(
                    SGDClassifier(alpha=.0001, max_iter=50, penalty=penalty)))

        # Train SGD with Elastic Net penalty
        print('=' * 80)
        print("Elastic-Net penalty")
        results.append(
            benchmark(
                SGDClassifier(alpha=.0001, n_iter=50, penalty="elasticnet")))

        # Train NearestCentroid without threshold
        print('=' * 80)
        print("NearestCentroid (aka Rocchio classifier)")
        results.append(benchmark(NearestCentroid()))

        # Train sparse Naive Bayes classifiers
        print('=' * 80)
        print("Naive Bayes")
        results.append(benchmark(MultinomialNB(alpha=.01)))
        results.append(benchmark(BernoulliNB(alpha=.01)))

        print('=' * 80)
        print("LinearSVC with L1-based feature selection")
        # The smaller C, the stronger the regularization.
        # The more regularization, the more sparsity.
        results.append(
            benchmark(
                Pipeline([('feature_selection',
                           SelectFromModel(
                               LinearSVC(penalty="l1", dual=False, tol=1e-3))),
                          ('classification', LinearSVC(penalty="l2"))])))

        # make some plots

        indices = np.arange(len(results))

        results = [[x[i] for x in results] for i in range(4)]

        clf_names, score, training_time, test_time = results
        training_time = np.array(training_time) / np.max(training_time)
        test_time = np.array(test_time) / np.max(test_time)

        plt.figure(figsize=(12, 8))
        plt.title("Score")
        plt.barh(indices, score, .2, label="score", color='navy')
        plt.barh(indices + .3,
                 training_time,
                 .2,
                 label="training time",
                 color='c')
        plt.barh(indices + .6,
                 test_time,
                 .2,
                 label="test time",
                 color='darkorange')
        plt.yticks(())
        plt.legend(loc='best')
        plt.subplots_adjust(left=.25)
        plt.subplots_adjust(top=.95)
        plt.subplots_adjust(bottom=.05)

        for i, c in zip(indices, clf_names):
            plt.text(-.3, i, c)

        plt.show()

        return self._container
Example #13
import sys
import requests

sys.path.append('../')

from src.pipeline import Pipeline
from src.arduino_client import Arduino
from src.server import Server

while True:
    protocol = 'http'
    host = 'localhost'
    pipeline_name = 'piarm'
    gocd = Server(protocol, host, pipeline_name)
    arduino = Arduino()

    instance_request = requests.get(gocd.history_url)

    pipeline = Pipeline()

    state_request = requests.get(
        gocd.pipeline_url + pipeline.get_instance(instance_request.content))

    pipeline_status = pipeline.get_status(state_request.content)

    print(pipeline_status)

    if pipeline_status == 'Passed':
        passed = 'b'
        arduino.send(passed)
    else:
        failed = 'a'
        arduino.send(failed)
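As written, the loop polls the GoCD server back-to-back with no pause. A small delay at the end of each iteration keeps the polling rate reasonable; the 30-second interval below is an assumption:

import time

POLL_INTERVAL_SECONDS = 30  # assumed polling interval

# Place at the end of the while-loop body:
time.sleep(POLL_INTERVAL_SECONDS)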