Example #1
def test_wrapped_nested_type():
    '''Tests to make sure that nested NamedTuple messages are unpacked correctly'''
    Inner = create_namedtuple('Inner', [('x', int), ('y', int), ('z', int)])

    N1 = create_namedtuple('N1', [('dict_data', Dict[str, int])])
    N2 = create_namedtuple('N2', [('n1s', List[N1])])

    def f1(x: List[Inner]) -> Inner:
        '''Returns the component-wise sum of a sequence of Inner'''
        sums = np.vstack(x).sum(axis=0)
        return Inner(*sums)

    def f2(n2_in: N2) -> N2:
        '''Returns another N2 type using data from the input N2 type'''
        n1_in = n2_in.n1s[0]
        dict_data = dict(**n1_in.dict_data)  # shallow copy
        dict_data['b'] = 2
        n1_out = N1(dict_data=dict_data)
        n2_out = N2(n1s=[n1_out, n1_out])
        return n2_out

    f1_in = ([
        Inner(1, 2, 3),
    ] * 5, )
    f1_out = (5, 10, 15)

    n1 = N1(dict_data={'a': 1})
    n1_out = N1(dict_data={'a': 1, 'b': 2})
    f2_in = N2(n1s=[n1])
    f2_out = N2(n1s=[n1_out, n1_out])

    _generic_test(f1, f1_in, f1_out)
    _generic_test(f2, f2_in, f2_out, skip=_dict_skips)
Example #2
def model_create_pipeline(formatter, clf):
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from image_mood_classifier._version import MODEL_NAME, __version__ as VERSION
    from os import path

    formatter.set_params(classifier=clf)
    tag_type = []
    for item in formatter.output_types_:
        for k in item:
            tag_type.append((k, item[k]))
    name_in = "ImageTag"
    ImageTag = create_namedtuple(name_in, tag_type)
    name_multiple_in = name_in + "s"
    ImageTagSet = create_namedtuple(name_in + "Set",
                                    [(name_multiple_in, List[ImageTag])])

    def predict_class(val_wrapped: ImageTagSet) -> ImageTagSet:
        df = pd.DataFrame(getattr(val_wrapped, name_multiple_in),
                          columns=ImageTag._fields)
        tags_df = formatter.predict(df)
        tags_parts = tags_df.to_dict('split')
        tags_list = [ImageTag(*r) for r in tags_parts['data']]
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({}))".
              format("classify", MODEL_NAME, VERSION, len(df), ImageTagSet,
                     len(tags_df), ImageTagSet))
        return ImageTagSet(tags_list)

    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_class), Requirements(packages=[package_path],
                                                       reqs=[pd, np, sklearn])
Example #3
def model_create_pipeline(formatter, clf):
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    from image_mood_classifier._version import MODEL_NAME, __version__ as VERSION

    # add classifier
    formatter.set_params(classifier=clf)

    # create a dataframe and image set
    # ImageSet = create_dataframe("ImageSet", ImageDecoder.generate_input_dataframe())
    # TODO: replace with a more friendly dataframe operation when it supports strings...
    tag_type = []
    print("formatter.output_types_: %s" % (formatter.output_types_,))
    for item in formatter.output_types_:
        print("item: %s" % (item,))
        for k in item:
            print("k: %s" % (k,))
            tag_type.append((k, item[k]))
            print("tag_type: %s" % (tag_type,))
    name_in = "ImageTag"
    ImageTag = create_namedtuple(name_in, tag_type)
    print("ImageTag: %s" % (ImageTag,))
    name_multiple_in = name_in + "s"
    print("name_multiple_in: %s" % name_multiple_in)
    ImageTagSet = create_namedtuple(name_in + "Set", [(name_multiple_in, List[ImageTag])])
    print("ImageTagSet: %s" % (ImageTagSet,))
    print("ImageTag._fields: %s" % (ImageTag._fields,))

    def predict_class(val_wrapped: ImageTagSet) -> ImageTagSet:
        '''Returns an array of float predictions'''
        # NOTE: we don't have a named output type, so we need to match 'value' to the proto output
        df = pd.DataFrame(getattr(val_wrapped, name_multiple_in), columns=ImageTag._fields)
        tags_df = formatter.predict(df)
        tags_parts = tags_df.to_dict('split')
        tags_list = [ImageTag(*r) for r in tags_parts['data']]
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({})".format(
              "classify", MODEL_NAME, VERSION, len(df), ImageTagSet, len(tags_df), ImageTagSet))
        return ImageTagSet(tags_list)

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_class), Requirements(packages=[package_path], reqs=[pd, np, sklearn])
Example #4
    def generate_model(self, CSV_filename, is_raw_data=False):
        from acumos.modeling import Model, List, create_namedtuple
        from acumos.session import Requirements
        from os import path
        import matplotlib
        import sklearn

        print(">> %s:  Loading raw features, training model" % CSV_filename)
        model = self.build_model_from_CSV(CSV_filename,
                                          is_raw_data=is_raw_data)
        print(">> %s:  Reload features, push to server" % CSV_filename)
        df = pd.read_csv(CSV_filename)[self.features]
        listVars = [(df.columns[i], df.dtypes[i].type)
                    for i in range(len(df.columns))]
        VmPredictorDataFrame = create_namedtuple('VmPredictorDataFrame',
                                                 listVars)

        def predict_metric(df: VmPredictorDataFrame) -> List[float]:
            '''Returns an array of float predictions'''
            X = np.column_stack(df)
            return model.predict(X)

        # compute path of this package to add it as a dependency
        package_path = path.dirname(path.realpath(__file__))
        return Model(classify=predict_metric), Requirements(
            packages=[package_path], reqs=[matplotlib, sklearn, np, pd])
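
A minimal usage sketch for generate_model (the predictor instance, CSV path, and dump arguments here are illustrative assumptions, not part of the original example):

from acumos.session import AcumosSession

# hypothetical invocation: train from a CSV, then dump the model bundle to disk
model, reqs = predictor.generate_model('vm_history.csv')
AcumosSession().dump(model, 'vm-predictor', '.', reqs)

Example #5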
def test_session_push_sklearn():
    '''Tests basic model pushing functionality with sklearn'''
    clear_jwt()

    with _patch_auth():
        with MockServer() as server:
            iris = load_iris()
            X = iris.data
            y = iris.target

            clf = RandomForestClassifier(random_state=0)
            clf.fit(X, y)

            columns = [
                'sepallength', 'sepalwidth', 'petallength', 'petalwidth'
            ]
            X_df = pd.DataFrame(X, columns=columns)

            DataFrame = create_dataframe('DataFrame', X_df)
            Predictions = create_namedtuple('Predictions',
                                            [('predictions', List[int])])

            def predict(df: DataFrame) -> Predictions:
                '''Predicts the class of iris'''
                X = np.column_stack(df)
                yhat = clf.predict(X)
                preds = Predictions(predictions=yhat)
                return preds

            model = Model(predict=predict)

            model_url, auth_url, _, _ = server.config
            s = AcumosSession(model_url, auth_url)
            s.push(model, name='sklearn_iris_push')
Example #6
    def generate_model(self, file_list, data_out=None):
        from acumos.modeling import Model, List, create_namedtuple
        from acumos.session import Requirements
        from os import path
        import sklearn

        # Note:  all files in the list will be appended
        master_df, VM_list = self.preprocess_files(file_list)
        train_start, range_end = self.find_time_range(
            master_df)  # TBD:  allow user to specify start/stop dates
        train_stop = train_start + self.train_interval
        xmodel, train_data = self.train_timeslice_model(master_df,
                                                        VM_list,
                                                        train_start,
                                                        train_stop,
                                                        featfile=data_out)

        df = train_data[self.features]
        listColumns = list(df.columns)
        listVars = [(df.columns[i], type(df.iloc[0, i]))
                    for i in range(len(listColumns))]
        VmPredictorDataFrame = create_namedtuple('VmPredictorDataFrame',
                                                 listVars)
        VmPredictorDataFrameSet = create_namedtuple(
            'VmPredictorDataFrameSet',
            [('frames', List[VmPredictorDataFrame])])
        type_out = List[float]

        def predict_metric(val_wrapped: VmPredictorDataFrameSet) -> type_out:
            '''Returns an array of float predictions'''
            df = pd.DataFrame(val_wrapped.frames, columns=listColumns)
            # df = pd.DataFrame(np.column_stack(val_wrapped), columns=val_wrapped._fields)  # numpy doesn't like binary
            predict_nd = xmodel.predict(df)  # return here is a nd-array
            predict_list = predict_nd.tolist()  # flatten to tag set
            # predict_list = type_out(list(predict_nd))  # flatten to tag set
            return predict_list

        # compute path of this package to add it as a dependency
        package_path = path.dirname(path.realpath(__file__))
        return Model(classify=predict_metric), Requirements(
            packages=[package_path], reqs=[sklearn, np, pd])
Example #7
def test_session_push_keras():
    '''Tests basic model pushing functionality with keras'''
    clear_jwt()

    with _patch_auth():
        with MockServer() as server:
            iris = load_iris()
            X = iris.data
            y = pd.get_dummies(iris.target).values

            clf = Sequential()
            clf.add(Dense(3, input_dim=4, activation='relu'))
            clf.add(Dense(3, activation='softmax'))
            clf.compile(loss='categorical_crossentropy',
                        optimizer='adam',
                        metrics=['accuracy'])
            clf.fit(X, y)

            columns = [
                'sepallength', 'sepalwidth', 'petallength', 'petalwidth'
            ]
            X_df = pd.DataFrame(X, columns=columns)

            DataFrame = create_dataframe('DataFrame', X_df)
            Predictions = create_namedtuple('Predictions',
                                            [('predictions', List[int])])

            def predict(df: DataFrame) -> Predictions:
                '''Predicts the class of iris'''
                X = np.column_stack(df)
                yhat = clf.predict(X)
                preds = Predictions(predictions=yhat)
                return preds

            model = Model(predict=predict)

            model_url, auth_url, _, _ = server.config
            s = AcumosSession(model_url, auth_url)
            s.push(model, name='keras_iris_push')
Example #8
def model_create_pipeline(path_model, path_label, top_n):
    from sklearn.pipeline import Pipeline
    import keras
    from image_classifier.keras_model.prediction_formatter import Formatter
    from image_classifier.keras_model.evaluate_image import Predictor
    from image_classifier.keras_model.image_decoder import ImageDecoder
    from acumos.modeling import Model, List, create_namedtuple
    from acumos.session import Requirements
    from os import path
    from _version import MODEL_NAME, __version__

    # read dictionary to pass along to formatter class
    dict_classes = eval(open(path_label, 'r').read()) if path_label else None

    # we will create a hybrid keras/scikit pipeline because we need some preprocessing done
    #   within scikit that is not easily possible with keras
    #
    # stages are as follows (the quoted section is the scikit pipeline name)
    #   #1 'decode' - input+reshape - decode incoming image with MIME+BINARY as inputs
    #   #2 'predict' - prediction - input the transformed image to the prediction method
    #   #3 'format' - predict transform - post-process the predictions into sorted prediction classes
    # see this page for hints about what happens...
    #   https://stackoverflow.com/questions/37984304/how-to-save-a-scikit-learn-pipline-with-keras-regressor-inside-to-disk
    #
    # NOTE: the last object is an "estimator" type so that we can call "predict", as required by the
    #       acumos-based wrapper functionality
    pipeline = Pipeline([
        ('decode', ImageDecoder()),
        ('predict', Predictor(path_model=path_model)),
        ('format', Formatter(dict_classes, top_n))
    ])

    # create a dataframe and image set
    # ImageSet = create_dataframe("ImageSet", ImageDecoder.generate_input_dataframe())
    # TODO: replace with a more friendly dataframe operation when it supports strings...
    df = ImageDecoder.generate_input_dataframe()
    image_type = tuple(zip(df.columns, ImageDecoder.generate_input_types()))
    name_in = "Image"
    input_image = create_namedtuple(name_in, image_type)

    # output of the classifier, a list of tags
    df = Formatter.generate_output_dataframe()
    tag_result = tuple(zip(df.columns, Formatter.generate_output_types()))
    name_out = "ImageTag"
    ImageTag = create_namedtuple(name_out, tag_result)
    output_set = create_namedtuple(name_out + "Set", [(name_out + "s", List[ImageTag])])

    def predict_class(val_wrapped: input_image) -> output_set:
        '''Returns an array of float predictions'''
        # NOTE: we don't have a named output type, so we need to match 'value' to the proto output
        df = pd.DataFrame([val_wrapped], columns=input_image._fields)
        tags_df = pipeline.predict(df)
        tags_parts = tags_df.to_dict('split')
        tags_list = [ImageTag(*r) for r in tags_parts['data']]
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({})".format(
              "classify", MODEL_NAME, __version__, len(df), input_image, len(tags_df), output_set))
        return output_set(tags_list)

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    return Model(classify=predict_class), Requirements(packages=[package_path], reqs=[pd, np, keras, 'tensorflow', 'Pillow'])
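
A short usage sketch (the model path, label path, and bundle name below are illustrative assumptions):

from acumos.session import AcumosSession

# hypothetical invocation: build the pipeline model and dump it locally
model, reqs = model_create_pipeline('model.h5', 'labels.json', top_n=5)
AcumosSession().dump(model, 'image-classifier', '.', reqs)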
Example #9
from acumos.modeling import create_namedtuple


def _load_namedtuple(name, field_types):
    '''Workaround for a dill NamedTuple serialization bug'''
    return create_namedtuple(name, [(k, v) for k, v in field_types.items()])
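
A quick round-trip sketch of the helper (the 'Point' type is an illustrative assumption):

# rebuild a NamedTuple type from a name and a field-type mapping
Point = _load_namedtuple('Point', {'x': int, 'y': int})
assert Point(x=1, y=2) == (1, 2)

Example #10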
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from acumos.modeling import Model, List, create_namedtuple, create_dataframe
from acumos.session import AcumosSession

if __name__ == '__main__':
    '''Main'''

    iris = load_iris()
    X = iris.data
    y = iris.target

    clf = RandomForestClassifier(random_state=0)
    clf.fit(X, y)

    columns = ['sepallength', 'sepalwidth', 'petallength', 'petalwidth']
    X_df = pd.DataFrame(X, columns=columns)

    DataFrame = create_dataframe('DataFrame', X_df)
    Predictions = create_namedtuple('Predictions',
                                    [('predictions', List[int])])

    def predict(df: DataFrame) -> Predictions:
        '''Predicts the class of iris'''
        X = np.column_stack(df)
        yhat = clf.predict(X)
        preds = Predictions(predictions=yhat)
        return preds

    model = Model(transform=predict)

    s = AcumosSession(None)
    s.dump(model, 'model', '.')
Example #11
print(train_cols)
np.set_printoptions(precision=2)  # 2 decimal places
np.set_printoptions(suppress=True)  # remove scientific notation

redfin = RedfinAcumosModel()

model_cols = redfin.get_formatted_test_cols(train)
print(model_cols)

# items is the model_cols list from the previous slide
items = [('baths', float), ('beds', float), ('square_feet', float),
         ('property_type', str), ('year_built', float), ('lot_size', float),
         ('hoa_per_month', float), ('days_on_market', float),
         ('location', str), ('state', str), ('city', str)]

HouseDataFrame = create_namedtuple('HouseDataFrame', items)

# here, an appropriate NamedTuple type is inferred from a pandas DataFrame
# HouseDataFrame = create_dataframe('HouseDataFrame', X_df)
print(HouseDataFrame.__doc__)

df = redfin.process_data(train, True)
redfin.df = df
redfin.df.info()


def predict(data):
    test_df = redfin.process_data(data)
    # Train by merging locations/columns found in the train dataframe.
    test_df = redfin.match_train(test_df)
    redfin.train_model()
Example #12
"""
Provides an image Acumos model example

This model returns metadata about input images
"""
import io

import PIL

from acumos.modeling import Model, create_namedtuple
from acumos.session import AcumosSession

ImageShape = create_namedtuple('ImageShape', [('width', int), ('height', int)])


def get_format(data: bytes) -> str:
    '''Returns the format of an image'''
    buffer = io.BytesIO(data)
    img = PIL.Image.open(buffer)
    return img.format


def get_shape(data: bytes) -> ImageShape:
    '''Returns the width and height of an image'''
    buffer = io.BytesIO(data)
    img = PIL.Image.open(buffer)
    shape = ImageShape(width=img.width, height=img.height)
    return shape
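
The example imports Model and AcumosSession but stops before using them; a minimal completion sketch in the same style (the model name and output directory are illustrative assumptions):

# wrap both functions into a single Acumos model and dump it locally
model = Model(get_format=get_format, get_shape=get_shape)

session = AcumosSession()
session.dump(model, 'image-metadata', '.')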
Example #13
def model_create_pipeline(transformer, funcName, inputIsSet, outputIsSet):
    from acumos.session import Requirements
    from acumos.modeling import Model, List, create_namedtuple
    from face_privacy_filter._version import MODEL_NAME, __version__ as VERSION
    import sklearn
    import cv2
    from os import path

    # derive the input type from the transformer
    input_type, type_name = transformer._type_in  # e.g. [('test', int), ('tag', str)]
    type_in = create_namedtuple(type_name, input_type)
    input_set = type_in
    name_multiple_in = type_name
    if inputIsSet:
        name_multiple_in = type_name + "s"
        input_set = create_namedtuple(type_name + "Set",
                                      [(name_multiple_in, List[type_in])])

    # derive the output type from the transformer
    output_type, type_name = transformer._type_out
    type_out = create_namedtuple(type_name, output_type)
    output_set = type_out
    if outputIsSet:
        name_multiple_out = type_name + "s"
        output_set = create_namedtuple(type_name + "Set",
                                       [(name_multiple_out, List[type_out])])

    def transform(val_wrapped: input_set) -> output_set:
        '''Transform from image or detection and return score or image'''
        if inputIsSet:
            df = pd.DataFrame(getattr(val_wrapped, name_multiple_in),
                              columns=type_in._fields)
        else:
            df = pd.DataFrame([val_wrapped], columns=type_in._fields)
        result_df = transformer.predict(df)
        result_parts = result_df.to_dict('split')
        print("[{} - {}:{}]: Input {} row(s) ({}), output {} row(s) ({})".
              format("classify", MODEL_NAME, VERSION, len(df), type_in,
                     len(result_df), output_set))
        output_obj = []
        if len(df):
            if outputIsSet:
                output_obj = output_set(
                    [type_out(*r) for r in result_parts['data']])
            else:
                output_obj = output_set(*result_parts['data'][0])
        return output_obj

    # compute path of this package to add it as a dependency
    package_path = path.dirname(path.realpath(__file__))
    objModelDeclare = {}
    objModelDeclare[funcName] = transform
    # add the model dependency manually because of the way we constructed the package;
    # the opencv-python/cv2 dependency is not picked up automagically
    return Model(**objModelDeclare), Requirements(
        packages=[package_path],
        reqs=[pd, np, sklearn, 'opencv-python'],
        req_map={cv2: 'opencv-python'})
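
A hedged usage sketch (the transformer instance, function name, and dump arguments are illustrative assumptions):

from acumos.session import AcumosSession

# hypothetical invocation: expose the transform under a custom entry point
model, reqs = model_create_pipeline(transformer, 'filter_image', inputIsSet=True, outputIsSet=True)
AcumosSession().dump(model, 'face-privacy-filter', '.', reqs)

Example #14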
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

from acumos.modeling import Model, List, create_namedtuple

# Upper half of the faces
X_train = train[:, :(n_pixels + 1) // 2]
# Lower half of the faces
y_train = train[:, n_pixels // 2:]
X_test = test[:, :(n_pixels + 1) // 2]
y_test = test[:, n_pixels // 2:]

knn = KNeighborsRegressor()
knn.fit(X_train, y_train)

# =============================================================================
# Acumos specific code
# =============================================================================

# represents a single "flattened" [1 x n] image array
FlatImage = create_namedtuple('FlatImage', [('image', List[float])])

# represents a collection of flattened image arrays
FlatImages = create_namedtuple('FlatImages', [('images', List[FlatImage])])


def complete_faces(images: FlatImages) -> FlatImages:
    '''Predicts the bottom half of each input image'''
    X = np.vstack(images).squeeze()  # an [m x n] matrix with m images and n pixels
    yhat = knn.predict(X)
    return FlatImages([FlatImage(row) for row in yhat])


model = Model(complete_faces=complete_faces)
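
A minimal completion sketch in the style of the other examples (the model name and output directory are illustrative assumptions):

from acumos.session import AcumosSession

session = AcumosSession()
session.dump(model, 'face-completion', '.')

Example #15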
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from acumos.modeling import Model, List, create_namedtuple
from acumos.session import AcumosSession


iris = load_iris()
X = iris.data
y = iris.target

clf = RandomForestClassifier(random_state=0)
clf.fit(X, y)

IrisDataFrame = create_namedtuple('IrisDataFrame', [('sepal_length', List[float]),
                                                    ('sepal_width', List[float]),
                                                    ('petal_length', List[float]),
                                                    ('petal_width', List[float])])

# =============================================================================
# # starting in Python 3.6, you can alternatively use this simpler syntax:
#
# from acumos.modeling import NamedTuple
#
# class IrisDataFrame(NamedTuple):
#     '''DataFrame corresponding to the Iris dataset'''
#     sepal_length: List[float]
#     sepal_width: List[float]
#     petal_length: List[float]
#     petal_width: List[float]
# =============================================================================
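
IrisDataFrame is declared above but never wired into a model; a short sketch completing the pattern used by the other examples (the function body, model name, and output directory are illustrative assumptions):

import numpy as np


def predict(df: IrisDataFrame) -> List[int]:
    '''Predicts the iris class of each row'''
    X = np.column_stack(df)
    return clf.predict(X).tolist()


model = Model(predict=predict)
AcumosSession().dump(model, 'iris-model', '.')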