Пример #1
0
def test_serialize_deserialize():
    """Round-trip the string 'foo' through a temporary file using pickle.

    A pipeline execution context is created for an empty pipeline definition,
    the value is written with ``serialize_to_file`` and read back with
    ``deserialize_from_file``; the result must equal the original value.
    """
    pipeline_def = PipelineDefinition([])
    environment = {}
    run_config = RunConfig()
    with yield_pipeline_execution_context(pipeline_def, environment,
                                          run_config) as context:
        with tempfile.NamedTemporaryFile() as fd:
            target_path = fd.name
            serialize_to_file(context, PickleSerializationStrategy(), 'foo',
                              target_path)
            round_tripped = deserialize_from_file(
                context, PickleSerializationStrategy(), target_path)
            assert round_tripped == 'foo'
Пример #2
0
def test_gcs_object_store(gcs_bucket):
    """Exercise set / has / get / cp / rm on a ``GCSObjectStore``.

    Args:
        gcs_bucket: Bucket handed to the ``GCSObjectStore`` constructor
            (supplied by the caller, e.g. a test fixture).
    """
    store = GCSObjectStore(gcs_bucket)

    payload = b"this is a test"
    # A BytesIO built from initial bytes starts at position 0, ready to read.
    stream = BytesIO(payload)

    pickle_strategy = PickleSerializationStrategy()

    key = "test-file-%s" % uuid.uuid4().hex
    store.set_object(key, stream, pickle_strategy)

    assert store.has_object(key)
    fetched = store.get_object(key, pickle_strategy)
    # The first element of the returned value is the readable object.
    assert fetched[0].read() == payload

    other_key = "test-file-%s" % uuid.uuid4().hex
    store.cp_object(key, other_key)
    assert store.has_object(other_key)

    # Remove both objects first, then verify that both are gone.
    created_keys = (key, other_key)
    for created in created_keys:
        store.rm_object(created)
    for created in created_keys:
        assert not store.has_object(created)
Пример #3
0
def load_parameter(input_name, input_value):
    """Turn a notebook parameter into the value for the named input.

    When the notebook manager has a solid definition, the input's runtime type
    is looked up on it and the value is loaded with ``read_value``. Otherwise
    the manager's ``input_name_type_dict`` decides how the value is handled.

    Args:
        input_name: Name of the input being loaded.
        input_value: The raw parameter value; for pickle-serializable inputs
            it is passed to ``deserialize_from_file`` as the file to read.

    Returns:
        The loaded value: ``input_value`` itself for scalar and
        JSON-serializable inputs, otherwise the deserialized/read value.

    Raises:
        DagstermillError: If the recorded type is none of the handled kinds.
    """
    manager = MANAGER_FOR_NOTEBOOK_INSTANCE
    check.invariant(manager.populated_by_papermill, 'populated_by_papermill')

    # Registered solid: delegate to the input definition's runtime type.
    if manager.solid_def is not None:
        input_def = manager.solid_def.input_def_named(input_name)
        return read_value(input_def.runtime_type, input_value)

    check.invariant(
        manager.input_name_type_dict is not None,
        'input_name_type_dict must not be None if solid_def is not defined!',
    )
    runtime_type_enum = manager.input_name_type_dict[input_name]

    passthrough_kinds = (
        SerializableRuntimeType.SCALAR,
        SerializableRuntimeType.JSON_SERIALIZABLE,
    )
    if runtime_type_enum in passthrough_kinds:
        return input_value

    if runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
        return deserialize_from_file(manager.context,
                                     PickleSerializationStrategy(),
                                     input_value)

    raise DagstermillError(
        "loading parameter {input_name} resulted in an error".format(
            input_name=input_name))
Пример #4
0
    def yield_result(self, value, output_name):
        """Record ``value`` as the output ``output_name`` of the notebook.

        Args:
            value: The value to emit.
            output_name: Name of the output the value belongs to.

        Returns:
            ``value`` unchanged when the manager was not populated by
            papermill; otherwise ``None`` after the value has been recorded
            with ``pm.record``.

        Raises:
            DagstermillError: If the output name is unknown, or if no solid
                definition is available and the output's type is not one of
                the kinds handled here.
        """
        if not self.populated_by_papermill:
            return value

        if self.solid_def is not None:
            # Registered solid: the output definition knows how to write it.
            if not self.solid_def.has_output(output_name):
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'
                    .format(solid_name=self.solid_def.name,
                            output_name=output_name))

            runtime_type = self.solid_def.output_def_named(
                output_name).runtime_type
            out_file = os.path.join(self.marshal_dir,
                                    'output-{}'.format(output_name))
            pm.record(output_name, write_value(runtime_type, value, out_file))
            return None

        # No solid definition: fall back to the recorded output type table.
        if output_name not in self.output_name_type_dict:
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'
                .format(solid_name=self.solid_def_name,
                        output_name=output_name))

        declared_kind = self.output_name_type_dict[output_name]
        recorded_directly = (
            declared_kind == SerializableRuntimeType.SCALAR
            or (declared_kind == SerializableRuntimeType.ANY
                and is_json_serializable(value)))

        if recorded_directly:
            pm.record(output_name, value)
        elif declared_kind == SerializableRuntimeType.PICKLE_SERIALIZABLE:
            # Pickle to a file in the marshal dir and record the file path.
            out_file = os.path.join(self.marshal_dir,
                                    'output-{}'.format(output_name))
            serialize_to_file(
                MANAGER_FOR_NOTEBOOK_INSTANCE.context,
                PickleSerializationStrategy(),
                value,
                out_file,
            )
            pm.record(output_name, out_file)
        else:
            raise DagstermillError(
                'Output Definition for output {output_name} requires repo registration '
                'since it has a complex serialization format'.format(
                    output_name=output_name))
        return None
Пример #5
0
    def yield_result(self, value, output_name='result'):
        """Record ``value`` as the output ``output_name`` of the notebook.

        Args:
            value: The value to emit.
            output_name: Name of the output the value belongs to; defaults to
                ``'result'``.

        Returns:
            ``value`` unchanged when the manager was not populated by
            papermill; otherwise ``None`` after the value has been recorded
            with ``scrapbook.glue``.

        Raises:
            DagstermillError: If the output name is unknown, or if no solid
                definition is available and the output's type is not one of
                the kinds handled here.
        """
        if not self.populated_by_papermill:
            return value

        if self.solid_def is not None:
            # Registered solid: the output definition knows how to write it.
            if not self.solid_def.has_output(output_name):
                raise DagstermillError(
                    'Solid {solid_name} does not have output named {output_name}'
                    .format(solid_name=self.solid_def.name,
                            output_name=output_name))

            runtime_type = self.solid_def.output_def_named(
                output_name).runtime_type
            out_file = os.path.join(self.marshal_dir,
                                    'output-{}'.format(output_name))
            scrapbook.glue(output_name,
                           write_value(runtime_type, value, out_file))
            return None

        # No solid definition: fall back to the recorded output type table.
        if output_name not in self.output_name_type_dict:
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'
                .format(solid_name=self.solid_def_name,
                        output_name=output_name))

        declared_kind = self.output_name_type_dict[output_name]
        glued_directly = (
            declared_kind == SerializableRuntimeType.SCALAR
            or (declared_kind == SerializableRuntimeType.ANY
                and is_json_serializable(value)))

        if glued_directly:
            scrapbook.glue(output_name, value)
        elif declared_kind == SerializableRuntimeType.PICKLE_SERIALIZABLE:
            # Pickle to a file in the marshal dir and glue the file path.
            out_file = os.path.join(self.marshal_dir,
                                    'output-{}'.format(output_name))
            PickleSerializationStrategy().serialize_to_file(value, out_file)
            scrapbook.glue(output_name, out_file)
        else:
            raise DagstermillError(
                # Discuss this in the docs and improve error message
                # https://github.com/dagster-io/dagster/issues/1275
                # https://github.com/dagster-io/dagster/issues/1276
                'Output Definition for output {output_name} requires repo registration '
                'since it has a complex serialization format'.format(
                    output_name=output_name))
        return None
Пример #6
0
def test_gcs_object_store(gcs_bucket):
    """Exercise set / has / get / cp / rm on a ``GCSObjectStore``.

    Args:
        gcs_bucket: Bucket handed to the ``GCSObjectStore`` constructor
            (supplied by the caller, e.g. a test fixture).
    """
    store = GCSObjectStore(gcs_bucket)

    payload = b'this is a test'
    # A BytesIO built from initial bytes starts at position 0, ready to read.
    stream = BytesIO(payload)

    strategy = PickleSerializationStrategy()

    key = 'test-file-%s' % uuid.uuid4().hex
    store.set_object(key, stream, strategy)

    assert store.has_object(key)
    fetched = store.get_object(key, strategy)
    # The result exposes the readable object through its ``obj`` attribute.
    assert fetched.obj.read() == payload

    other_key = 'test-file-%s' % uuid.uuid4().hex
    store.cp_object(key, other_key)
    assert store.has_object(other_key)

    # Remove both objects first, then verify that both are gone.
    created_keys = (key, other_key)
    for created in created_keys:
        store.rm_object(created)
    for created in created_keys:
        assert not store.has_object(created)
Пример #7
0
def test_serialization_strategy():
    """Round-trip the string 'foo' through a named temporary file."""
    strategy = PickleSerializationStrategy()
    with tempfile.NamedTemporaryFile() as tmp:
        path = tmp.name
        strategy.serialize_to_file('foo', path)
        restored = strategy.deserialize_from_file(path)
        assert restored == 'foo'
Пример #8
0
            Tuple[str, str]: The fully qualified key of the source object and the fully qualified
                destination key.
        """

    @abstractmethod
    def uri_for_key(self, key, protocol=None):
        """Implement this method to get a URI for a key in the object store.

        Args:
            key: The key in the object store to produce a URI for.
            protocol: Optional; defaults to ``None``.
                NOTE(review): presumably selects the URI scheme -- confirm
                against the concrete implementations.

        Returns:
            str: The URI for ``key``.
        """

    def key_for_paths(self, path_fragments):
        """Build a key by joining ``path_fragments`` with this store's ``sep``.

        Args:
            path_fragments: Iterable of string fragments to join.

        Returns:
            str: The fragments joined by ``self.sep``.
        """
        separator = self.sep
        return separator.join(path_fragments)


# Module-level pickle strategy instance, created once at import time.
# InMemoryObjectStore.get_object uses it as the default value of its
# ``serialization_strategy`` parameter.
DEFAULT_SERIALIZATION_STRATEGY = PickleSerializationStrategy()


class InMemoryObjectStore(ObjectStore):
    def __init__(self):
        """Start with an empty store and initialise the base class with name "memory"."""
        # Backing mapping from key to the stored object (filled by set_object).
        self.values = {}
        super(InMemoryObjectStore, self).__init__(name="memory")

    def set_object(self, key, obj, serialization_strategy=None):
        """Store ``obj`` under ``key`` in the in-memory mapping.

        Args:
            key: Key to store the object under; validated with
                ``check.str_param``.
            obj: The object to keep; it is stored as-is.
            serialization_strategy: Accepted but not used by this method.

        Returns:
            The ``key`` that was passed in.
        """
        check.str_param(key, "key")
        # Kept by reference: nothing is serialized for the in-memory store.
        self.values[key] = obj
        return key

    def get_object(self,
                   key,
                   serialization_strategy=DEFAULT_SERIALIZATION_STRATEGY):
Пример #9
0
def test_serialization_strategy():
    """Round-trip the string "foo" through a path from ``safe_tempfile_path``."""
    strategy = PickleSerializationStrategy()
    with safe_tempfile_path() as scratch_path:
        strategy.serialize_to_file("foo", scratch_path)
        restored = strategy.deserialize_from_file(scratch_path)
        assert restored == "foo"