Пример #1
0
    def parse_return_val(hfid, val, data_context):
        """
        Convert a task return value into a presentation type plus frames.

        The branch taken depends on the Python type of ``val``:

        * ``None``: HF presentation, no frames.
        * ``HyperFrameRecord``: HF presentation, one hframe frame wrapping it.
        * ``np.ndarray`` or ``list``: TENSOR presentation, one series-like
          frame (a list is first converted with ``np.array``).
        * ``tuple``: ROW presentation, one series-like frame per position.
        * ``dict``: ROW presentation, one frame per key; series-like values
          use ``convert_serieslike2frame``, everything else is treated as a
          scalar and uses ``convert_scalar2frame``.
        * ``pd.DataFrame``: DF presentation, frames from ``convert_df2frames``.
        * anything else: SCALAR presentation, one scalar frame.

        Args:
            hfid (str): UUID
            val (object): A scalar, dict, tuple, list, dataframe
            data_context (DataContext): The data context into which to place this value

        Returns:
            (presentation, frames[])

        """

        frames = []

        # Frames are materialized under <object dir>/<hfid>.
        managed_path = os.path.join(data_context.get_object_dir(), hfid)

        if val is None:
            presentation = hyperframe_pb2.HF

        elif isinstance(val, HyperFrameRecord):
            presentation = hyperframe_pb2.HF
            # NOTE(review): `pipe` is not a parameter of this function; it has
            # to resolve from an enclosing scope -- confirm.
            frames.append(FrameRecord.make_hframe_frame(hfid, pipe.pipeline_id(), [val]))

        elif isinstance(val, (np.ndarray, list)):
            presentation = hyperframe_pb2.TENSOR
            if isinstance(val, list):
                val = np.array(val)
            frames.append(DataContext.convert_serieslike2frame(hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

        elif isinstance(val, tuple):
            presentation = hyperframe_pb2.ROW
            # Iterate the value itself: enumerating the builtin `tuple` type
            # raises TypeError ('type' object is not iterable).
            # NOTE(review): every frame receives the whole tuple rather than
            # element i -- confirm this is intended.
            for i, _ in enumerate(val):
                frames.append(DataContext.convert_serieslike2frame(hfid, common.DEFAULT_FRAME_NAME + ':{}'.format(i), val, managed_path))

        elif isinstance(val, dict):
            presentation = hyperframe_pb2.ROW
            # .items() exists on both Python 2 and 3 (iteritems() is 2-only).
            for k, v in val.items():
                # NOTE(review): collections.Sequence is the legacy alias of
                # collections.abc.Sequence and was removed in Python 3.10.
                if isinstance(v, (list, tuple, pd.core.series.Series, np.ndarray, collections.Sequence)):
                    frames.append(DataContext.convert_serieslike2frame(hfid, k, v, managed_path))
                else:
                    # Anything that is not series-like is stored as a scalar.
                    frames.append(DataContext.convert_scalar2frame(hfid, k, v, managed_path))

        elif isinstance(val, pd.DataFrame):
            presentation = hyperframe_pb2.DF
            frames.extend(DataContext.convert_df2frames(hfid, val, managed_path))

        else:
            presentation = hyperframe_pb2.SCALAR
            frames.append(DataContext.convert_scalar2frame(hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

        return presentation, frames
Пример #2
0
    def run(self):
        """ Convert an existing file, csv, or dir to the bundle

        The input path is handled in one of three ways:

        * directory: every entry returned by ``os.listdir`` is copied in and
          referenced from a single link frame (TENSOR presentation).
        * ``.csv`` / ``.tsv`` file: read with pandas and converted to frames
          (DF presentation).
        * any other file: copied in and referenced from a single link frame
          (TENSOR presentation).

        If ``self.tags`` contains the reserved keys 'taskname' or
        'presentable', a message is printed and the method returns without
        writing a HyperFrame.  Otherwise a single HyperFrame is built and
        written to the current context.

        Raises:
            RuntimeError: if ``self.input_path`` is neither a directory nor a file.
        """

        # Only the hframe uuid of the first bundle output is used here.
        _, add_hf_uuid = self.bundle_outputs()[0]
        bundle_hframe_file = self.output()[PipeBase.HFRAME].path
        managed_path = os.path.dirname(bundle_hframe_file)

        if os.path.isdir(self.input_path):
            # With a directory, add all files under one special frame
            abs_input_path = os.path.abspath(self.input_path)
            files = [
                urlparse.urljoin('file:', os.path.join(abs_input_path, f))
                for f in os.listdir(abs_input_path)
            ]
            file_set = DataContext.copy_in_files(files, managed_path)
            frames = [
                FrameRecord.make_link_frame(add_hf_uuid, constants.FILE,
                                            file_set, managed_path),
            ]
            presentation = hyperframe_pb2.TENSOR
        elif os.path.isfile(self.input_path):
            if str(self.input_path).endswith(('.csv', '.tsv')):
                bundle_df = pd.read_csv(
                    self.input_path,
                    sep=None)  # sep=None means python parse engine detects sep
                frames = DataContext.convert_df2frames(
                    add_hf_uuid, bundle_df, managed_path=managed_path)
                presentation = hyperframe_pb2.DF
            else:
                # Other kinds of file
                abs_input_path = os.path.abspath(self.input_path)
                files = [urlparse.urljoin('file:', abs_input_path)]
                file_set = DataContext.copy_in_files(files, managed_path)
                frames = [
                    FrameRecord.make_link_frame(add_hf_uuid, constants.FILE,
                                                file_set, managed_path),
                ]
                presentation = hyperframe_pb2.TENSOR
        else:
            raise RuntimeError('Unable to find input file or path {}'.format(
                self.input_path))

        # Make a single HyperFrame output for an add

        if 'taskname' in self.tags or 'presentable' in self.tags:
            # Single-argument print(...) is valid on Python 3 and prints the
            # same text on Python 2, unlike the print statement.
            print("Unable to add bundle {}: tags contain reserved keys 'taskname' or 'presentable'".format(
                self.output_bundle))
            # Todo: Delete temporary bundle here
            return

        # Defaults first, so user tags are layered on top of them.
        tags = {'taskname': 'add', 'presentable': 'True', 'root_task': 'True'}

        tags.update(self.tags)

        task_hfr = self.make_hframe(frames,
                                    add_hf_uuid,
                                    self.bundle_inputs(),
                                    self.pipeline_id(),
                                    self.pipe_id(),
                                    self,
                                    tags=tags,
                                    presentation=presentation)

        self.pfs.get_curr_context().write_hframe(task_hfr)
Пример #3
0
    def parse_return_val(hfid, val, data_context):
        """
        Wrap a task's return value in frames and choose its presentation.

        The presentation recorded here determines how the value is handed to
        downstream tasks, so that they receive parameters in the form the
        author intended.

        POLICY / NOTE:  A non-HF output is a Presentable.
        NOTE: For now, a task output is *always* presentable.
        NOTE: No other code should set presentation in a HyperFrame.

        The counterpart that unpacks a presentable is disdat.fs.present_hfr().

        Args:
            hfid (str): UUID
            val (object): A scalar, dict, tuple, list, dataframe
            data_context (DataContext): The data context into which to place this value

        Returns:
            (presentation, frames[])

        """

        scalar_types = (int, float, str, bool, np.bool_, np.int8,
                        np.int16, np.int32, np.int64, np.uint8,
                        np.uint16, np.uint32, np.uint64, np.float16,
                        np.float32, np.float64, six.binary_type,
                        six.text_type, np.unicode_, np.string_)

        managed_path = os.path.join(data_context.get_object_dir(), hfid)

        if val is None:
            # None's stored as json.dumps([None]) or '[null]'
            return hyperframe_pb2.JSON, [
                DataContext.convert_scalar2frame(
                    hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path)
            ]

        if isinstance(val, HyperFrameRecord):
            return hyperframe_pb2.HF, [
                FrameRecord.make_hframe_frame(hfid, pipe.pipeline_id(), [val])
            ]

        if isinstance(val, (np.ndarray, list)):
            tensor = np.array(val) if isinstance(val, list) else val
            return hyperframe_pb2.TENSOR, [
                DataContext.convert_serieslike2frame(
                    hfid, common.DEFAULT_FRAME_NAME + ':0', tensor,
                    managed_path)
            ]

        if isinstance(val, tuple):
            # One frame per tuple position, named by its index.
            return hyperframe_pb2.ROW, [
                DataContext.convert_serieslike2frame(
                    hfid, common.DEFAULT_FRAME_NAME + ':{}'.format(idx), val,
                    managed_path)
                for idx, _ in enumerate(val)
            ]

        if isinstance(val, dict):
            row_frames = []
            for key, item in val.items():
                if isinstance(item, (list, tuple, pd.core.series.Series,
                                     np.ndarray, collections.Sequence)):
                    row_frames.append(
                        DataContext.convert_serieslike2frame(
                            hfid, key, item, managed_path))
                    continue

                # Not series-like, so it is assumed to be a scalar.
                assert isinstance(
                    item, scalar_types
                ), 'Disdat requires dictionary values to be one of {} not {}'.format(
                    scalar_types, type(item))
                row_frames.append(
                    DataContext.convert_scalar2frame(
                        hfid, key, item, managed_path))
            return hyperframe_pb2.ROW, row_frames

        if isinstance(val, pd.DataFrame):
            return hyperframe_pb2.DF, list(
                DataContext.convert_df2frames(hfid, val, managed_path))

        # Everything else is stored as a single scalar frame.
        return hyperframe_pb2.SCALAR, [
            DataContext.convert_scalar2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path)
        ]
Пример #4
0
    def parse_pipe_return_val(self, hfid, val, human_name=None):
        """

        Interpret the return values and create an HFrame to wrap them.
        This means setting the correct presentation bit in the HFrame so that
        we call downstream tasks with parameters as the author intended.

        POLICY / NOTE:  A non-HF output is a Presentable.
        NOTE: For now, a task output is *always* presentable.
        NOTE: No other code should set presentation in a HyperFrame.

        The mirror to this function (that unpacks a presentable) is disdat.fs.present_hfr()

        Args:
            hfid: identifier of the HyperFrame being built; also used as the
                sub-directory of the current context's object dir.
            val: the task return value -- None, a HyperFrameRecord, ndarray,
                list, tuple, dict of series-like values, DataFrame, or scalar.
            human_name: passed through to ``self.make_hframe``.

        Returns:
            The object returned by ``self.make_hframe`` for the frames and
            presentation derived from ``val``.

        """
        frames = []

        managed_path = os.path.join(
            self.pfs.get_curr_context().get_object_dir(), hfid)

        if val is None:
            presentation = hyperframe_pb2.HF

        elif isinstance(val, HyperFrameRecord):
            presentation = hyperframe_pb2.HF
            frames.append(
                FrameRecord.make_hframe_frame(hfid, self.pipeline_id(), [val]))

        elif isinstance(val, (np.ndarray, list)):
            presentation = hyperframe_pb2.TENSOR
            if isinstance(val, list):
                val = np.array(val)
            frames.append(
                DataContext.convert_serieslike2frame(
                    hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

        elif isinstance(val, tuple):
            presentation = hyperframe_pb2.ROW
            # Iterate the value itself: enumerating the builtin `tuple` type
            # raises TypeError ('type' object is not iterable).
            # NOTE(review): every frame receives the whole tuple rather than
            # element i -- confirm this is intended.
            for i, _ in enumerate(val):
                frames.append(
                    DataContext.convert_serieslike2frame(
                        hfid, common.DEFAULT_FRAME_NAME + ':{}'.format(i), val,
                        managed_path))

        elif isinstance(val, dict):
            presentation = hyperframe_pb2.ROW
            # .items() exists on both Python 2 and 3 (iteritems() is 2-only).
            for k, v in val.items():
                # Only series-like dictionary values are accepted here.
                assert isinstance(
                    v, (list, tuple, pd.core.series.Series, np.ndarray))
                frames.append(
                    DataContext.convert_serieslike2frame(
                        hfid, k, v, managed_path))

        elif isinstance(val, pd.DataFrame):
            presentation = hyperframe_pb2.DF
            frames.extend(
                DataContext.convert_df2frames(hfid, val, managed_path))

        else:
            presentation = hyperframe_pb2.SCALAR
            frames.append(
                DataContext.convert_scalar2frame(
                    hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

        hfr = self.make_hframe(frames,
                               hfid,
                               self.bundle_inputs(),
                               human_name=human_name,
                               tags={"presentable": "True"},
                               presentation=presentation)

        return hfr