def parse_return_val(hfid, val, data_context):
    """
    Interpret a return value: choose its presentation and build its frames.

    The presentation is selected from the Python type of `val`:
      * None              -> HF, no frames
      * HyperFrameRecord  -> HF, one hframe frame wrapping `val`
      * np.ndarray / list -> TENSOR, one series-like frame (a list is first
                             converted with np.array)
      * tuple             -> ROW, one series-like frame per tuple position
      * dict              -> ROW, one frame per key (scalar or series-like)
      * pd.DataFrame      -> DF, frames from DataContext.convert_df2frames
      * anything else     -> SCALAR, one scalar frame

    Args:
        hfid (str): UUID
        val (object): A scalar, dict, tuple, list, dataframe
        data_context (DataContext): The data context into which to place this value

    Returns:
        (presentation, frames[])
    """
    frames = []

    # Frames are written beneath <object dir>/<hfid>.
    managed_path = os.path.join(data_context.get_object_dir(), hfid)

    if val is None:
        presentation = hyperframe_pb2.HF

    elif isinstance(val, HyperFrameRecord):
        presentation = hyperframe_pb2.HF
        # NOTE(review): `pipe` is not a parameter of this function; it has to
        # resolve from an enclosing/module scope -- confirm it exists there.
        frames.append(
            FrameRecord.make_hframe_frame(hfid, pipe.pipeline_id(), [val]))

    elif isinstance(val, (np.ndarray, list)):
        presentation = hyperframe_pb2.TENSOR
        if isinstance(val, list):
            val = np.array(val)
        frames.append(
            DataContext.convert_serieslike2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

    elif isinstance(val, tuple):
        presentation = hyperframe_pb2.ROW
        # Iterate the tuple *value*; iterating the builtin `tuple` type
        # raises TypeError.
        # NOTE(review): each frame receives the whole tuple `val`, not the
        # i-th element -- confirm that this is intended.
        for i, _ in enumerate(val):
            frames.append(
                DataContext.convert_serieslike2frame(
                    hfid, common.DEFAULT_FRAME_NAME + ':{}'.format(i),
                    val, managed_path))

    elif isinstance(val, dict):
        presentation = hyperframe_pb2.ROW
        # .items() works on both Python 2 and Python 3 (iteritems() is 2-only).
        for k, v in val.items():
            if isinstance(v, (list, tuple, pd.core.series.Series, np.ndarray,
                              collections.Sequence)):
                frames.append(
                    DataContext.convert_serieslike2frame(
                        hfid, k, v, managed_path))
            else:
                # Anything that is not series-like is stored as a scalar.
                frames.append(
                    DataContext.convert_scalar2frame(
                        hfid, k, v, managed_path))

    elif isinstance(val, pd.DataFrame):
        presentation = hyperframe_pb2.DF
        frames.extend(DataContext.convert_df2frames(hfid, val, managed_path))

    else:
        presentation = hyperframe_pb2.SCALAR
        frames.append(
            DataContext.convert_scalar2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

    return presentation, frames
def run(self):
    """
    Convert an existing file, csv, or dir to the bundle.

    Depending on what `self.input_path` points at:
      * a directory: every entry listed in it is copied in and linked from
        a single link frame (presentation TENSOR);
      * a '.csv' / '.tsv' file: it is read into a DataFrame and converted
        to frames (presentation DF);
      * any other file: it is copied in and linked from a single link
        frame (presentation TENSOR).

    The resulting frames are wrapped in one HyperFrame, which is written to
    the current context.  If `self.tags` contains the reserved keys
    'taskname' or 'presentable', a message is printed and nothing is written.

    Returns:
        None

    Raises:
        RuntimeError: if `self.input_path` is neither a directory nor a file.
    """
    # Only the UUID of the first bundle output is used below.
    _, add_hf_uuid = self.bundle_outputs()[0]
    bundle_hframe_file = self.output()[PipeBase.HFRAME].path
    managed_path = os.path.dirname(bundle_hframe_file)

    if os.path.isdir(self.input_path):
        # With a directory, add all files under one special frame
        abs_input_path = os.path.abspath(self.input_path)
        files = [
            urlparse.urljoin('file:', os.path.join(abs_input_path, f))
            for f in os.listdir(abs_input_path)
        ]
        file_set = DataContext.copy_in_files(files, managed_path)
        frames = [
            FrameRecord.make_link_frame(add_hf_uuid, constants.FILE,
                                        file_set, managed_path),
        ]
        presentation = hyperframe_pb2.TENSOR
    elif os.path.isfile(self.input_path):
        if str(self.input_path).endswith(('.csv', '.tsv')):
            # sep=None means python parse engine detects sep
            bundle_df = pd.read_csv(self.input_path, sep=None)
            frames = DataContext.convert_df2frames(
                add_hf_uuid, bundle_df, managed_path=managed_path)
            presentation = hyperframe_pb2.DF
        else:
            # Other kinds of file
            abs_input_path = os.path.abspath(self.input_path)
            files = [urlparse.urljoin('file:', abs_input_path)]
            file_set = DataContext.copy_in_files(files, managed_path)
            frames = [
                FrameRecord.make_link_frame(add_hf_uuid, constants.FILE,
                                            file_set, managed_path),
            ]
            presentation = hyperframe_pb2.TENSOR
    else:
        raise RuntimeError('Unable to find input file or path {}'.format(
            self.input_path))

    # Make a single HyperFrame output for an add
    if 'taskname' in self.tags or 'presentable' in self.tags:
        # Single-argument call form: prints the same text on Python 2 and is
        # valid syntax on Python 3.
        print("Unable to add bundle {}: tags contain reserved keys 'taskname' or 'presentable'".format(
            self.output_bundle))
        # Todo: Delete temporary bundle here
        return

    # Start from the default tags, then apply the caller's tags on top.
    tags = {'taskname': 'add', 'presentable': 'True', 'root_task': 'True'}
    tags.update(self.tags)

    task_hfr = self.make_hframe(frames,
                                add_hf_uuid,
                                self.bundle_inputs(),
                                self.pipeline_id(),
                                self.pipe_id(),
                                self,
                                tags=tags,
                                presentation=presentation)

    self.pfs.get_curr_context().write_hframe(task_hfr)
def parse_return_val(hfid, val, data_context):
    """
    Wrap a return value: pick its presentation and build the frames for it.

    The presentation chosen here is what lets downstream tasks be called
    with parameters in the form the author intended.

    POLICY / NOTE: Any output that is not a HyperFrame is a Presentable.
    NOTE: For now, a task output is *always* presentable.
    NOTE: No other code should set presentation in a HyperFrame.  The
    counterpart that unpacks a presentable is disdat.fs.present_hfr().

    Args:
        hfid (str): UUID
        val (object): A scalar, dict, tuple, list, dataframe
        data_context (DataContext): The data context into which to place this value

    Returns:
        (presentation, frames[])
    """
    # Types accepted for dictionary values that are not series-like.
    scalar_types = (int, float, str, bool, np.bool_, np.int8, np.int16,
                    np.int32, np.int64, np.uint8, np.uint16, np.uint32,
                    np.uint64, np.float16, np.float32, np.float64,
                    six.binary_type, six.text_type, np.unicode_, np.string_)

    # Everything produced for this value lives beneath <object dir>/<hfid>.
    object_path = os.path.join(data_context.get_object_dir(), hfid)

    if val is None:
        # None's stored as json.dumps([None]) or '[null]'
        return hyperframe_pb2.JSON, [
            DataContext.convert_scalar2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':0', val, object_path)
        ]

    if isinstance(val, HyperFrameRecord):
        # NOTE(review): `pipe` is not a parameter of this function; it has to
        # resolve from an enclosing/module scope -- confirm it exists there.
        return hyperframe_pb2.HF, [
            FrameRecord.make_hframe_frame(hfid, pipe.pipeline_id(), [val])
        ]

    if isinstance(val, (np.ndarray, list)):
        kind = hyperframe_pb2.TENSOR
        # Lists are stored as arrays.
        tensor = np.array(val) if isinstance(val, list) else val
        return kind, [
            DataContext.convert_serieslike2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':0', tensor, object_path)
        ]

    if isinstance(val, tuple):
        # One frame per tuple position, named <default>:<position>.
        # NOTE(review): each frame is handed the whole tuple rather than the
        # element at that position -- confirm that this is intended.
        return hyperframe_pb2.ROW, [
            DataContext.convert_serieslike2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':{}'.format(position),
                val, object_path)
            for position, _ in enumerate(val)
        ]

    if isinstance(val, dict):
        kind = hyperframe_pb2.ROW
        row_frames = []
        for key, item in val.items():
            if isinstance(item, (list, tuple, pd.core.series.Series,
                                 np.ndarray, collections.Sequence)):
                row_frames.append(
                    DataContext.convert_serieslike2frame(
                        hfid, key, item, object_path))
                continue

            # Not series-like, so it must be one of the accepted scalars.
            assert isinstance(item, scalar_types), \
                'Disdat requires dictionary values to be one of {} not {}'.format(
                    scalar_types, type(item))
            row_frames.append(
                DataContext.convert_scalar2frame(
                    hfid, key, item, object_path))
        return kind, row_frames

    if isinstance(val, pd.DataFrame):
        return hyperframe_pb2.DF, list(
            DataContext.convert_df2frames(hfid, val, object_path))

    # Whatever is left is stored as a single scalar frame.
    return hyperframe_pb2.SCALAR, [
        DataContext.convert_scalar2frame(
            hfid, common.DEFAULT_FRAME_NAME + ':0', val, object_path)
    ]
def parse_pipe_return_val(self, hfid, val, human_name=None):
    """
    Interpret the return values and create an HFrame to wrap them.

    This means setting the correct presentation bit in the HFrame so that
    we call downstream tasks with parameters as the author intended.

    POLICY / NOTE: A non-HF output is a Presentable.
    NOTE: For now, a task output is *always* presentable.
    NOTE: No other code should set presentation in a HyperFrame.  The
    mirror to this function (that unpacks a presentable) is
    disdat.fs.present_hfr().

    The presentation is selected from the Python type of `val`:
      * None              -> HF, no frames
      * HyperFrameRecord  -> HF, one hframe frame wrapping `val`
      * np.ndarray / list -> TENSOR, one series-like frame (a list is first
                             converted with np.array)
      * tuple             -> ROW, one series-like frame per tuple position
      * dict              -> ROW, one series-like frame per key
      * pd.DataFrame      -> DF, frames from DataContext.convert_df2frames
      * anything else     -> SCALAR, one scalar frame

    Args:
        hfid: UUID under which the frames are created
        val: the value to wrap
        human_name: passed through to self.make_hframe as `human_name`

    Returns:
        The result of self.make_hframe(...) for the frames built from `val`,
        tagged {"presentable": "True"} with the chosen presentation.

    Raises:
        AssertionError: if `val` is a dict and one of its values is not a
            list, tuple, pandas Series or numpy array.
    """
    frames = []

    # Frames are written beneath <current context object dir>/<hfid>.
    managed_path = os.path.join(
        self.pfs.get_curr_context().get_object_dir(), hfid)

    if val is None:
        presentation = hyperframe_pb2.HF

    elif isinstance(val, HyperFrameRecord):
        presentation = hyperframe_pb2.HF
        frames.append(
            FrameRecord.make_hframe_frame(hfid, self.pipeline_id(), [val]))

    elif isinstance(val, (np.ndarray, list)):
        presentation = hyperframe_pb2.TENSOR
        if isinstance(val, list):
            val = np.array(val)
        frames.append(
            DataContext.convert_serieslike2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

    elif isinstance(val, tuple):
        presentation = hyperframe_pb2.ROW
        # Iterate the tuple *value*; iterating the builtin `tuple` type
        # raises TypeError.
        # NOTE(review): each frame receives the whole tuple `val`, not the
        # i-th element -- confirm that this is intended.
        for i, _ in enumerate(val):
            frames.append(
                DataContext.convert_serieslike2frame(
                    hfid, common.DEFAULT_FRAME_NAME + ':{}'.format(i),
                    val, managed_path))

    elif isinstance(val, dict):
        presentation = hyperframe_pb2.ROW
        # .items() works on both Python 2 and Python 3 (iteritems() is 2-only).
        for k, v in val.items():
            assert isinstance(
                v, (list, tuple, pd.core.series.Series, np.ndarray))
            frames.append(
                DataContext.convert_serieslike2frame(
                    hfid, k, v, managed_path))

    elif isinstance(val, pd.DataFrame):
        presentation = hyperframe_pb2.DF
        frames.extend(
            DataContext.convert_df2frames(hfid, val, managed_path))

    else:
        presentation = hyperframe_pb2.SCALAR
        frames.append(
            DataContext.convert_scalar2frame(
                hfid, common.DEFAULT_FRAME_NAME + ':0', val, managed_path))

    hfr = self.make_hframe(frames,
                           hfid,
                           self.bundle_inputs(),
                           human_name=human_name,
                           tags={"presentable": "True"},
                           presentation=presentation)

    return hfr