def _gen_metadata(self):
    """
    Build a Metadata object describing the columns of ``self._data``.

    When ``self._data`` is a pyspark ``GroupedData`` a freshly constructed
    ``Metadata()`` is returned as-is. Otherwise one data descriptor is added
    per field of ``self._data.schema`` (name and data type, both converted
    with ``str``), followed by a single module entry for the DataStream class.

    Returns:
        Metadata: the generated metadata object.
    """
    # Imported locally, as in the original, so pyspark is only required
    # when this method is actually invoked.
    from pyspark.sql.group import GroupedData

    # Guard clause: grouped data is handled by returning a bare Metadata.
    if isinstance(self._data, GroupedData):
        return Metadata()

    frame_schema = self._data.schema
    generated = Metadata()

    for column in frame_schema.fields:
        descriptor = DataDescriptor().set_name(str(column.name))
        descriptor = descriptor.set_type(str(column.dataType))
        generated.add_dataDescriptor(descriptor)

    module_info = ModuleMetadata().set_name(
        "cerebralcortex.core.datatypes.datastream.DataStream"
    )
    module_info = module_info.set_attribute("url", "https://md2k.org")
    module_info = module_info.set_author("Nasir Ali", "*****@*****.**")
    generated.add_module(module_info)

    return generated
def get_metadata(stress_imputed_data, output_stream_name, input_stream_name):
    """
    Generate metadata for a stress-imputed datastream.

    Args:
        stress_imputed_data (DataStream): object whose ``schema.fields`` are
            read; each field contributes one data descriptor (name and type).
        output_stream_name (str): name set on the generated metadata.
        input_stream_name (str): stream name registered as the input stream.

    Returns:
        Metadata: metadata named ``output_stream_name`` with the description
        "stress imputed", one data descriptor per schema field, and one
        module entry for the forward-fill imputer.
    """
    schema = stress_imputed_data.schema

    stream_metadata = Metadata()
    stream_metadata.set_name(output_stream_name).set_description(
        "stress imputed"
    ).add_input_stream(input_stream_name)

    for field in schema.fields:
        stream_metadata.add_dataDescriptor(
            DataDescriptor().set_name(str(field.name)).set_type(str(field.dataType))
        )

    # The URL scheme was previously misspelled as "hhtps://".
    # NOTE(review): if any consumer keys on module attributes (e.g. a hash of
    # the metadata), correcting the URL changes that key - confirm.
    stream_metadata.add_module(
        ModuleMetadata()
        .set_name("stress forward fill imputer")
        .set_attribute("url", "https://md2k.org")
        .set_author("Md Azim Ullah", "*****@*****.**")
    )
    return stream_metadata