def decode_data(cls, msg, typedef):
    r"""Decode an object.

    Args:
        msg (str): Encoded object to decode.
        typedef (dict): Type definition that should be used to decode the
            object.

    Returns:
        object: Decoded object.

    """
    lines = backwards.as_str(msg).splitlines()
    metadata = {'comments': [], 'element_order': [], 'property_order': {}}
    if lines[0] != 'ply':
        raise ValueError("The first line must be 'ply'")
    # Parse header
    e = None
    p = None
    type_map = {}
    size_map = {}
    obj = {}
    for i, line in enumerate(lines):
        if line.startswith('format'):
            metadata['plyformat'] = line.split(None, 1)[-1]
        elif line.startswith('comment'):
            out = line.split(None, 1)[-1]
            if out.startswith('material:'):
                metadata['element_order'].append('material')
                obj['material'] = out.split(None, 1)[-1]
            metadata['comments'].append(out)
        elif line.startswith('element'):
            vars = line.split()
            e_sing = vars[1]
            e = singular2plural(e_sing)
            size_map[e] = int(float(vars[2]))
            type_map[e] = {}
            metadata['element_order'].append(e)
            metadata['property_order'][e] = []
            obj[e] = []
        elif line.startswith('property'):
            vars = line.split()
            p = vars[-1]
            type_map[e][p] = ' '.join(vars[1:-1])
            metadata['property_order'][e].append(p)
        elif 'end_header' in line:
            headline = i + 1
            break
    # Parse body
    i = headline
    for e in metadata['element_order']:
        if e == 'material':
            continue
        for ie in range(size_map[e]):
            vars = lines[i].split()
            iv = 0
            new = {}
            for p in metadata['property_order'][e]:
                if type_map[e][p].startswith('list'):
                    type_vars = type_map[e][p].split()
                    count_type = translate_ply2py(type_vars[1])
                    plist_type = translate_ply2py(type_vars[2])
                    count = count_type(vars[iv])
                    plist = []
                    iv += 1
                    for ip in range(count):
                        plist.append(plist_type(vars[iv]))
                        iv += 1
                    new[p] = plist
                else:
                    prop_type = translate_ply2py(type_map[e][p])
                    new[p] = prop_type(vars[iv])
                    iv += 1
            assert (iv == len(vars))
            obj[e].append(new)
            i += 1
    # Check that all properties were filled in
    for e in metadata['element_order']:
        if e not in metadata['property_order']:
            continue
        for p in metadata['property_order'][e]:
            assert (len(obj[e]) == size_map[e])
    # Return
    return PlyDict(obj)
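
# A minimal usage sketch for decode_data. It is defined as a classmethod on a
# Ply metaschema type; the class name PlyMetaschemaType below is assumed for
# illustration and may differ from the actual registration.
#
#     msg = (b'ply\n'
#            b'format ascii 1.0\n'
#            b'element vertex 1\n'
#            b'property double x\n'
#            b'property double y\n'
#            b'property double z\n'
#            b'end_header\n'
#            b'0.0 0.0 0.0\n')
#     obj = PlyMetaschemaType.decode_data(msg, {'type': 'ply'})
#     assert len(obj['vertices']) == 1  # PlyDict with one decoded vertex
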
class SerializeBase(tools.YggClass):
    r"""Base class for serializing/deserializing a Python object into/from
    a bytes message.

    Args:
        newline (str, optional): One or more characters indicating a newline.
            Defaults to '\n'.
        comment (str, optional): One or more characters indicating a comment.
            Defaults to '# '.
        datatype (schema, optional): JSON schema defining the type of object
            that the serializer will be used to serialize/deserialize.
            Defaults to default_datatype.
        **kwargs: Additional keyword args are processed as part of the type
            definition.

    Attributes:
        initialized (bool): True if the serializer has been initialized
            either by input arguments specifying the type or by inferring
            the type from a processed message.

    Class Attributes:
        has_header (bool): True if the serialization has a header when
            written to a file.
        default_read_meth (str): Default method that data should be read from
            a file for deserialization.
        is_framed (bool): True if the serialization has a frame allowing
            multiple serialized objects to be recovered from a single
            message.
        concats_as_str (bool): True if serialized objects can be concatenated
            directly as strings.
        encoded_datatype (schema): JSON schema defining the type of object
            produced by the class's func_serialize method. For most classes
            this will be {'type': 'bytes'}, indicating that the method will
            produce bytes suitable for serialization.

    """

    _seritype = None
    _schema_type = 'serializer'
    _schema_subtype_key = 'seritype'
    _schema_requried = ['seritype']
    _schema_properties = {
        'seritype': {'type': 'string', 'default': 'default',
                     'description': 'Serializer type.'},
        'newline': {'type': 'string',
                    'default': backwards.as_str(serialize._default_newline)},
        'comment': {'type': 'string',
                    'default': backwards.as_str(serialize._default_comment)},
        'datatype': {'type': 'schema'}}
    _oldstyle_kws = ['format_str', 'field_names', 'field_units', 'as_array']
    _attr_conv = ['newline', 'comment']
    default_datatype = {'type': 'bytes'}
    default_encoded_datatype = {'type': 'bytes'}
    has_header = False
    default_read_meth = 'read'
    is_framed = False
    concats_as_str = True

    def __init__(self, **kwargs):
        if 'format_str' in kwargs:
            drv = tools.get_subprocess_language_driver()
            if drv.decode_format is not None:
                kwargs['format_str'] = drv.decode_format(kwargs['format_str'])
        if isinstance(kwargs.get('datatype', None), MetaschemaType):
            self.datatype = kwargs.pop('datatype')
        super(SerializeBase, self).__init__(**kwargs)
        kwargs = self.extra_kwargs
        self.extra_kwargs = {}
        # Set defaults
        if self.datatype is None:
            self.datatype = self.default_datatype
        elif (isinstance(self.datatype, dict)
              and (self.datatype != self.default_datatype)):
            kwargs['datatype'] = self.datatype
        # Update typedef
        self.initialized = False
        if isinstance(self.datatype, dict):
            self.datatype = get_type_from_def(self.default_datatype,
                                              dont_complete=True)
        if getattr(self, 'encoded_datatype', None) is None:
            self.encoded_datatype = self.default_encoded_datatype
        if isinstance(self.encoded_datatype, dict):
            self.encoded_datatype = get_type_from_def(self.encoded_datatype,
                                                      dont_complete=True)
        self.update_serializer(**kwargs)
        self.initialized = self.is_initialized()

    def is_initialized(self):
        r"""Determine if the serializer has been initialized by comparing
        the current datatype against the default for the class.

        Returns:
            bool: True if the current datatype is different from the default,
                False otherwise.
""" return (self.typedef != self.default_datatype) @staticmethod def before_registration(cls): r"""Operations that should be performed to modify class attributes prior to registration.""" tools.YggClass.before_registration(cls) # If the serialization cannot be concatenated, then it is not framed by # definition and would be meaningless if read-in incrementally if not cls.concats_as_str: assert (not cls.is_framed) assert (cls.default_read_meth == 'read') @classmethod def object2dict(cls, obj, **kwargs): r"""Convert a message object into a dictionary. Args: obj (object): Object that would be serialized by this class and should be returned in a dictionary form. **kwargs: Additional keyword arguments are ignored. Returns: dict: Dictionary version of the provided object. """ return {'f0': obj} @classmethod def object2array(cls, obj, **kwargs): r"""Convert a message object into an array. Args: obj (object): Object that would be serialized by this class and should be returned in an array form. **kwargs: Additional keyword arguments are ignored. Returns: np.array: Array version of the provided object or None if one cannot be created. """ return None @classmethod def concatenate(cls, objects, **kwargs): r"""Concatenate objects to get object that would be recieved if the concatenated serialization were deserialized. Args: objects (list): Objects to be concatenated. **kwargs: Additional keyword arguments are ignored. Returns: list: Set of objects that results from concatenating those provided. """ return objects @classmethod def get_testing_options(cls, table_example=False, array_columns=False, include_oldkws=False): r"""Method to return a dictionary of testing options for this class. Arguments: table_example (bool, optional): If True, the returned options will be for an array of elements representing a table-like structure. Defaults to False. array_columns (bool, optional): If True, table_example is set to True and the returned options will be for an array data type where each element is an array representing a column Defaults to False. include_oldkws (bool, optional): If True, old-style keywords will be added to the returned options. This will only have an effect if table_example is True. Defaults to False. Returns: dict: Dictionary of variables to use for testing. Key/value pairs: * kwargs (dict): Keyword arguments for comms tested with the provided content. * empty (object): Object produced from deserializing an empty message. * objects (list): List of objects to be serialized/deserialized. extra_kwargs (dict): Extra keyword arguments not used to construct type definition. * typedef (dict): Type definition resulting from the supplied kwargs. * dtype (np.dtype): Numpy data types that is consistent with the determined type definition. * contents (bytes): Concatenated serialization that will result from deserializing the serialized objects. * contents_recv (list): List of objects that would be deserialized from contents. 
""" if array_columns: table_example = True if table_example: rows = [(b'one', np.int32(1), 1.0), (b'two', np.int32(2), 2.0), (b'three', np.int32(3), 3.0)] out = { 'kwargs': {}, 'empty': [], 'dtype': None, 'extra_kwargs': {}, 'typedef': { 'type': 'array', 'items': [{ 'type': 'bytes', 'units': 'n/a', 'title': 'name' }, { 'type': 'int', 'precision': 32, 'units': 'umol', 'title': 'count' }, { 'type': 'float', 'precision': 64, 'units': 'cm', 'title': 'size' }] }, 'contents': (b'# name\tcount\tsize\n' + b'# n/a\tumol\tcm\n' + b'# %5s\t%d\t%f\n' + b' one\t1\t1.000000\n' + b' two\t2\t2.000000\n' + b'three\t3\t3.000000\n' + b' one\t1\t1.000000\n' + b' two\t2\t2.000000\n' + b'three\t3\t3.000000\n'), 'objects': 2 * rows, 'field_names': ['name', 'count', 'size'], 'field_units': ['n/a', 'umol', 'cm'] } if include_oldkws: out['kwargs'].update({ 'format_str': '%5s\t%d\t%f\n', 'field_names': ['name', 'count', 'size'], 'field_units': ['n/a', 'umol', 'cm'] }) out['extra_kwargs'].update({ 'format_str': backwards.as_str(out['kwargs']['format_str']) }) if array_columns: out['kwargs']['as_array'] = True dtype = np.dtype({ 'names': out['field_names'], 'formats': ['%s5' % backwards.np_dtype_str, 'i4', 'f8'] }) out['dtype'] = dtype arr = np.array(rows, dtype=dtype) lst = [ units.add_units(arr[n], u) for n, u in zip(out['field_names'], out['field_units']) ] out['objects'] = [lst, lst] for x in out['typedef']['items']: x['subtype'] = x['type'] x['type'] = '1darray' if x['title'] == 'name': x['precision'] = 40 else: out = { 'kwargs': {}, 'empty': b'', 'dtype': None, 'typedef': cls.default_datatype, 'extra_kwargs': {}, 'objects': [b'Test message\n', b'Test message 2\n'] } out['contents'] = b''.join(out['objects']) return out @property def read_meth(self): r"""str: Method that should be used to read data for deserialization.""" return self.default_read_meth @classmethod def seri_kws(cls): r"""Get a list of valid keyword arguments.""" return list( set(list(cls._schema_properties.keys()) + cls._oldstyle_kws)) @property def typedef(self): r"""dict: Type definition.""" return copy.deepcopy(self.datatype._typedef) @property def encoded_typedef(self): r"""dict: Type definition for encoded data objects.""" return self.encoded_datatype._typedef @property def input_kwargs(self): r"""dict: Get the input keyword arguments used to create this class.""" out = {} for k in self._schema_properties.keys(): v = getattr(self, k, None) if v is not None: out[k] = copy.deepcopy(v) for k in self._attr_conv: if (k in out) and isinstance(out[k], backwards.string_types): out[k] = backwards.as_str(out[k]) return out @property def serializer_info(self): r"""dict: Serializer info.""" out = copy.deepcopy(self.extra_kwargs) for k in self._schema_properties.keys(): if k in ['datatype']: continue v = getattr(self, k, None) if v is not None: out[k] = copy.deepcopy(v) for k in out.keys(): v = out[k] if isinstance(v, backwards.string_types): out[k] = backwards.as_str(v) elif isinstance(v, (list, tuple)): out[k] = [] for x in v: out[k].append(backwards.as_str(x, allow_pass=True)) else: out[k] = v return out @property def empty_msg(self): r"""obj: Object indicating empty message.""" return self.datatype._empty_msg # def is_empty(self, obj): # r"""Determine if an object represents an empty message for this serializer. # Args: # obj (object): Object to test. # Returns: # bool: True if the object is empty, False otherwise. 
# """ # emsg = self.empty_msg # return (isinstance(obj, type(emsg)) and (obj == emsg)) def get_field_names(self, as_bytes=False): r"""Get the field names for an array of fields. Args: as_bytes (bool, optional): If True, the field names will be returned as bytes. If False the field names will be returned as unicode. Defaults to False. Returns: list: Names for each field in the data type. """ if getattr(self, 'field_names', None) is not None: out = self.field_names elif self.typedef['type'] != 'array': out = None elif isinstance(self.typedef['items'], dict): # pragma: debug raise Exception("Variable number of items not yet supported.") elif isinstance(self.typedef['items'], list): out = [] any_names = False for i, x in enumerate(self.typedef['items']): out.append(x.get('title', 'f%d' % i)) if len(x.get('title', '')) > 0: any_names = True # Don't use field names if they are all defaults if not any_names: out = None if (out is not None): if as_bytes: out = [backwards.as_bytes(x) for x in out] else: out = [backwards.as_str(x) for x in out] return out def get_field_units(self, as_bytes=False): r"""Get the field units for an array of fields. Args: as_bytes (bool, optional): If True, the field units will be returned as bytes. If False the field units will be returned as unicode. Defaults to False. Returns: list: Units for each field in the data type. """ if self.typedef['type'] != 'array': return None if getattr(self, 'field_units', None) is not None: out = self.field_units elif isinstance(self.typedef['items'], dict): # pragma: debug raise Exception("Variable number of items not yet supported.") elif isinstance(self.typedef['items'], list): out = [] any_units = False for i, x in enumerate(self.typedef['items']): out.append(x.get('units', '')) if len(x.get('units', '')) > 0: any_units = True # Don't use field units if they are all defaults if not any_units: out = None if (out is not None): if as_bytes: out = [backwards.as_bytes(x) for x in out] else: out = [backwards.as_str(x) for x in out] return out @property def numpy_dtype(self): r"""np.dtype: Corresponding structured data type. Will be None unless the type is an array of 1darrays.""" out = None if (self.typedef['type'] == 'array') and ('items' in self.typedef): if isinstance(self.typedef['items'], dict): as_array = (self.typedef['items']['type'] in ['1darray', 'ndarray']) if as_array: out = definition2dtype(self.typedef['items']) elif isinstance(self.typedef['items'], (list, tuple)): as_array = True dtype_list = [] field_names = [] for i, x in enumerate(self.typedef['items']): if x['type'] != '1darray': as_array = False break dtype_list.append(definition2dtype(x)) field_names.append(x.get('title', 'f%d' % i)) if as_array: out = np.dtype(dict(names=field_names, formats=dtype_list)) return out def initialize_from_message(self, msg, **metadata): r"""Initialize the serializer based on recieved message. Args: msg (object): Message that serializer should be initialized from. **kwargs: Additional keyword arguments are treated as metadata that may contain additional information for initializing the serializer. """ if ((self.initialized or metadata.get('raw', False) or metadata.get('incomplete', False))): return cls = guess_type_from_obj(msg) typedef = cls.encode_type(msg) typedef = cls.extract_typedef(typedef) metadata.update(typedef) self.initialize_serializer(metadata) def initialize_serializer(self, metadata, extract=False): r"""Initialize a serializer based on received metadata. 
        This method will exit early if the serializer has already been
        initialized.

        Args:
            metadata (dict): Header information including type info that
                should be used to initialize the serializer class.
            extract (bool, optional): If True, the type will be defined using
                a subset of the type information in metadata. If False, all
                of the type information will be used. Defaults to False.

        """
        if (self.initialized or metadata.get('raw', False)
                or metadata.get('incomplete', False)):
            return
        self.update_serializer(extract=extract, **metadata)
        self.initialized = (self.typedef != self.default_datatype)

    def update_serializer(self, extract=False, skip_type=False, **kwargs):
        r"""Update serializer with provided information.

        Args:
            extract (bool, optional): If True, the updated typedef will be
                the bare minimum as extracted from the total set of provided
                keywords, otherwise the entire set will be used. Defaults to
                False.
            skip_type (bool, optional): If True, everything is updated except
                the data type. Defaults to False.
            **kwargs: Additional keyword arguments are processed as part of
                the type definition and are parsed for old-style keywords.

        Raises:
            RuntimeError: If there are keywords that are not valid typedef
                keywords (current or old-style).

        """
        old_datatype = None
        if self.initialized:
            old_datatype = copy.deepcopy(self.datatype)
        _metaschema = get_metaschema()
        # Raise an error if the types are not compatible
        seritype = kwargs.pop('seritype', self.seritype)
        if ((seritype != self._seritype)
                and (seritype != 'default')):  # pragma: debug
            raise Exception("Cannot change types from %s to %s."
                            % (self._seritype, seritype))
        # Remove metadata keywords unrelated to serialization
        # TODO: Find a better way of tracking these
        _remove_kws = ['body', 'address', 'size', 'id', 'incomplete', 'raw',
                       'commtype', 'filetype', 'response_address',
                       'request_id', 'append', 'in_temp', 'is_series',
                       'working_dir', 'fmts', 'model_driver', 'env',
                       'send_converter', 'recv_converter', 'typedef_base']
        kws = list(kwargs.keys())
        for k in kws:
            if (k in _remove_kws) or k.startswith('zmq'):
                kwargs.pop(k)
        # Set attributes and remove unused metadata keys
        for k in self._schema_properties.keys():
            if (k in kwargs) and (k != 'datatype'):
                setattr(self, k, kwargs.pop(k))
        # Create preliminary typedef
        typedef = kwargs.pop('datatype', {})
        for k in _metaschema['properties'].keys():
            if k in kwargs:
                typedef[k] = kwargs.pop(k)
        # Update extra keywords
        if len(kwargs) > 0:
            self.extra_kwargs.update(kwargs)
            self.debug("Extra kwargs: %s" % str(self.extra_kwargs))
        # Update type
        if not skip_type:
            # Update typedef from old-style keywords in extra_kwargs
            typedef = self.update_typedef_from_oldstyle(typedef)
            if typedef.get('type', None):
                if extract:
                    cls = get_type_class(typedef['type'])
                    typedef = cls.extract_typedef(typedef)
                self.datatype = get_type_from_def(typedef)
            # Check to see if the new datatype is compatible with the old one
            if old_datatype is not None:
                errors = list(compare_schema(self.typedef,
                                             old_datatype._typedef) or ())
                if errors:
                    raise RuntimeError(
                        ("Updated datatype is not compatible with the "
                         + "existing one. New:\n%s\nOld:\n%s\n")
                        % (pprint.pformat(self.typedef),
                           pprint.pformat(old_datatype._typedef)))
        # Enforce that strings used with messages are in bytes
        for k in self._attr_conv:
            v = getattr(self, k, None)
            if isinstance(v, backwards.string_types):
                setattr(self, k, backwards.as_bytes(v))

    def update_typedef_from_oldstyle(self, typedef):
        r"""Update a given typedef using an old, table-style serialization
        spec.
        Existing typedef values are not overwritten and warnings are raised
        if the provided serialization spec is not compatible with the type
        definition.

        Args:
            typedef (dict): Type definition to update.

        Returns:
            dict: Updated typedef.

        """
        for k in self._oldstyle_kws:
            used = []
            updated = []
            v = self.extra_kwargs.get(k, getattr(self, k, None))
            if v is None:
                continue
            # Check status
            if ((k != 'format_str')
                    and (typedef.get('type', None) != 'array')):
                continue
            # Key specific changes to type
            if k == 'format_str':
                v = backwards.as_str(v)
                fmts = serialize.extract_formats(v)
                if 'type' in typedef:
                    if typedef.get('type', None) == 'array':
                        assert (len(typedef.get('items', [])) == len(fmts))
                        # if len(typedef.get('items', [])) != len(fmts):
                        #     warnings.warn(
                        #         ("Number of items in typedef (%d) doesn't "
                        #          + "match the number of formats (%d).")
                        #         % (len(typedef.get('items', [])),
                        #            len(fmts)))
                    continue
                as_array = self.extra_kwargs.get(
                    'as_array', getattr(self, 'as_array', False))
                typedef.update(type='array', items=[])
                for i, fmt in enumerate(fmts):
                    nptype = serialize.cformat2nptype(fmt)
                    itype = OneDArrayMetaschemaType.encode_type(
                        np.ones(1, nptype))
                    itype = OneDArrayMetaschemaType.extract_typedef(itype)
                    if (fmt == '%s') and ('precision' in itype):
                        del itype['precision']
                    if as_array:
                        itype['type'] = '1darray'
                    else:
                        itype['type'] = itype.pop('subtype')
                    if ((itype['type'] in _flexible_types)
                            and ('precision' in itype)):
                        del itype['precision']
                    typedef['items'].append(itype)
                used.append('as_array')
                updated.append('format_str')
            elif k == 'as_array':
                # Can only be used in conjunction with format_str
                pass
            elif k in ['field_names', 'field_units']:
                v = [backwards.as_str(x) for x in v]
                if k == 'field_names':
                    tk = 'title'
                else:
                    tk = 'units'
                if isinstance(typedef['items'], dict):
                    typedef['items'] = [copy.deepcopy(typedef['items'])
                                        for _ in range(len(v))]
                assert (len(v) == len(typedef.get('items', [])))
                # if len(v) != len(typedef.get('items', [])):
                #     warnings.warn(
                #         '%d %ss provided, but only %d items in typedef.'
                #         % (len(v), k, len(typedef.get('items', []))))
                #     continue
                all_updated = True
                for iv, itype in zip(v, typedef.get('items', [])):
                    if tk in itype:
                        all_updated = False
                    itype.setdefault(tk, iv)
                if all_updated:
                    used.append(k)
                updated.append(k)  # Won't change anything unless it's an attribute
            else:  # pragma: debug
                raise ValueError("Unrecognized table-style specification "
                                 + "keyword: '%s'." % k)
            for rk in used:
                if rk in self.extra_kwargs:
                    del self.extra_kwargs[rk]
            for rk in updated:
                if rk in self.extra_kwargs:
                    self.extra_kwargs[rk] = v
                elif hasattr(self, rk):
                    setattr(self, rk, v)
        return typedef

    def func_serialize(self, args):
        r"""Serialize a message.

        Args:
            args: List of arguments to be formatted or numpy array to be
                serialized.

        Returns:
            bytes, str: Serialized message.

        """
        raise NotImplementedError("func_serialize not implemented.")

    def func_deserialize(self, msg):
        r"""Deserialize a message.

        Args:
            msg: Message to be deserialized.

        Returns:
            obj: Deserialized message.

        """
        raise NotImplementedError("func_deserialize not implemented.")

    def serialize(self, args, header_kwargs=None, add_serializer_info=False,
                  no_metadata=False):
        r"""Serialize a message.

        Args:
            args (obj): List of arguments to be formatted or a ready-made
                message.
            header_kwargs (dict, optional): Keyword arguments that should be
                added to the header. Defaults to None and no header is added.
            add_serializer_info (bool, optional): If True, serializer
                information will be added to the metadata. Defaults to False.
            no_metadata (bool, optional): If True, no metadata will be added
                to the serialized message. Defaults to False.

        Returns:
            bytes, str: Serialized message.

        Raises:
            TypeError: If the returned msg is not bytes type (str on
                Python 2).

        """
        if header_kwargs is None:
            header_kwargs = {}
        if (isinstance(args, backwards.bytes_type)
                and (args == tools.YGG_MSG_EOF)):
            header_kwargs['raw'] = True
        self.initialize_from_message(args, **header_kwargs)
        metadata = {'no_metadata': no_metadata}
        if add_serializer_info:
            self.debug("serializer_info = %s", str(self.serializer_info))
            metadata.update(self.serializer_info)
            metadata['typedef_base'] = self.typedef
        if header_kwargs is not None:
            metadata.update(header_kwargs)
        if header_kwargs.get('raw', False):
            data = args
        else:
            if self.func_serialize is None:
                data = args
            else:
                data = self.func_serialize(args)
                if self.encoded_typedef['type'] == 'bytes':
                    if not isinstance(data, backwards.bytes_type):
                        raise TypeError(
                            ("Serialization function returned object "
                             + "of type '%s', not required '%s' type.")
                            % (type(data), backwards.bytes_type))
                    metadata['dont_encode'] = True
                    if not no_metadata:
                        metadata['metadata'] = self.datatype.encode_type(
                            args, typedef=self.typedef)
        if (self.initialized
                and (not tools.check_environ_bool(
                    'YGG_VALIDATE_ALL_MESSAGES'))):
            metadata.setdefault('dont_check', True)
        out = self.encoded_datatype.serialize(data, **metadata)
        return out

    def deserialize(self, msg, **kwargs):
        r"""Deserialize a message.

        Args:
            msg (str, bytes): Message to be deserialized.
            **kwargs: Additional keyword arguments are passed to the
                deserialize method of the datatype class.

        Returns:
            tuple(obj, dict): Deserialized message and header information.

        Raises:
            TypeError: If msg is not bytes type (str on Python 2).

        """
        if ((self.func_deserialize is not None)
                and (self.encoded_typedef['type'] == 'bytes')):
            kwargs['dont_decode'] = True
        if (self.initialized
                and (not tools.check_environ_bool(
                    'YGG_VALIDATE_ALL_MESSAGES'))):
            kwargs.setdefault('dont_check', True)
        out, metadata = self.encoded_datatype.deserialize(msg, **kwargs)
        if self.func_deserialize is not None:
            if metadata['size'] == 0:
                out = self.empty_msg
            elif not (metadata.get('incomplete', False)
                      or metadata.get('raw', False)):
                if 'metadata' in metadata:
                    for k, v in metadata.items():
                        if k not in ['type', 'precision', 'units',
                                     'metadata']:
                            metadata['metadata'][k] = v
                    metadata = metadata.pop('metadata')
                if not self.initialized:
                    self.update_serializer(extract=True, **metadata)
                out = self.func_deserialize(out)
        # Update serializer
        typedef_base = metadata.pop('typedef_base', {})
        typedef = copy.deepcopy(metadata)
        typedef.update(typedef_base)
        if not ((metadata.get('size', 0) == 0)
                or metadata.get('incomplete', False)
                or metadata.get('raw', False)):
            self.initialize_serializer(typedef, extract=True)
        return out, metadata

    def enable_file_header(self):  # pragma: no cover
        r"""Set serializer attributes to enable file headers to be included
        in the serializations."""
        pass

    def disable_file_header(self):
        r"""Set serializer attributes to disable file headers from being
        included in the serializations."""
        pass

    def serialize_file_header(self):  # pragma: no cover
        r"""Return the serialized header information that should be prepended
        to files serialized using this class.

        Returns:
            bytes: Header string that should be written to the file.

        """
        return b''

    def deserialize_file_header(self, fd):  # pragma: no cover
        r"""Deserialize the header information from the file and update the
        serializer.

        Args:
            fd (file): File containing the header.
""" pass def consolidate_array(self, out): r"""Consolidate message into a structure numpy array if possible. Args: out (list, tuple, np.ndarray): Object to consolidate into a structured numpy array. Returns: np.ndarray: Structured numpy array containing consolidated message. Raises: ValueError: If the array cannot be consolidated. """ np_dtype = self.numpy_dtype if np_dtype and isinstance(out, (list, tuple, np.ndarray)): out = serialize.consolidate_array(out, dtype=np_dtype) else: warnings.warn(("Cannot consolidate message into a structured " + "numpy array: %s") % str(out)) return out # def format_header(self, header_info): # r"""Format header info to form a string that should prepend a message. # Args: # header_info (dict): Properties that should be included in the header. # Returns: # str: Message with header in front. # """ def parse_header(self, msg): r"""Extract header info from a message. Args: msg (str): Message to extract header from. Returns: dict: Message properties. """ return self.datatype.deserialize(msg, no_data=True)
def table_to_array(msg, fmt_str=None, use_astropy=False, names=None,
                   delimiter=None, comment=None, encoding='utf-8'):
    r"""Extract information from an ASCII table as an array.

    Args:
        msg (bytes): ASCII table as a bytes string.
        fmt_str (bytes): Format string that should be used to parse the
            table. If not provided, this will attempt to determine the types
            of columns based on their contents.
        use_astropy (bool, optional): If True, astropy will be used to parse
            the table if it is installed. Defaults to False.
        names (list, optional): Field names that should be used for the
            structured data type of the output array. If not provided, names
            are generated based on the order of the fields in the table.
        delimiter (str, optional): String used to separate columns. Defaults
            to None and is not used. This is only used if fmt_str is not
            provided.
        comment (str, optional): String used to denote comments. Defaults to
            None and is not used. This is only used if fmt_str is not
            provided.
        encoding (str, optional): Encoding that should be used in Python 3 or
            higher to extract information from the message. Defaults to
            'utf-8'.

    Returns:
        np.ndarray: Table contents as an array.

    """
    if not _use_astropy:
        use_astropy = False
    if fmt_str is None:
        dtype = None
        info = dict(delimiter=delimiter, comment=comment)
    else:
        dtype = cformat2nptype(fmt_str, names=names)
        info = format2table(fmt_str)
        names = dtype.names
    fd = backwards.BytesIO(msg)
    if names is not None:
        names = [backwards.as_str(n) for n in names]
    np_kws = dict()
    if info.get('delimiter', None) is not None:
        np_kws['delimiter'] = info['delimiter']
    if info.get('comment', None) is not None:
        np_kws['comments'] = info['comment']
    for k, v in np_kws.items():
        np_kws[k] = backwards.as_str(v)
    if use_astropy:
        # fd = backwards.StringIO(backwards.as_str(msg))
        if 'comments' in np_kws:
            np_kws['comment'] = np_kws.pop('comments')
        tab = apy_ascii.read(fd, names=names, guess=True,
                             encoding=encoding,
                             format='no_header', **np_kws)
        arr = tab.as_array()
        typs = [arr.dtype[i].str for i in range(len(arr.dtype))]
        cols = [c for c in tab.columns]
        # Convert unicode to bytes columns if python 3
        if not backwards.PY2:  # pragma: Python 3
            new_typs = copy.copy(typs)
            convert = []
            for i in range(len(arr.dtype)):
                if np.issubdtype(arr.dtype[i], np.dtype('U')):
                    new_typs[i] = 'S' + typs[i].split('U')[-1]
                    convert.append(i)
            if convert:
                old_arr = arr
                new_dtyp = np.dtype([(c, t) for c, t
                                     in zip(cols, new_typs)])
                new_arr = np.zeros(arr.shape, new_dtyp)
                for i in range(len(arr.dtype)):
                    if i in convert:
                        x = np.char.encode(old_arr[cols[i]],
                                           encoding='utf-8')
                        new_arr[cols[i]] = x
                    else:
                        new_arr[cols[i]] = old_arr[cols[i]]
                arr = new_arr
                typs = new_typs
        # Convert complex type
        for i in range(len(arr.dtype)):
            if np.issubdtype(arr.dtype[i], np.dtype('S')):
                new_typs = copy.copy(typs)
                new_typs[i] = 'complex'
                new_dtyp = np.dtype([(c, t) for c, t
                                     in zip(cols, new_typs)])
                try:
                    arr = arr.astype(new_dtyp)
                except ValueError:
                    pass
        if dtype is not None:
            arr = arr.astype(dtype)
    else:
        np_ver = tuple([float(x) for x in (np.__version__).split('.')])
        if np_ver >= (1.0, 14.0, 0.0):
            arr = np.genfromtxt(fd, encoding='bytes', autostrip=True,
                                dtype=None, names=names, **np_kws)
        else:
            arr = np.genfromtxt(fd, autostrip=True, dtype=None,
                                names=names, **np_kws)
        if dtype is not None:
            arr = arr.astype(dtype)
    fd.close()
    return arr
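
# Usage sketch for table_to_array; with an explicit format string the column
# dtypes come from cformat2nptype, so '%d' maps to a platform C int.
#
#     table = (b'  one\t1\t1.000000\n'
#              b'  two\t2\t2.000000\n')
#     arr = table_to_array(table, fmt_str=b'%5s\t%d\t%f\n',
#                          names=['name', 'count', 'size'])
#     arr['count']  # -> array([1, 2], dtype=int32) on most platforms
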
def discover_header(fd, serializer, newline=_default_newline,
                    comment=_default_comment, delimiter=None,
                    lineno_format=None, lineno_names=None, lineno_units=None,
                    use_astropy=False):
    r"""Discover ASCII table header info from a file.

    Args:
        fd (file): File object containing the table.
        serializer (DefaultSerialize): Serializer that should be updated with
            header information.
        newline (str, optional): Newline character that should be used to
            split the header if it is not already a list. Defaults to
            _default_newline.
        comment (bytes, optional): String that should be used to mark the
            header lines. If not provided and not in format_str, defaults to
            _default_comment.
        delimiter (bytes, optional): String that should be used to separate
            columns. If not provided and not in format_str, defaults to
            _default_delimiter.
        lineno_format (int, optional): Line number where formats are located.
            If not provided, an attempt will be made to locate one.
        lineno_names (int, optional): Line number where field names are
            located. If not provided, an attempt will be made to locate one.
        lineno_units (int, optional): Line number where field units are
            located. If not provided, an attempt will be made to locate one.
        use_astropy (bool, optional): If True, astropy will be used to parse
            the table if it is installed. Defaults to False.

    """
    header_lines = []
    header_size = 0
    prev_pos = fd.tell()
    for line in fd:
        sline = backwards.as_bytes(
            line.replace(backwards.as_bytes(platform._newline), newline))
        if not sline.startswith(comment):
            break
        header_size += len(line)
        header_lines.append(sline)
    # Parse header & set serializer attributes
    header = parse_header(header_lines, newline=newline,
                          lineno_format=lineno_format,
                          lineno_names=lineno_names,
                          lineno_units=lineno_units)
    # Override header with information set explicitly in the serializer
    for k in serializer._oldstyle_kws:
        v = getattr(serializer, k, None)
        if v is not None:
            header[k] = v
    header.setdefault('format_str', None)
    if (delimiter is None) or ('format_str' in header):
        delimiter = header['delimiter']
    # Try to determine the format from the array when there is no header
    str_fmt = b'%s'
    if ((header['format_str'] is None)
            or (str_fmt in header['format_str'])):
        fd.seek(prev_pos + header_size)
        all_contents = fd.read()
        if len(all_contents) == 0:  # pragma: debug
            return  # In case the file has not been written
        arr = table_to_array(all_contents,
                             names=header.get('field_names', None),
                             comment=comment, delimiter=delimiter,
                             use_astropy=use_astropy)
        header['field_names'] = arr.dtype.names
        # Get format from array
        if header['format_str'] is None:
            header['format_str'] = table2format(
                arr.dtype, delimiter=delimiter, comment=b'',
                newline=header['newline'])
        # Determine the maximum size of each string field
        while str_fmt in header['format_str']:
            field_formats = extract_formats(header['format_str'])
            ifld = backwards.as_str(
                header['field_names'][field_formats.index(str_fmt)])
            max_len = len(max(arr[ifld], key=len))
            new_str_fmt = backwards.as_bytes('%' + str(max_len) + 's')
            header['format_str'] = header['format_str'].replace(
                str_fmt, new_str_fmt, 1)
    # Update the serializer
    serializer.initialize_serializer(header)
    # Seek to just after the header
    fd.seek(prev_pos + header_size)
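
# Sketch of discovering a header from an open table file and initializing a
# serializer from it; the serializer class used here is illustrative.
#
#     with open('table.txt', 'rb') as fd:
#         s = AsciiTableSerialize()  # any serializer with _oldstyle_kws
#         discover_header(fd, s, comment=b'# ', newline=b'\n')
#         body = fd.read()  # fd is left positioned just after the header
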
class AsciiMapSerialize(DefaultSerialize):
    r"""Class for serializing/deserializing a name/value mapping.

    Args:
        delimiter (str, optional): Delimiter that should be used to separate
            name/value pairs in the map. Defaults to '\t'.
        newline (str, optional): Delimiter that should be used to separate
            lines. Defaults to '\n'.

    """

    _seritype = 'ascii_map'
    _schema_properties = {
        'delimiter': {'type': 'string',
                      'default': backwards.as_str(_default_delimiter)},
        'newline': {'type': 'string',
                    'default': backwards.as_str(_default_newline)}}
    _default_type = {'type': 'object'}

    def func_serialize(self, args):
        r"""Serialize a message.

        Args:
            args (dict): Python dictionary to be serialized.

        Returns:
            bytes, str: Serialized message.

        """
        out = ''
        order = sorted([k for k in args.keys()])
        for k in order:
            v = args[k]
            if not isinstance(k, backwards.string_types):
                raise ValueError(
                    "Serialization of non-string keys not supported.")
            out += backwards.as_str(k) + self.delimiter
            if isinstance(v, backwards.string_types):
                v = backwards.as_str(v)
            out += json.dumps(v, cls=JSONReadableEncoder)
            out += self.newline
        return backwards.as_bytes(out)

    def func_deserialize(self, msg):
        r"""Deserialize a message.

        Args:
            msg (str, bytes): Message to be deserialized.

        Returns:
            dict: Deserialized Python dictionary.

        """
        out = dict()
        lines = backwards.as_str(msg).split(self.newline)
        for l in lines:
            kv = l.split(self.delimiter)
            if len(kv) <= 1:
                continue
            elif len(kv) == 2:
                if kv[1].startswith("'") and kv[1].endswith("'"):
                    out[kv[0]] = kv[1].strip("'")
                else:
                    try:
                        out[kv[0]] = json.loads(kv[1])
                    except BaseException:
                        out[kv[0]] = kv[1]
            else:
                raise ValueError("Line has more than one delimiter: " + l)
        return out

    @classmethod
    def get_testing_options(cls):
        r"""Method to return a dictionary of testing options for this class.

        Returns:
            dict: Dictionary of variables to use for testing.

        """
        out = super(AsciiMapSerialize, cls).get_testing_options()
        out['objects'] = [{'args1': int(1), 'args2': 'this',
                           # Should these be separate messages,
                           # allowing append?
                           'args3': float(1),
                           'args4': [int(1), int(2)]}]
        out['empty'] = dict()
        out['contents'] = (b'args1\t1\n'
                           + b'args2\t"this"\n'
                           + b'args3\t1.0\n'
                           + b'args4\t[1, 2]\n')
        return out
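
# Round-trip sketch for AsciiMapSerialize using the default tab delimiter;
# values are JSON-encoded, one 'key<delimiter>value' pair per line.
#
#     s = AsciiMapSerialize()
#     raw = s.func_serialize({'a': 1, 'b': 'text'})
#     # raw == b'a\t1\nb\t"text"\n'
#     assert s.func_deserialize(raw) == {'a': 1, 'b': 'text'}
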
def cformat2nptype(cfmt, names=None):
    r"""Convert a C format string to a numpy data type.

    Args:
        cfmt (str, bytes): C format that should be translated.
        names (list, optional): Names that should be assigned to fields in
            the format string if there is more than one. If not provided,
            names are generated based on the order of the format codes.

    Returns:
        np.dtype: Corresponding numpy data type.

    Raises:
        TypeError: If cfmt is not a string.
        ValueError: If the C format does not begin with '%'.
        ValueError: If the C format does not contain type info.
        ValueError: If the C format cannot be translated to a numpy datatype.

    """
    # TODO: this may fail on 32bit systems where C long types are 32 bit
    if not (isinstance(cfmt, list)
            or isinstance(cfmt, backwards.string_types)):
        raise TypeError("Input must be a string, bytes string, or list, "
                        + "not %s" % type(cfmt))
    if isinstance(cfmt, backwards.string_types):
        fmt_list = extract_formats(backwards.as_str(cfmt))
        if len(fmt_list) == 0:
            raise ValueError("Could not locate any format codes in the "
                             + "provided format string (%s)." % cfmt)
    else:
        fmt_list = cfmt
    nfmt = len(fmt_list)
    if nfmt == 1:
        cfmt_str = fmt_list[0]
    else:
        dtype_list = [cformat2nptype(f) for f in fmt_list]
        if names is None:
            names = ['f%d' % i for i in range(nfmt)]
        elif len(names) != nfmt:
            raise ValueError("Number of names does not match the number "
                             + "of fields.")
        else:
            names = [backwards.as_str(n) for n in names]
        out = np.dtype(dict(names=names, formats=dtype_list))
        # out = np.dtype([(n, d) for n, d in zip(names, dtype_list)])
        return out
    out = None
    if cfmt_str[-1] in ['j']:
        out = 'complex128'
    elif cfmt_str[-1] in ['f', 'F', 'e', 'E', 'g', 'G']:
        # if 'hh' in cfmt_str:
        #     out = 'float8'
        # elif cfmt_str[-2] == 'h':
        #     out = 'float16'
        # elif 'll' in cfmt_str:
        #     out = 'longfloat'
        # elif cfmt_str[-2] == 'l':
        #     out = 'double'
        # else:
        #     out = 'single'
        out = 'float64'
    elif cfmt_str[-1] in ['d', 'i']:
        if 'hh' in cfmt_str:  # short short, single char
            out = 'int8'
        elif cfmt_str[-2] == 'h':  # short
            out = 'short'
        elif ('ll' in cfmt_str) or ('l64' in cfmt_str):
            out = 'longlong'  # long long
        elif cfmt_str[-2] == 'l':
            out = 'int_'  # long (broken in python)
        else:
            out = 'intc'  # int, platform dependent
    elif cfmt_str[-1] in ['u', 'o', 'x', 'X']:
        if 'hh' in cfmt_str:  # short short, single char
            out = 'uint8'
        elif cfmt_str[-2] == 'h':  # short
            out = 'ushort'
        elif ('ll' in cfmt_str) or ('l64' in cfmt_str):
            out = 'ulonglong'  # long long
        elif cfmt_str[-2] == 'l':
            out = 'uint64'  # long (broken in python)
        else:
            out = 'uintc'  # int, platform dependent
    elif cfmt_str[-1] in ['c', 's']:
        lstr = cfmt_str[1:-1]
        if lstr:
            lint = int(lstr)
        else:
            lint = 0
        lsiz = lint * np.dtype(backwards.np_dtype_str + '1').itemsize
        out = '%s%d' % (backwards.np_dtype_str, lsiz)
    else:
        raise ValueError("Could not find match for format str %s" % cfmt)
    return np.dtype(out)
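
# A few checks of the mapping implemented above; exact integer dtypes are
# platform dependent ('%d' maps to the C int, i.e. np.intc).
#
#     assert cformat2nptype('%f') == np.dtype('float64')
#     assert cformat2nptype('%d') == np.dtype('intc')
#     # Multiple codes yield a structured dtype; names default to 'f0', ...
#     dt = cformat2nptype('%5s\t%d\t%f', names=['name', 'count', 'size'])
#     assert dt.names == ('name', 'count', 'size')
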
def func_deserialize(self, args):  # pragma: no cover
    r"""Method that deserializes using eval."""
    if len(args) == 0:
        return []
    x = eval(backwards.as_str(args))
    return x
def _func_deserialize(self, args):  # pragma: no cover
    r"""Method that deserializes using eval."""
    if len(args) == 0:
        return self.testing_options['empty']
    x = eval(backwards.as_str(args))
    return x
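
# Both eval-based helpers above execute arbitrary Python from the message
# body. For untrusted input, ast.literal_eval is a safer substitute whenever
# the payload is a plain Python literal (a sketch, not the library's current
# behavior):
#
#     import ast
#     x = ast.literal_eval(backwards.as_str(args))
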
def ygginfo():
    r"""Print information about the yggdrasil installation."""
    from yggdrasil import __version__, tools, config, backwards, platform
    from yggdrasil.components import import_component
    lang_list = tools.get_installed_lang()
    prefix = '    '
    curr_prefix = ''
    vardict = [
        ('Location', os.path.dirname(__file__)),
        ('Version', __version__),
        ('Languages', ', '.join(lang_list)),
        ('Communication Mechanisms', ', '.join(tools.get_installed_comm())),
        ('Default Comm Mechanism', tools.get_default_comm()),
        ('Config File', config.usr_config_file)]
    parser = argparse.ArgumentParser(
        description=('Display information about the current yggdrasil '
                     'installation.'))
    parser.add_argument('--no-languages', action='store_true',
                        dest='no_languages',
                        help=('Don\'t print information about individual '
                              'languages.'))
    parser.add_argument('--verbose', action='store_true',
                        help=('Increase the verbosity of the printed '
                              'information.'))
    args = parser.parse_args()
    try:
        # Add language information
        if not args.no_languages:
            # Installed languages
            vardict.append(('Installed Languages:', ''))
            curr_prefix += prefix
            for lang in sorted(lang_list):
                drv = import_component('model', lang)
                vardict.append((curr_prefix + '%s:' % lang.upper(), ''))
                curr_prefix += prefix
                if lang == 'executable':
                    vardict.append((curr_prefix + 'Location', ''))
                else:
                    exec_name = drv.language_executable()
                    if not os.path.isabs(exec_name):
                        exec_name = tools.which(exec_name)
                    vardict.append((curr_prefix + 'Location', exec_name))
                vardict.append((curr_prefix + 'Version',
                                drv.language_version()))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            # Languages not installed
            vardict.append(('Languages Not Installed:', ''))
            curr_prefix += prefix
            for lang in tools.get_supported_lang():
                if lang in lang_list:
                    continue
                drv = import_component('model', lang)
                vardict.append((curr_prefix + '%s:' % lang.upper(), ''))
                curr_prefix += prefix
                vardict.append((curr_prefix + 'Language Installed',
                                drv.is_language_installed()))
                vardict.append((curr_prefix + 'Dependencies Installed',
                                drv.are_dependencies_installed()))
                vardict.append((curr_prefix + 'Interface Installed',
                                drv.is_interface_installed()))
                vardict.append((curr_prefix + 'Comm Installed',
                                drv.is_comm_installed()))
                vardict.append((curr_prefix + 'Configured',
                                drv.is_configured()))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
        # Add verbose information
        if args.verbose:
            # Conda info
            if os.environ.get('CONDA_PREFIX', ''):
                out = backwards.as_str(subprocess.check_output(
                    ['conda', 'info'])).strip()
                curr_prefix += prefix
                vardict.append(
                    (curr_prefix + 'Conda Info:',
                     "\n%s%s" % (curr_prefix + prefix,
                                 ("\n" + curr_prefix + prefix).join(
                                     out.splitlines(False)))))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            # R and reticulate info
            Rdrv = import_component("model", "R")
            if Rdrv.is_installed():
                env_reticulate = copy.deepcopy(os.environ)
                env_reticulate['RETICULATE_PYTHON'] = sys.executable
                # Stack size
                out = Rdrv.run_executable(["-e", "Cstack_info()"]).strip()
                vardict.append(
                    (curr_prefix + "R Cstack_info:",
                     "\n%s%s" % (curr_prefix + prefix,
                                 ("\n" + curr_prefix + prefix).join(
                                     out.splitlines(False)))))
                # Compilation tools
                interp = 'R'.join(
                    Rdrv.get_interpreter().rsplit('Rscript', 1))
                vardict.append((curr_prefix + "R C Compiler:", ""))
                curr_prefix += prefix
                for x in ['CC', 'CFLAGS', 'CXX', 'CXXFLAGS']:
                    out = backwards.as_str(subprocess.check_output(
                        [interp, 'CMD', 'config', x])).strip()
                    vardict.append(
                        (curr_prefix + x,
                         "%s" % ("\n" + curr_prefix + prefix).join(
                             out.splitlines(False))))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
                # Session info
                out = Rdrv.run_executable(["-e", "sessionInfo()"]).strip()
                vardict.append(
                    (curr_prefix + "R sessionInfo:",
                     "\n%s%s" % (curr_prefix + prefix,
                                 ("\n" + curr_prefix + prefix).join(
                                     out.splitlines(False)))))
                # Reticulate conda_list
                if os.environ.get('CONDA_PREFIX', ''):
                    out = Rdrv.run_executable(
                        ["-e", ("library(reticulate); "
                                "reticulate::conda_list()")],
                        env=env_reticulate).strip()
                    vardict.append(
                        (curr_prefix + "R reticulate::conda_list():",
                         "\n%s%s" % (curr_prefix + prefix,
                                     ("\n" + curr_prefix + prefix).join(
                                         out.splitlines(False)))))
                # Windows python versions
                if platform._is_win:  # pragma: windows
                    out = Rdrv.run_executable(
                        ["-e", ("library(reticulate); "
                                "reticulate::py_versions_windows()")],
                        env=env_reticulate).strip()
                    vardict.append(
                        (curr_prefix
                         + "R reticulate::py_versions_windows():",
                         "\n%s%s" % (curr_prefix + prefix,
                                     ("\n" + curr_prefix + prefix).join(
                                         out.splitlines(False)))))
                # conda_binary
                if platform._is_win:  # pragma: windows
                    out = Rdrv.run_executable(
                        ["-e", ("library(reticulate); "
                                "conda <- reticulate:::conda_binary"
                                "(\"auto\"); "
                                "system(paste(conda, \"info --json\"))")],
                        env=env_reticulate).strip()
                    vardict.append(
                        (curr_prefix + "R reticulate::conda_binary():",
                         "\n%s%s" % (curr_prefix + prefix,
                                     ("\n" + curr_prefix + prefix).join(
                                         out.splitlines(False)))))
                # Reticulate py_config
                out = Rdrv.run_executable(
                    ["-e", ("library(reticulate); "
                            "reticulate::py_config()")],
                    env=env_reticulate).strip()
                vardict.append(
                    (curr_prefix + "R reticulate::py_config():",
                     "\n%s%s" % (curr_prefix + prefix,
                                 ("\n" + curr_prefix + prefix).join(
                                     out.splitlines(False)))))
    finally:
        # Print things
        max_len = max(len(x[0]) for x in vardict)
        lines = []
        line_format = '%-' + str(max_len) + 's' + prefix + '%s'
        for k, v in vardict:
            lines.append(line_format % (k, v))
        logger.info("yggdrasil info:\n%s" % '\n'.join(lines))
def set_env(cls, logging_level=None, language=None, language_driver=None,
            **kwargs):
    r"""Get environment variables that should be set for the model process.

    Args:
        logging_level (int, optional): Logging level that should be passed
            to get flags.
        language (str, optional): Language that is being compiled. Defaults
            to the first language in cls.languages that isn't toolname.
        language_driver (ModelDriver, optional): Driver for the language
            that should be used. Defaults to None and will be imported based
            on language.
        **kwargs: Additional keyword arguments are passed to the parent
            class's method.

    Returns:
        dict: Environment variables for the model process.

    """
    out = super(MakeCompiler, cls).set_env(**kwargs)
    if language is None:
        # This should be the first language that is not the build tool
        language = cls.languages[1]
    drv = language_driver
    if drv is None:
        drv = components.import_component('model', language)
    compiler = drv.get_tool('compiler')
    compile_flags = drv.get_compiler_flags(
        for_model=True, skip_defaults=True, dont_skip_env_defaults=True,
        logging_level=logging_level, dont_link=True)
    linker = drv.get_tool('linker')
    linker_flags = drv.get_linker_flags(
        for_model=True, skip_defaults=True, dont_skip_env_defaults=True)
    for k in ['env_compiler', 'env_compiler_flags',
              'env_linker', 'env_linker_flags']:
        kwargs.setdefault(k, cls._schema_properties[k]['default'])
    out[kwargs['env_compiler']] = backwards.as_str(
        compiler.get_executable())
    out[kwargs['env_compiler_flags']] = backwards.as_str(
        ' '.join(compile_flags))
    # yggdrasil requires that linking be done in C++
    if ((compiler.languages[0].lower() == 'c')
            and ('-lstdc++' not in linker_flags)):
        linker_flags.append('-lstdc++')
    out[kwargs['env_linker_flags']] = backwards.as_str(
        ' '.join(linker_flags))
    if kwargs['env_compiler'] != kwargs['env_linker']:  # pragma: debug
        out[kwargs['env_linker']] = backwards.as_str(
            linker.get_executable())
        raise NotImplementedError(
            "Functionality allowing the linker to be specified in a "
            "separate environment variable from the compiler is untested.")
    return out
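
# Usage sketch: the returned mapping exposes the target language's compiler
# and linker settings to make under the variable names configured by
# env_compiler/env_compiler_flags (assumed here to default to names like
# 'CC' and 'CFLAGS'; the exact schema defaults are not shown above).
#
#     env = dict(os.environ)
#     env.update(MakeCompiler.set_env(language='c'))
#     subprocess.check_call(['make', 'model'], env=env)
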
class PlySerialize(SerializeBase):
    r"""Class for serializing/deserializing the .ply file format.

    Args:
        write_header (bool, optional): If True, headers will be added to
            serialized output. Defaults to True.
        newline (str, optional): String that should be used for new lines.
            Defaults to '\n'.

    Attributes:
        write_header (bool): If True, headers will be added to serialized
            output.
        newline (str): String that should be used for new lines.
        default_rgb (list): Default color in RGB that should be used for
            missing colors.

    """

    _seritype = 'ply'
    _schema_subtype_description = ('Serialize 3D structures using the Ply '
                                   'format.')
    _schema_properties = {
        'newline': {'type': 'string',
                    'default': backwards.as_str(_default_newline)}}
    default_datatype = {'type': 'ply'}
    concats_as_str = False

    def __init__(self, *args, **kwargs):
        r"""Initialize immediately as the default is the only type."""
        super(PlySerialize, self).__init__(*args, **kwargs)
        self.initialized = True

    def func_serialize(self, args):
        r"""Serialize a message.

        Args:
            args: List of arguments to be formatted or numpy array to be
                serialized.

        Returns:
            bytes, str: Serialized message.

        """
        return backwards.as_bytes(self.datatype.encode_data(args,
                                                            self.typedef))

    def func_deserialize(self, msg):
        r"""Deserialize a message.

        Args:
            msg: Message to be deserialized.

        Returns:
            obj: Deserialized message.

        """
        return PlyDict(self.datatype.decode_data(backwards.as_str(msg),
                                                 self.typedef))

    @classmethod
    def concatenate(cls, objects, **kwargs):
        r"""Concatenate objects to get the object that would be received if
        the concatenated serialization were deserialized.

        Args:
            objects (list): Objects to be concatenated.
            **kwargs: Additional keyword arguments are ignored.

        Returns:
            list: Set of objects that results from concatenating those
                provided.

        """
        if len(objects) == 0:
            return []
        total = objects[0]
        for x in objects[1:]:
            total = total.merge(x)
        return [total]

    @classmethod
    def get_testing_options(cls):
        r"""Method to return a dictionary of testing options for this class.

        Returns:
            dict: Dictionary of variables to use for testing.

        """
        out = super(PlySerialize, cls).get_testing_options()
        obj = PlyDict({'vertices': [{'x': float(0), 'y': float(0),
                                     'z': float(0)},
                                    {'x': float(0), 'y': float(0),
                                     'z': float(1)},
                                    {'x': float(0), 'y': float(1),
                                     'z': float(1)}],
                       'faces': [{'vertex_index': [int(0), int(1),
                                                   int(2)]}]})
        out.update(
            objects=[obj, obj],
            empty=dict(vertices=[], faces=[]),
            contents=(b'ply\n'
                      + b'format ascii 1.0\n'
                      + b'comment author ygg_auto\n'
                      + b'comment File generated by yggdrasil\n'
                      + b'element vertex 6\n'
                      + b'property double x\n'
                      + b'property double y\n'
                      + b'property double z\n'
                      + b'element face 2\n'
                      + b'property list uchar int vertex_index\n'
                      + b'end_header\n'
                      + b'0.0000 0.0000 0.0000\n'
                      + b'0.0000 0.0000 1.0000\n'
                      + b'0.0000 1.0000 1.0000\n'
                      + b'0.0000 0.0000 0.0000\n'
                      + b'0.0000 0.0000 1.0000\n'
                      + b'0.0000 1.0000 1.0000\n'
                      + b'3 0 1 2\n'
                      + b'3 3 4 5\n'))
        out['concatenate'] = [([], [])]
        return out
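
# Round-trip sketch for PlySerialize, mirroring get_testing_options above.
#
#     s = PlySerialize()
#     raw = s.func_serialize({'vertices': [{'x': 0.0, 'y': 0.0, 'z': 0.0}],
#                             'faces': []})
#     assert raw.startswith(b'ply\n')
#     obj = s.func_deserialize(raw)  # -> PlyDict
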