def parse(self, buff, delimiter=DELIMITER, separator=SEPARATOR):
    """
    Parse a FIX message.

    The message may be a bytestring or a unicode string. The output is a
    dictionary-like object whose type is determined by the
    ``fragment_class`` constructor argument, and whose keys are ``int`` and
    values ``unicode``. A non-int tag found in the message is stored as a
    key in its original format.

    When a spec is available, groups are enabled and the message type
    (tag 35) is known to the spec, repeating groups are collected through
    ``_process_group``; otherwise the flat tag/value pairs are handed to
    the fragment class directly.

    When ``buff`` is unicode, the ``encoding`` and ``decode_all_as_347``
    settings are ignored and a warning is emitted for each one that is set.

    :param buff: Buffer to parse
    :type buff: ``bytestr`` or ``unicode``
    :param delimiter: A character that separates key and value inside the
      FIX message. Generally '='. Note the type: because of the way the
      buffer is tokenised, this needs to be unicode (or ``str`` in
      python 2.7*).
    :type delimiter: ``unicode``
    :param separator: A character that separates key+value pairs inside
      the FIX message. Generally SOH (``'\\x01'``). See type observations
      above.
    :type separator: ``unicode``
    :raises ValueError: if ``buff`` is neither text nor bytes.
    """

    def pushback_generator(iterator):
        """
        Generator which allows to push back a previously picked item.

        For example::

            gen = pushback_generator(range(10))
            print(next(gen))
            print(next(gen))
            v = next(gen)
            print(v)
            gen.send(v)
            print(next(gen))   # prints v again

        :param iterator: iterable to wrap
        :return: generator yielding the items of ``iterator``
        """
        for value in iterator:
            back = yield value
            if back is not None:
                # First yield is the return value of ``send``; the second
                # one re-delivers the pushed-back item to the next ``next``.
                yield back
                yield back

    # Sanity check: an alphanumeric delimiter/separator could not be told
    # apart from tag or value content.
    assert not (delimiter.isalnum() or separator.isalnum())

    encoding, encoding_347 = self.encoding, None
    input_in_unicode = False
    msg_type = None

    pattern = FIX_REGEX_STRING.format(d=re.escape(delimiter),
                                      s=re.escape(separator))
    if isinstance(buff, six.text_type):
        input_in_unicode = True
        custom_r = re.compile(six.ensure_text(pattern, encoding='ascii'),
                              re.DOTALL)
        if self.encoding is not None:
            encoding = None  # No need to decode
            warnings.warn(
                'Processing a unicode message and ignore the argument "decode_as={}"'
                .format(self.encoding))
        if self.decode_all_as_347:
            warnings.warn(
                'Processing a unicode message and ignore the argument "decode_all_as_347={}"'
                .format(self.decode_all_as_347))
    elif isinstance(buff, bytes):
        custom_r = re.compile(six.ensure_binary(pattern, encoding='ascii'),
                              re.DOTALL)
    else:
        raise ValueError('Unsupported type of input: {}'.format(
            type(buff)))

    tagvals = custom_r.findall(buff)

    if not self._no_groups and self.spec is not None:
        # Look for the message type among the first four fields only.
        # Slicing (rather than indexing 0..3) keeps short or empty
        # messages from raising IndexError.
        for head_tag, head_val in tagvals[:4]:
            if head_tag in (b'35', u'35'):
                msg_type = self.spec.msg_types.get(head_val)

    if not input_in_unicode:
        # Scan the header for tag 347, which names the encoding of the
        # message; stop as soon as a non-header tag is seen.
        for tag, val in tagvals:
            if int_or_str(tag) == 347:
                encoding_347 = six.ensure_str(val)
                break
            if six.ensure_str(tag) not in HEADER_TAGS_SET:
                # already enter the message body
                break

    # All conversions below are lazy generators: nothing is decoded until
    # the pairs are consumed.
    if self.decode_all_as_347 and encoding_347:
        tagvals = ((int_or_str(tval[0], encoding_347),
                    six.ensure_text(tval[1], encoding_347))
                   for tval in tagvals)
    elif encoding:
        tagvals = ((int_or_str(tval[0], encoding),
                    six.ensure_text(
                        tval[1],
                        (encoding_347
                         if encoding_347 and tval[0].decode() in ENCODED_TAG_SET
                         else encoding)))
                   for tval in tagvals)
    elif not input_in_unicode and six.PY3:
        tagvals = ((int_or_str(tval[0], 'ascii'),
                    six.ensure_text(
                        tval[1],
                        (encoding_347
                         if encoding_347 and tval[0].decode() in ENCODED_TAG_SET
                         else 'UTF-8')))
                   for tval in tagvals)
    elif input_in_unicode and six.PY2:
        tagvals = ((int_or_str(six.ensure_binary(tval[0]), 'ascii'),
                    six.ensure_binary(
                        tval[1],
                        (encoding_347
                         if encoding_347 and tval[0].encode() in ENCODED_TAG_SET
                         else 'UTF-8')))
                   for tval in tagvals)
    else:
        tagvals = ((int_or_str(tval[0]), tval[1]) for tval in tagvals)

    if self._no_groups or self.spec is None or msg_type is None:
        # no groups can be found without a spec, so no point looking up
        # the msg type.
        return self._frg_class(tagvals)

    msg = self._frg_class()
    groups = msg_type.groups
    tagvals = pushback_generator(tagvals)
    for tag, value in tagvals:
        if tag not in groups:
            msg[tag] = value
        elif value in (b'0', u'0'):
            # Zero-count group: nothing to collect.
            msg[tag] = RepeatingGroup.create_repeating_group(tag)
        else:
            contents, last_tagval = self._process_group(
                tag, tagvals, msg_type=msg_type, group=groups[tag])
            msg[tag] = contents
            if last_tagval:
                # The group parser consumed one pair too many; hand it
                # back so this loop sees it next.
                tagvals.send(last_tagval)
    return msg
def _process_group(self, identifying_tag, enumerator, msg_type, group):
    """
    Recursively collect the members of one repeating group.

    Pairs are pulled from ``enumerator`` until a tag that does not belong
    to the group is met or the stream is exhausted. The first tag seen is
    taken as the tag every member starts with; each later occurrence of it
    opens a new member.

    :param identifying_tag: the count tag that introduced this group;
      stored as ``number_tag`` on the result.
    :param enumerator: shared iterator of ``(tag, value)`` pairs; it is
      consumed in place and passed down to nested groups.
    :param msg_type: message type entry, only forwarded to recursive calls.
    :param group: spec entry for this group; its ``tags`` and ``groups``
      attributes are read.
    :return: ``(rep_group, next_tagval)`` where ``rep_group`` is the
      populated ``RepeatingGroup`` and ``next_tagval`` is the
      ``(tag, value)`` pair that ended the group (it was consumed but does
      not belong to it), or ``None`` if the message ended first.
    """
    rep_group = RepeatingGroup()
    rep_group.number_tag = identifying_tag
    member = self._frg_class()
    first_tag = None
    inner_groups = group.groups
    valid_tags = group.tags

    stream = iter(enumerator)
    # Pair handed back by a nested group; it must be dispatched through the
    # same branch chain as a freshly read pair.
    pending = None
    while True:
        if pending is not None:
            tag, value = pending
            pending = None
        else:
            try:
                tag, value = next(stream)
            except StopIteration:
                break

        if first_tag is None:
            # handle first tag: we expect all the members of the group to
            # start with this tag
            first_tag = tag
            rep_group.first_tag = tag
            member[tag] = value
        elif first_tag == tag:
            # we start a new member: store the current one and begin an
            # empty one holding the current tag
            rep_group.append(member)
            member = self._frg_class()
            member[tag] = value
        elif tag in valid_tags:
            # tag is a member, we just add
            member[tag] = value
        elif tag in inner_groups:
            if value in (b'0', u'0'):
                # Empty sub group: recursing would swallow the next tag of
                # this member as the sub group's first tag.
                member[tag] = RepeatingGroup.create_repeating_group(tag)
            else:
                # tag is starting a new sub group, we recurse. Whatever
                # pair ended the sub group is re-dispatched on the next
                # iteration, so it may start a new member, extend this
                # one, open a sibling sub group, or end this group.
                contents, pending = self._process_group(
                    tag, stream, msg_type, inner_groups[tag])
                member[tag] = contents
        else:
            # we're out of the group.
            rep_group.append(member)
            return rep_group, (tag, value)

    # we are reaching the end of the message, so complete, no further tags
    # to pass on
    rep_group.append(member)
    return rep_group, None
def parse(self, buff, delimiter=DELIMITER, separator=SEPARATOR):
    """
    Parse a FIX message.

    The FIX message is expected to be a bytestring and the output is a
    dictionary-like object whose type is determined by the
    ``fragment_class`` constructor argument and whose keys are ``int`` and
    values ``unicode``. A non-int tag found in the message is stored as a
    key in its original format (i.e. bytestring).

    Values are decoded with ``self.encoding`` when it is set; otherwise,
    when ``decode_all_as_347`` is set and the message carries tag 347,
    with the encoding named by that tag; otherwise they are left as read.

    :param buff: Buffer to parse
    :type buff: ``bytestr``
    :param delimiter: A character that separates key and value inside the
      FIX message. Generally '='. Note the type: because of the way the
      buffer is tokenised, this needs to be unicode (or ``str`` in
      python 2.7*).
    :type delimiter: ``unicode``
    :param separator: A character that separates key+value pairs inside
      the FIX message. Generally SOH (``'\\x01'``). See type observations
      above.
    :type separator: ``unicode``
    """

    def pushback_generator(iterator):
        """
        Generator which allows to push back a previously picked item.

        For example::

            gen = pushback_generator(range(10))
            print(next(gen))
            print(next(gen))
            v = next(gen)
            print(v)
            gen.send(v)
            print(next(gen))   # prints v again

        :param iterator: iterable to wrap
        :return: generator yielding the items of ``iterator``
        """
        for value in iterator:
            back = yield value
            if back is not None:
                # First yield is the return value of ``send``; the second
                # one re-delivers the pushed-back item to the next ``next``.
                yield back
                yield back

    pattern = FIX_REGEX_STRING.format(d=re.escape(delimiter),
                                      s=re.escape(separator))
    custom_r = re.compile(pattern.encode('UTF-8'), re.DOTALL)
    tagvals = custom_r.findall(buff)

    msg_type = None
    if not self._no_groups and self.spec is not None:
        # Look for the message type among the first four fields only.
        # Slicing (rather than indexing 0..3) keeps short or empty
        # messages from raising IndexError.
        for head_tag, head_val in tagvals[:4]:
            if head_tag == b'35':
                msg_type = self.spec.msg_types.get(head_val)

    if self.encoding is not None:
        tagvals = ((int_or_str(tval[0], self.encoding),
                    tval[1].decode(self.encoding))
                   for tval in tagvals)
    elif self.decode_all_as_347:
        # Materialised as a list because it is scanned for tag 347 first
        # and then iterated again for decoding.
        tagvals = [(int_or_str(tval[0]), tval[1]) for tval in tagvals]
        encoding = None
        for tag, val in tagvals:
            if tag == 347:
                encoding = val.decode('UTF-8')
                break
        if encoding is not None:
            tagvals = ((t[0], t[1].decode(encoding)) for t in tagvals)
    else:
        # TODO: Need to add logic to parse Encoded* tags according to 347
        tagvals = ((int_or_str(tval[0]), tval[1]) for tval in tagvals)

    if self._no_groups or self.spec is None or msg_type is None:
        # no groups can be found without a spec, so no point looking up
        # the msg type.
        return self._frg_class(tagvals)

    msg = self._frg_class()
    groups = msg_type.groups
    tagvals = pushback_generator(tagvals)
    for tag, value in tagvals:
        if tag not in groups:
            msg[tag] = value
        elif value in (b'0', u'0'):
            # Zero-count group. The value may still be an undecoded
            # bytestring here, so both spellings are checked; comparing
            # against '0' alone never matches bytes on Python 3.
            msg[tag] = RepeatingGroup.create_repeating_group(tag)
        else:
            contents, last_tagval = self._process_group(
                tag, tagvals, msg_type=msg_type, group=groups[tag])
            msg[tag] = contents
            if last_tagval:
                # The group parser consumed one pair too many; hand it
                # back so this loop sees it next.
                tagvals.send(last_tagval)
    return msg