def _etree2botstree(self, xmlnode):
    """Recursively convert an xml element into a tree of bots nodes.

    A node is created for ``xmlnode`` itself (its record comes from
    ``self._etreenode2botstreenode``); every child element is then
    classified with ``self._entitytype``:

    * falsy result: the child is treated as a field of the new node;
    * ``1``: the child is a record; it is converted recursively and
      appended as a child node;
    * anything else: the child is an unknown record, which is reported
      via ``self.add2errorlist`` when ``checkunknownentities`` is set
      and otherwise skipped.

    Returns the newly built node.
    """
    botsnode = node.Node(record=self._etreenode2botstreenode(xmlnode))
    for child in xmlnode:
        kind = self._entitytype(child)

        if kind == 1:
            # Record known to the grammar: recurse, then drop the handled
            # element from the stack.
            botsnode.append(self._etree2botstree(child))
            self.stack.pop()
            continue

        if kind:
            # Looks like a record, but the grammar does not know it.
            if self.ta_info["checkunknownentities"]:
                self.add2errorlist(
                    _('[S02]%(linpos)s: Unknown xml-tag "%(recordunkown)s" (within "%(record)s") in message.\n')
                    % {
                        "linpos": botsnode.linpos(),
                        "recordunkown": child.tag,
                        "record": botsnode.record["BOTSID"],
                    }
                )
            continue

        # Field (or something unknown that looks like a field).
        if child.text:
            botsnode.record[child.tag] = child.text
        # The xml attributes of the field element become fields as well,
        # keyed as <tag><attributemarker><attribute name>; empty values
        # are skipped.
        for attr_name, attr_value in child.items():
            if attr_value:
                field_name = child.tag + self.ta_info["attributemarker"] + attr_name
                botsnode.record[field_name] = attr_value
    return botsnode
def putloop(self, *mpaths):
    """Create a loop node for ``mpaths`` and return it.

    When the root already has a record, the call is delegated to
    ``self.root.putloop``. When the root record is still empty (the
    mapping starts with a putloop), exactly one mpath is accepted: a new
    node holding that mpath as its record is appended under the dummy
    root and returned.

    Raises:
        MappingRootError: root record is empty and the number of mpaths
            is not exactly one.
    """
    if self.root.record:
        return self.root.putloop(*mpaths)

    # No input yet: the root acts as a dummy parent.
    if len(mpaths) != 1:
        raise MappingRootError(
            _("putloop(%(mpath)s): mpath too long???"), {"mpath": mpaths})
    self.root.append(node.Node(record=mpaths[0]))
    return self.root.children[-1]
def initfromfile(self):
    """Initialise the message tree from JSON content.

    Reads the grammar and the file content, parses ``self.rawinput`` as
    JSON and builds ``self.root`` from it:

    * top-level list: an empty root whose children are built from the
      list, named with the root id from ``self._getrootid()``; every
      child is checked and its queries are merged into ``self.ta_info``;
    * top-level object: a root node built from the object (see the
      inline comments for the three accepted shapes), which is then
      checked and whose queries are merged into ``self.ta_info``.

    Raises:
        InMessageError: no usable content ([J51]/[J52]) or the top-level
            JSON value is neither a list nor an object ([J53]).
    """
    self.messagegrammarread(typeofgrammarfile="grammars")
    self._readcontent_edifile()
    parsed = simplejson.loads(self.rawinput)
    del self.rawinput

    if isinstance(parsed, list):
        self.root = node.Node()  # empty root, filled with children below
        self.root.children = self._dojsonlist(parsed, self._getrootid())
        for child in self.root.children:
            # sanity test: the children must have content
            if not child.record:
                raise InMessageError(_("[J51]: No usable content."))
            self.checkmessage(child, self.defmessage)
            self.ta_info.update(child.queries)
        return

    if not isinstance(parsed, dict):
        # root in JSON is neither dict nor list
        raise InMessageError(
            _('[J53]: Content must be a "list" or "object".'))

    sole_key = sole_value = None
    has_single_entry = len(parsed) == 1
    if has_single_entry:
        ((sole_key, sole_value),) = parsed.items()

    if has_single_entry and isinstance(sole_value, dict):
        # best structure: {rootid: {id2: <dict, list>}}
        self.root = self._dojsonobject(sole_value, sole_key)
    elif has_single_entry and isinstance(sole_value, list):
        # root dict has no name; the root id comes from the grammar:
        # {id2: <dict, list>}
        self.root = node.Node(record={"BOTSID": self._getrootid()})
        self.root.children = self._dojsonlist(sole_value, sole_key)
    else:
        self.root = self._dojsonobject(parsed, self._getrootid())

    if not self.root:
        raise InMessageError(_("[J52]: No usable content."))
    self.checkmessage(self.root, self.defmessage)
    self.ta_info.update(self.root.queries)
def initfromfile(self):
    """Initialise the message tree from an edi file.

    Steps, in order: read the grammar, read the file content, sniff the
    content (``ta_info`` can be overruled by syntax parameters found in
    the edi file), lex, optionally run the ``preprocess_lex`` user exit,
    parse the lexed records into a node tree under ``self.root``, then
    run the envelope and message checks and merge the queries of the
    parsed message into ``self.ta_info``.

    Raises:
        InMessageError: [A50] when the parser returns unparsed records.
    """
    # May raise charset errors and lex errors.
    self.messagegrammarread(typeofgrammarfile="grammars")
    self._readcontent_edifile()
    self._sniff()
    self._lex()

    # Lex preprocessing via user exit indicated in the syntax.
    lex_hook = self.ta_info.get("preprocess_lex", False)
    if callable(lex_hook):
        lex_hook(lex=self.lex_records, ta_info=self.ta_info)
    if hasattr(self, "rawinput"):
        del self.rawinput

    # Breaking parser errors can occur from here on.
    self.root = node.Node()
    self.iternext_lex_record = iter(self.lex_records)
    unparsed = self._parse(
        structure_level=self.defmessage.structure, inode=self.root)
    if unparsed:
        # Probably not reached with edifact/x12 because of mailbag processing.
        raise InMessageError(
            _("[A50] line %(line)s pos %(pos)s: Found non-valid data at end of edi file; probably a problem with separators or message structure."),
            {"line": unparsed[0][LIN], "pos": unparsed[0][POS]},
        )
    del self.lex_records

    # self.root is now the root of a tree of nodes; the checks below
    # report non-breaking parser errors.
    self.checkenvelope()
    self.checkmessage(self.root, self.defmessage)

    # Queries of the parsed message: taken from the root when it has a
    # record, otherwise from the first child only.
    if self.root.record:
        self.ta_info.update(self.root.queries)
        return
    for first_child in self.root.children:
        self.ta_info.update(first_child.queries)
        return
def _dojsonobject(self, jsonobject, name):
    """Convert a JSON object (dict) into a node named ``name``.

    Each key/value pair is mapped by value type:

    * ``None``: ignored;
    * string: stored as a field unless empty or whitespace-only;
    * dict: converted recursively and appended as a child when the
      result is truthy;
    * list: converted with ``self._dojsonlist`` and added as children;
    * int/float: stored as a field, converted with ``str``;
    * anything else: raises when ``checkunknownentities`` is set,
      otherwise stored as ``str(value)``.

    Returns the node, or ``None`` when the node is considered empty.

    Raises:
        InMessageError: [J55] for an unsupported value type while
            ``checkunknownentities`` is set.
    """
    result = node.Node(record={"BOTSID": name})
    for key, value in jsonobject.items():
        if value is None:
            continue

        if isinstance(value, str):
            # Only strings with real content become fields.
            if value and not value.isspace():
                result.record[key] = value
            continue

        if isinstance(value, dict):
            child = self._dojsonobject(value, key)
            if child:
                result.append(child)
            continue

        if isinstance(value, list):
            result.children.extend(self._dojsonlist(value, key))
            continue

        is_number = isinstance(value, (int, float))
        if not is_number and self.ta_info["checkunknownentities"]:
            raise InMessageError(
                _('[J55]: Key "%(key)s" value "%(value)s": is not string, list or dict.'),
                {"key": key, "value": value},
            )
        result.record[key] = str(value)

    # A record of length 2 without children counts as empty.
    # NOTE(review): presumably the record then holds BOTSID plus one entry
    # added by node.Node itself - confirm against node.Node.
    nothing_added = len(result.record) == 2 and not result.children
    return None if nothing_added else result
def nextmessage(self):
    """Pass each 'message' to the mapping script (generator).

    Runs the optional ``preprocess_nodes`` user exit from ``ta_info`` and
    then yields one object per message, each produced by
    ``self._initmessagefromnode``. How the tree is split depends on the
    grammar (``self.defmessage``):

    * ``nextmessage`` set: one message per occurrence of that mpath;
      afterwards, when ``nextmessage2`` is set as well (edifact uses it
      for UNB-UNG), one message per occurrence of that mpath too;
    * ``nextmessageblock`` set (csv/fixed): consecutive lines under the
      root with the same value for the indicated field(s) form one
      message;
    * otherwise: the whole tree as one message when the root has a record
      or ``pass_all`` is set in ``ta_info``; else every child of the root
      is a message of its own.

    Each yielded message gets a copy of ``self.ta_info`` extended with the
    queries of the relevant node and with ``message_number``.
    """
    # node preprocessing via user exit indicated in syntax
    preprocess_nodes = self.ta_info.get("preprocess_nodes", False)
    if callable(preprocess_nodes):
        preprocess_nodes(thisnode=self)

    if self.defmessage.nextmessage is not None:
        # nextmessage defined in grammar: split up messages.
        # first: count number of messages
        self.ta_info["total_number_of_messages"] = self.getcountoccurrences(
            *self.defmessage.nextmessage)
        # yield the messages, using nextmessage
        count = 0
        self.root.processqueries({}, len(self.defmessage.nextmessage))
        # eachmessage is a list: [mpath, mpath, etc, node]
        for eachmessage in self.getloop_including_mpath(
                *self.defmessage.nextmessage):
            count += 1
            ta_info = self.ta_info.copy()
            ta_info.update(eachmessage[-1].queries)
            ta_info["message_number"] = count
            # give mappingscript access to envelope
            ta_info["bots_accessenvelope"] = self.root
            yield self._initmessagefromnode(
                eachmessage[-1], ta_info, eachmessage[:-1])

        if self.defmessage.nextmessage2 is not None:
            # edifact uses nextmessage2 for UNB-UNG
            # first: count number of messages
            self.ta_info["total_number_of_messages"] = self.getcountoccurrences(
                *self.defmessage.nextmessage2)
            # yield the messages, using nextmessage2
            self.root.processqueries({}, len(self.defmessage.nextmessage2))
            count = 0
            # eachmessage is a list: [mpath, mpath, etc, node]
            for eachmessage in self.getloop_including_mpath(
                    *self.defmessage.nextmessage2):
                count += 1
                ta_info = self.ta_info.copy()
                # The node is the last element of the list; the queries
                # live on the node, not on the list itself.
                ta_info.update(eachmessage[-1].queries)
                ta_info["message_number"] = count
                # give mappingscript access to envelope
                ta_info["bots_accessenvelope"] = self.root
                yield self._initmessagefromnode(
                    eachmessage[-1], ta_info, eachmessage[:-1])

    elif self.defmessage.nextmessageblock is not None:
        # for csv/fixed: nextmessageblock indicates which field(s)
        # determine a message --> as long as the field(s) have the same
        # value, it is the same message.
        # note there is only one recordtype (as checked in grammar.py)
        # first: count number of messages
        count = 0
        for line in self.root.children:
            kriterium = line.enhancedget(self.defmessage.nextmessageblock)
            if not count or kriterium != oldkriterium:
                count += 1
                oldkriterium = kriterium
        self.ta_info["total_number_of_messages"] = count

        # yield the messages, using nextmessageblock
        count = 0
        for line in self.root.children:
            kriterium = line.enhancedget(self.defmessage.nextmessageblock)
            if not count:
                count += 1
                oldkriterium = kriterium
                newroot = node.Node()  # make new empty root node.
            elif kriterium != oldkriterium:
                # Value changed: the block collected so far is complete.
                count += 1
                oldkriterium = kriterium
                ta_info = self.ta_info.copy()
                # update ta_info with information from the previous line
                ta_info.update(oldline.queries)
                ta_info["message_number"] = count
                yield self._initmessagefromnode(newroot, ta_info)
                newroot = node.Node()  # make new empty root node.
            newroot.append(line)
            oldline = line  # remember the line for the next iteration
        else:
            # Loop finished: emit the last collected block, if any.
            if count:  # not if there are no lines
                ta_info = self.ta_info.copy()
                # update ta_info with information from the last line
                ta_info.update(line.queries)
                ta_info["message_number"] = count
                yield self._initmessagefromnode(newroot, ta_info)

    else:
        # no split up is indicated in grammar.
        if self.root.record or self.ta_info.get("pass_all", False):
            # root has a record, or explicitly indicated (csv):
            # pass the whole tree
            ta_info = self.ta_info.copy()
            ta_info.update(self.root.queries)
            ta_info["total_number_of_messages"] = 1
            ta_info["message_number"] = 1
            # give mappingscript access to envelope
            ta_info["bots_accessenvelope"] = self.root
            yield self._initmessagefromnode(self.root, ta_info)
        else:
            # pass nodes under root one by one
            # first: count number of messages
            total_number_of_messages = len(self.root.children)
            # yield the messages
            count = 0
            for child in self.root.children:
                count += 1
                ta_info = self.ta_info.copy()
                ta_info.update(child.queries)
                ta_info["total_number_of_messages"] = total_number_of_messages
                ta_info["message_number"] = count
                # give mappingscript access to envelope
                ta_info["bots_accessenvelope"] = self.root
                yield self._initmessagefromnode(child, ta_info)
def _parse(self, structure_level, inode):
    """Parse lexed records into nodes (not used for xml, json).

    This is the heart of the parsing of incoming messages. The
    lex_records are read one by one from ``self.iternext_lex_record``
    (an iterator) and for each record:

    - the record is identified (lookup in the structure);
    - the fields in the record are identified (using the record
      definition from the grammar, via ``self._parsefields``);
    - a node is created and appended to ``inode``.

    Recursive in two ways: for SUBTRANSLATION and for segment groups.

    Parameters:
        structure_level: current grammar/segmentgroup of the
            grammar-structure.
        inode: parent node; all parsed records are added as children of
            inode.

    Returns:
        ``None`` when there are no more lex_records, or the lex_record
        that could not be matched in this level (the caller continues
        matching it one level up).

    Raises:
        InMessageError: [S50] record not allowed at this place, [S51]
            mandatory record missing.
        TranslationNotFoundError: SUBTRANSLATION value not found in the
            record, or no (valid) grammar for the resulting messagetype.
    """
    structure_index = 0  # keep track of where we are in the structure_level
    countnrofoccurences = 0  # number of occurences of current record in structure
    structure_end = len(structure_level)
    # Indicates if the next record should be fetched, or if the
    # current_lex_record is still being parsed. It might seem logical to
    # test 'current_lex_record is None' instead, but that is already used
    # to indicate 'no more records'.
    get_next_lex_record = True
    while True:
        if get_next_lex_record:
            try:
                current_lex_record = next(self.iternext_lex_record)
            except StopIteration:  # no more lex_records
                current_lex_record = None
            get_next_lex_record = False

        if (current_lex_record is None
                or structure_level[structure_index][ID] != current_lex_record[ID][VALUE]):
            # Record is required in structure_level and did not occur:
            # error. This check is enough here; the message is validated
            # more accurately later.
            if structure_level[structure_index][MIN] and not countnrofoccurences:
                try:
                    raise InMessageError(
                        self.messagetypetxt +
                        _('[S50]: Line:%(line)s pos:%(pos)s record:"%(record)s": message has an error in its structure; this record is not allowed here. Scanned in message definition until mandatory record: "%(looked)s".'),
                        {
                            "record": current_lex_record[ID][VALUE],
                            "line": current_lex_record[ID][LIN],
                            "pos": current_lex_record[ID][POS],
                            "looked": self.mpathformat(
                                structure_level[structure_index][MPATH]),
                        },
                    )
                except TypeError:
                    # current_lex_record is None (no more records, e.g. no
                    # UNZ in edifact): subscripting it above raised
                    # TypeError, so report the missing record instead.
                    raise InMessageError(
                        self.messagetypetxt +
                        _('[S51]: Missing mandatory record "%(record)s".'),
                        {
                            "record": self.mpathformat(
                                structure_level[structure_index][MPATH])
                        },
                    )
            structure_index += 1
            if structure_index == structure_end:
                # current_lex_record is not in this level. Go level up.
                # If on 'first level': give specific error.
                if (current_lex_record is not None
                        and structure_level == self.defmessage.structure):
                    raise InMessageError(
                        self.messagetypetxt +
                        _('[S50]: Line:%(line)s pos:%(pos)s record:"%(record)s": message has an error in its structure; this record is not allowed here. Scanned in message definition until mandatory record: "%(looked)s".'),
                        {
                            "record": current_lex_record[ID][VALUE],
                            "line": current_lex_record[ID][LIN],
                            "pos": current_lex_record[ID][POS],
                            "looked": self.mpathformat(
                                structure_level[structure_index - 1][MPATH]),
                        },
                    )
                # Return either None (no more lex_records to parse) or the
                # last current_lex_record (which was not found in this
                # level).
                return current_lex_record
            countnrofoccurences = 0
            # get_next_lex_record is False as no match with the structure
            # was made; go and look at the next record of the structure.
            continue

        # record is found in grammar
        countnrofoccurences += 1
        newnode = node.Node(
            record=self._parsefields(current_lex_record,
                                     structure_level[structure_index]),
            linpos_info=(current_lex_record[0][LIN],
                         current_lex_record[0][POS]),
        )
        inode.append(newnode)  # new node becomes a child of the parent node

        if SUBTRANSLATION in structure_level[structure_index]:
            # start a SUBTRANSLATION; find the right messagetype, etc
            messagetype = newnode.enhancedget(
                structure_level[structure_index][SUBTRANSLATION])
            if not messagetype:
                raise TranslationNotFoundError(
                    _('Could not find SUBTRANSLATION "%(sub)s" in (sub)message.'),
                    {"sub": structure_level[structure_index][SUBTRANSLATION]},
                )
            messagetype = self._manipulatemessagetype(messagetype, inode)
            try:
                defmessage = grammar.grammarread(
                    self.__class__.__name__,
                    messagetype,
                    typeofgrammarfile="grammars",
                )
            except BotsImportError:
                # No grammar for this messagetype; give the user script
                # 'getmessagetype' of the envelope grammar (if present) a
                # chance to supply an alternative messagetype.
                raisenovalidmapping_error = True
                if hasattr(self.defmessage.module, "getmessagetype"):
                    messagetype2 = runscript(
                        self.defmessage.module,
                        self.defmessage.grammarname,
                        "getmessagetype",
                        editype=self.__class__.__name__,
                        messagetype=messagetype,
                    )
                    if messagetype2:
                        try:
                            defmessage = grammar.grammarread(
                                self.__class__.__name__,
                                messagetype2,
                                typeofgrammarfile="grammars",
                            )
                            raisenovalidmapping_error = False
                        except BotsImportError:
                            pass
                if raisenovalidmapping_error:
                    raise TranslationNotFoundError(
                        _('No (valid) grammar for editype "%(editype)s" messagetype "%(messagetype)s".'),
                        {
                            "editype": self.__class__.__name__,
                            "messagetype": messagetype,
                        },
                    )
            self.messagecount += 1
            # Translate the template first and interpolate afterwards, so
            # the lookup uses the untouched message id.
            self.messagetypetxt = _("Message nr %(count)s, type %(type)s, ") % {
                "count": self.messagecount,
                "type": messagetype,
            }
            current_lex_record = self._parse(
                structure_level=defmessage.structure[0][LEVEL],
                inode=newnode)
            # copy messagetype into 1st segment of subtranslation (eg UNH, ST)
            newnode.queries = {"messagetype": messagetype}
            newnode.queries.update(defmessage.syntax)
            # check the results of the subtranslation
            self.checkmessage(newnode, defmessage, subtranslation=True)
            # end SUBTRANSLATION
            self.messagetypetxt = ""
            # get_next_lex_record is still False; we are trying to match
            # the last (not matched) record from the SUBTRANSLATION
            # (named 'current_lex_record').
        else:
            if LEVEL in structure_level[structure_index]:
                # header of a segment group: parse the group (recursive)
                current_lex_record = self._parse(
                    structure_level=structure_level[structure_index][LEVEL],
                    inode=newnode,
                )
                # get_next_lex_record is still False; the
                # current_lex_record that was not matched in lower
                # segment groups is still being parsed.
            else:
                get_next_lex_record = True

        # accommodate for UNS = UNS construction: once a record with
        # MIN == MAX has occurred exactly that often, move on to the next
        # record of the structure (unless it is the last one).
        if (structure_level[structure_index][MIN]
                == structure_level[structure_index][MAX]
                == countnrofoccurences):
            if structure_index + 1 == structure_end:
                pass
            else:
                structure_index += 1
                countnrofoccurences = 0
def initfromfile(self):
    """Initialise the message tree from an excel file.

    The file is read with ``self.read_xls`` (python library xlrd is
    required), written as csv into an in-memory buffer, and the buffer
    content is then lexed and parsed as csv into a node tree under
    ``self.root``.

    Raises:
        ImportError: library "xlrd" is not available.
        InMessageError: the excel extraction failed, or [A52] unparsed
            data is left at the end of the file.
    """
    try:
        self.xlrd = botsbaseimport("xlrd")
    except ImportError:
        raise ImportError(
            _('Dependency failure: editype "excel" requires python library "xlrd".'))
    import csv as csvlib

    # Only a missing module should trigger the fallback.
    try:
        import StringIO
    except ImportError:
        import io as StringIO

    self.messagegrammarread(typeofgrammarfile="grammars")
    # always use charset of edi file.
    self.ta_info["charset"] = self.defmessage.syntax["charset"]
    # With an escape character configured, quote chars are escaped
    # instead of doubled.
    doublequote = not self.ta_info["escape"]

    logger.debug('Read edi file "%(filename)s".', self.ta_info)
    # xlrd reads the excel file; python's csv module writes this to a
    # file-like StringIO; the StringIO content becomes self.rawinput.
    infilename = abspathdata(self.ta_info["filename"])
    try:
        xlsdata = self.read_xls(infilename)
    except Exception:
        # 'Exception' rather than a bare except: interpreter-exit signals
        # (KeyboardInterrupt, SystemExit) must not be reported as a broken
        # excel file.
        txt = txtexc()
        logger.error(
            _("Excel extraction failed, may not be an Excel file? Error:\n%(txt)s"),
            {"txt": txt},
        )
        raise InMessageError(
            _("Excel extraction failed, may not be an Excel file? Error:\n%(txt)s"),
            {"txt": txt},
        )

    rawinputfile = StringIO.StringIO()
    csvout = csvlib.writer(
        rawinputfile,
        quotechar=self.ta_info["quote_char"],
        delimiter=self.ta_info["field_sep"],
        doublequote=doublequote,
        escapechar=self.ta_info["escape"],
    )
    csvout.writerows(map(self.utf8ize, xlsdata))
    rawinputfile.seek(0)
    self.rawinput = rawinputfile.read()
    rawinputfile.close()
    # A byte-oriented buffer yields utf-8 encoded bytes that still need
    # decoding; io.StringIO already yields text, which has no decode().
    if isinstance(self.rawinput, bytes):
        self.rawinput = self.rawinput.decode("utf-8")

    # start lexing and parsing as csv
    self._lex()
    if hasattr(self, "rawinput"):
        del self.rawinput
    self.root = node.Node()  # empty root node
    self.iternext_lex_record = iter(self.lex_records)
    leftover = self._parse(structure_level=self.defmessage.structure,
                           inode=self.root)
    if leftover:
        raise InMessageError(
            _('[A52]: Found non-valid data at end of excel file: "%(leftover)s".'),
            {"leftover": leftover},
        )
    del self.lex_records
    self.checkmessage(self.root, self.defmessage)
def __init__(self, ta_info):
    """Set up an outgoing message for ``ta_info``.

    After the base-class initialisation the message has no output stream
    yet and an empty message tree.
    """
    super(OutMessage, self).__init__(ta_info)
    # No output stream is attached at construction time.
    self._outstream: EdiFile = None
    # Message tree; built via the put()-interface in the mapping script.
    # Starts as a root node with an empty record dict.
    self.root = node.Node(record={})