def start_element(self, name, attrs):
    """
    Function called by the parser every time a new element starts.

    Updates the parsing-state flags (which section of the kickstart
    record we are currently inside) and harvests the attributes declared
    in self._ks_elements into self._keys.

    :param name:  tag name of the element that just opened
    :param attrs: dict of the element's attributes
    """

    def _harvest(element):
        # Copy every registered attribute of this element into the flat
        # self._keys dictionary (shared by several branches below).
        for wanted in self._ks_elements[element]:
            if wanted in attrs:
                self._keys[wanted] = attrs[wanted]

    # Keep track if we are parsing the main job element
    if name == "mainjob":
        self._parsing_main_job = True
    if name == "machine":
        self._parsing_machine = True
    # Keep track if we are inside one of the job elements
    if name in ("setup", "prejob", "mainjob", "postjob", "cleanup"):
        self._parsing_job_element = True

    if name == "argument-vector" and name in self._ks_elements:
        # Start parsing arguments
        self._parsing_arguments = True
    elif name == "cwd" and name in self._ks_elements:
        # Start parsing cwd
        self._parsing_cwd = True
    elif name == "checksum" and name in self._ks_elements:
        # PM-1180 <checksum type="sha256" value="f23076..."/>
        # Stored nested under the element name, unlike the flat
        # harvesting done elsewhere.
        self._keys[name] = {}
        for attr_name in self._ks_elements[name]:
            if attr_name in attrs:
                self._keys[name][attr_name] = attrs[attr_name]
    elif name == "data":
        # Start parsing data for stdout and stderr output
        self._parsing_data = True
    elif name == "file" and name in self._ks_elements:
        # Special case: only harvested inside the mainjob element
        if self._parsing_main_job:
            _harvest(name)
    elif name == "ram" and name in self._ks_elements:
        # Special case: only harvested inside the machine element
        if self._parsing_machine:
            _harvest(name)
    elif name == "uname" and name in self._ks_elements:
        # Special case: only harvested inside the machine element
        if self._parsing_machine:
            _harvest(name)
    elif name == "signalled":
        # PM-1109 grab the attributes we are interested in
        # NOTE(review): assumes "signalled" is registered in
        # self._ks_elements -- a KeyError would escape otherwise.
        self._keys[name] = {}  # a dictionary indexed by attribute name
        self._parsing_signalled = True
        self._keys[name]["action"] = ""  # grabbed later in char data
        for attr in attrs:
            if attr in self._ks_elements[name]:
                self._keys[name][attr] = attrs[attr]
    elif name == "statcall":
        if "id" in attrs:
            if attrs["id"] == "stdout" and "stdout" in self._ks_elements:
                self._parsing_stdout = True
            elif attrs["id"] == "stderr" and "stderr" in self._ks_elements:
                self._parsing_stderr = True
            elif attrs["id"] == "final":
                self._parsing_final_statcall = True
                self._lfn = attrs["lfn"]
    elif name == "statinfo":
        if self._parsing_final_statcall is True:
            # Collect the stat record of a final output file, keyed by lfn
            statinfo = FileMetadata()
            for my_element in self._ks_elements[name]:
                if my_element in attrs:
                    statinfo.add_attribute(my_element, attrs[my_element])
            if "outputs" not in self._keys:
                self._keys["outputs"] = {}  # a dictionary indexed by lfn
            lfn = self._lfn
            statinfo.set_id(lfn)
            if lfn is None or not statinfo:
                logger.warning(
                    "Malformed/Empty stat record for output lfn %s %s"
                    % (lfn, statinfo))
            self._keys["outputs"][lfn] = statinfo
    elif name == "usage" and name in self._ks_elements:
        if self._parsing_job_element:
            # Special case: utime and stime accumulate across the job
            # phases, so add to any previous value instead of overwriting
            for my_element in self._ks_elements[name]:
                if my_element in attrs:
                    try:
                        value = float(attrs[my_element])
                    except ValueError:
                        logger.warning(
                            "cannot convert element %s to float!", my_element)
                    else:
                        self._keys[my_element] = (
                            self._keys.get(my_element, 0.0) + value)
    else:
        # For all other elements, check if we want them
        if name in self._ks_elements:
            _harvest(name)
def start_element(self, name, attrs):
    """
    Parser callback fired at the opening tag of every element.

    Flags which section of the record is currently being parsed and
    captures the attributes listed in self._ks_elements into self._keys.
    """
    # Flag the sections we are entering
    if name == "mainjob":
        self._parsing_main_job = True
    if name == "machine":
        self._parsing_machine = True
    if name in {"setup", "prejob", "mainjob", "postjob", "cleanup"}:
        # inside one of the job elements
        self._parsing_job_element = True

    registered = name in self._ks_elements

    if name == "argument-vector" and registered:
        # begin collecting the argument vector
        self._parsing_arguments = True
    elif name == "cwd" and registered:
        # begin collecting cwd
        self._parsing_cwd = True
    elif name == "checksum" and registered:
        # PM-1180 <checksum type="sha256" value="f23076..."/>
        # kept nested under the element name
        self._keys[name] = {}
        for key in self._ks_elements[name]:
            if key in attrs:
                self._keys[name][key] = attrs[key]
    elif name == "data":
        # begin collecting stdout/stderr payload
        self._parsing_data = True
    elif name == "file" and registered:
        if self._parsing_main_job == True:
            # file is only harvested while inside the mainjob element
            for key in self._ks_elements[name]:
                if key in attrs:
                    self._keys[key] = attrs[key]
    elif name == "ram" and registered:
        if self._parsing_machine == True:
            # ram is only harvested while inside the machine element
            for key in self._ks_elements[name]:
                if key in attrs:
                    self._keys[key] = attrs[key]
    elif name == "uname" and registered:
        if self._parsing_machine == True:
            # uname is only harvested while inside the machine element
            for key in self._ks_elements[name]:
                if key in attrs:
                    self._keys[key] = attrs[key]
    elif name == "signalled":
        # PM-1109 capture the attributes we care about
        self._keys[name] = {}  # keyed by attribute name
        self._parsing_signalled = True
        self._keys[name]["action"] = ""  # filled in later from char data
        for key, value in attrs.items():
            if key in self._ks_elements[name]:
                self._keys[name][key] = value
    elif name == "statcall":
        if "id" in attrs:
            statcall_id = attrs["id"]
            if statcall_id == "stdout" and "stdout" in self._ks_elements:
                self._parsing_stdout = True
            elif statcall_id == "stderr" and "stderr" in self._ks_elements:
                self._parsing_stderr = True
            elif statcall_id == "final":
                self._parsing_final_statcall = True
                self._lfn = attrs["lfn"]
    elif name == "statinfo":
        if self._parsing_final_statcall is True:
            # build the stat record for a final output file
            statinfo = FileMetadata()
            for key in self._ks_elements[name]:
                if key in attrs:
                    statinfo.add_attribute(key, attrs[key])
            if "outputs" not in self._keys:
                self._keys["outputs"] = {}  # keyed by lfn
            lfn = self._lfn
            statinfo.set_id(lfn)
            if lfn is None or not statinfo:
                logger.warning(
                    "Malformed/Empty stat record for output lfn %s %s"
                    % (lfn, statinfo))
            self._keys["outputs"][lfn] = statinfo
    elif name == "usage" and registered:
        if self._parsing_job_element:
            # utime and stime accumulate over the job lifecycle
            for key in self._ks_elements[name]:
                if key not in attrs:
                    continue
                if key in self._keys:
                    try:
                        self._keys[key] = self._keys[key] + float(attrs[key])
                    except ValueError:
                        logger.warning(
                            "cannot convert element %s to float!" % (key))
                else:
                    try:
                        self._keys[key] = float(attrs[key])
                    except ValueError:
                        logger.warning(
                            "cannot convert element %s to float!" % (key))
    else:
        # any other element: harvest whatever attributes were requested
        if name in self._ks_elements:
            for key in self._ks_elements[name]:
                if key in attrs:
                    self._keys[key] = attrs[key]
def map_yaml_to_ver2_format(self, data):
    """
    Maps from new yaml dict format to old v2 format we used with the
    xml records.

    unmappable: "file": ["name"]
    """
    # pairs of (path in the new yaml dict, path in the old v2 dict)
    my_map = [
        [["hostname"], ["hostname"]],
        [["resource"], ["resource"]],
        [["user"], ["user"]],
        [["hostaddr"], ["hostaddr"]],
        [["transformation"], ["transformation"]],
        [["derivation"], ["derivation"]],
        [["mainjob", "duration"], ["duration"]],
        [["mainjob", "start"], ["start"]],
        [["usage", "utime"], ["utime"]],
        [["usage", "stime"], ["stime"]],
        [["machine", "ram_total"], ["ram"]],
        [["machine", "uname_system"], ["system"]],
        [["machine", "uname_release"], ["release"]],
        [["machine", "uname_machine"], ["machine"]],
        [["mainjob", "executable", "file_name"], ["name"]],
        [["mainjob", "status", "raw"], ["raw"]],
        [["mainjob", "status", "signalled_signal"], ["signal"]],
        [["mainjob", "status", "signalled_name"], ["action"]],
        [["mainjob", "status", "corefile"], ["corefile"]],
        [["mainjob", "status", "regular_exitcode"], ["exitcode"]],
        [["cwd"], ["cwd"]],
        [["files", "stdout", "data"], ["stdout"]],
        [["files", "stderr", "data"], ["stderr"]],
    ]

    new_data = {"invocation": True, "checksum": {}, "outputs": {}}
    for src_path, dst_path in my_map:
        self.dicts_remap(data, src_path, new_data, dst_path)

    # some mappings are based on lfns
    if "files" in data:
        for lfn, file_data in data["files"].items():
            # only files flagged as outputs are carried over
            produced = file_data.get("output", False)
            if not produced:
                continue
            meta = FileMetadata()
            meta._id = lfn
            # add whatever 4.9 attributes are, e.g.:
            # { "_type": "file", "_id": "f.b2",
            #   "_attributes": {
            #       "ctime": "2019-02-19T16:42:52-08:00",
            #       "checksum.timing": "0.144", "user": "******",
            #       "checksum.type": "sha256",
            #       "checksum.value": "4a77bee20a28a446506ef7531ffc038053f52e5211d93a95fe5193746af8d23a",
            #       "size": "123" } }
            if "user" in file_data:
                meta.add_attribute("user", str(file_data["user"]))
            if "size" in file_data:
                meta.add_attribute("size", str(file_data["size"]))
            if "ctime" in file_data:
                meta.add_attribute("ctime", file_data["ctime"])
            if "sha256" in file_data:
                meta.add_attribute("checksum.type", "sha256")
                meta.add_attribute("checksum.value", file_data["sha256"])
            if "checksum_timing" in file_data:
                meta.add_attribute("checksum.timing",
                                   str(file_data["checksum_timing"]))
            # what else?
            new_data["outputs"][lfn] = meta
    return new_data
def map_yaml_to_ver2_format(self, data):
    """
    Maps from new yaml dict format to old v2 format we used with the
    xml records.

    unmappable: "file": ["name"]

    :param data: parsed yaml record (new format)
    :return: dict in the old v2 layout, with output files collected as
             FileMetadata objects under the "outputs" key
    """
    # pairs of (path in the new yaml dict, path in the old v2 dict)
    my_map = [
        [["hostname"], ["hostname"]],
        [["resource"], ["resource"]],
        [["user"], ["user"]],
        [["hostaddr"], ["hostaddr"]],
        [["transformation"], ["transformation"]],
        [["derivation"], ["derivation"]],
        [["mainjob", "duration"], ["duration"]],
        [["mainjob", "start"], ["start"]],
        [["usage", "utime"], ["utime"]],
        [["usage", "stime"], ["stime"]],
        [["machine", "ram_total"], ["ram"]],
        [["machine", "uname_system"], ["system"]],
        [["machine", "uname_release"], ["release"]],
        [["machine", "uname_machine"], ["machine"]],
        [["mainjob", "executable", "file_name"], ["name"]],
        [["mainjob", "status", "raw"], ["raw"]],
        [["mainjob", "status", "signalled_signal"], ["signal"]],
        [["mainjob", "status", "signalled_name"], ["action"]],
        [["mainjob", "status", "corefile"], ["corefile"]],
        [["mainjob", "status", "regular_exitcode"], ["exitcode"]],
        [["cwd"], ["cwd"]],
        [["files", "stdout", "data"], ["stdout"]],
        [["files", "stderr", "data"], ["stderr"]],
    ]

    new_data = {}
    new_data["invocation"] = True
    new_data["checksum"] = {}
    new_data["outputs"] = {}
    for mapping in my_map:
        self.dicts_remap(data, mapping[0], new_data, mapping[1])

    # some mappings are based on lfns
    if "files" in data:
        for lfn in data["files"]:
            file_data = data["files"][lfn]
            output = file_data["output"] if "output" in file_data.keys() else False
            if not output:
                continue
            meta = FileMetadata()
            meta._id = lfn
            # add whatever 4.9 attributes are, e.g.:
            # { "_type": "file", "_id": "f.b2",
            #   "_attributes": {
            #       "ctime": "2019-02-19T16:42:52-08:00",
            #       "checksum.timing": "0.144", "user": "******",
            #       "checksum.type": "sha256",
            #       "checksum.value": "4a77bee20a28a446506ef7531ffc038053f52e5211d93a95fe5193746af8d23a",
            #       "size": "123" } }
            if "user" in data["files"][lfn]:
                meta.add_attribute("user", str(file_data["user"]))
            if "size" in data["files"][lfn]:
                meta.add_attribute("size", str(file_data["size"]))
            if "ctime" in data["files"][lfn]:
                meta.add_attribute("ctime", file_data["ctime"])
            if "sha256" in data["files"][lfn]:
                meta.add_attribute("checksum.type", "sha256")
                meta.add_attribute("checksum.value", file_data["sha256"])
            if "checksum_timing" in data["files"][lfn]:
                # BUGFIX: the attribute must be stored as "checksum.timing"
                # (dotted), matching "checksum.type"/"checksum.value" above
                # and the 4.9 record layout in the example -- it was
                # previously stored under the yaml key "checksum_timing"
                meta.add_attribute("checksum.timing",
                                   str(file_data["checksum_timing"]))
            # what else?
            new_data["outputs"][lfn] = meta
    return new_data