def validate(self, instance_stream):
    """Validate an XML instance against this data format's XSD schema.

    The schema named by ``self.schema_filename`` is compiled and the
    instance document is checked against it.

    Args:
        instance_stream: Whatever ``etree.parse`` accepts for the instance
            document (it is passed through unchanged).

    Returns:
        The value returned by ``XMLSchema.validate`` for the instance.
        When the document is invalid, the ``DocumentInvalid`` error raised
        by ``assertValid`` is stored in ``self.issues`` before returning.

    Raises:
        Any error raised while opening/parsing the schema or parsing the
        instance document is propagated to the caller.
    """
    # Use a context manager so the schema file handle is always released.
    with open(self.schema_filename, "r") as schema:
        schema_parsed = etree.parse(schema)
        schema_parsed_xsd = etree.XMLSchema(schema_parsed)

    # NOTE(review): the original wrapped the code below in a ``try:`` that
    # had no visible ``except``/``finally`` clause.  No handler is invented
    # here; parse errors simply propagate.
    instance_parsed = etree.parse(instance_stream)
    results = schema_parsed_xsd.validate(instance_parsed)

    if results is None:
        print("The validator erred and couldn't determine if the xml "
              "was either valid or invalid.")
        return results

    if results:
        fileutils.makeBlock("The %s successfully validated." % self.name)
        return results

    print("The xml did not successfully validate against %s" % self.name)
    try:
        # assertValid() is called only to obtain the detailed error: it is
        # expected to raise DocumentInvalid for a document that just failed
        # validate().
        detailed_results = schema_parsed_xsd.assertValid(instance_parsed)
        print(detailed_results)
        self.issues = detailed_results  # Added by FBY on 2012-01-19
    except etree.DocumentInvalid as error:
        print("Document Invalid Exception. Here is the detail:")
        print(error)
        self.issues = error  # Added by FBY on 2012-01-19
    return results
def __init__(self):
    """Prepare the environment, run the file handler, then exit.

    Steps, in order:

    1. In TEST mode, show warning banners and pause so the operator can
       abort before the database is wiped.
    2. In DEBUG + TEST mode, blank the database (never done otherwise).
    3. Make sure the log directory exists and start the logger.
    4. Instantiate ``FileHandler``.
    5. Shut the logger down and call ``sys.exit()``, both when
       ``FileHandler()`` returns and when it is interrupted with Ctrl-C.

    Raises:
        SystemExit: from the final ``sys.exit()`` call.
    """
    in_test_mode = settings.MODE == 'TEST'

    # Display banner if in TEST mode.
    if in_test_mode:
        fileutils.makeBlock('CAUTION: TEST MODE - This wipes DB Clean')
        fileutils.makeBlock(
            "CTRL-C or CTRL-Break to Stop - (waiting before startup, "
            "in case you don't want to wipe your existing db)"
        )
        # NOTE(review): the original comment said "sleep for 10 seconds"
        # but the argument is 1 -- confirm the unit fileutils.sleep expects.
        fileutils.sleep(1)

    # Only reset the DB in DEBUG + TEST mode; PROD must never do this.
    if settings.DEBUG and in_test_mode:
        # Imported here so the module is only loaded when a wipe is wanted.
        import postgresutils
        postgresutils.Utils().blank_database()

    # Set up the logging directory.
    if not os.path.exists(settings.LOGS):
        os.mkdir(settings.LOGS)
    elif settings.DEBUG:
        print("Logs Directory exists: %s" % (settings.LOGS,))

    # ECJ20111117: the log level comes from conf/settings.py only; the
    # command-line override was removed.
    debugMessages = Logger(settings.LOGGING_INI, settings.logging_level)
    if settings.DEBUG:
        debugMessages.log("Logging System Online", 0)

    try:
        if settings.DEBUG:
            print("Now instantiating FileHandler")
        FileHandler()
        print("calling sys.exit")
    except KeyboardInterrupt:
        print('Stopping: KeyboardInterrupt at MainProcessor.py')

    # The original referenced ``sys.exit`` without calling it, so the
    # announced exit never happened.  Shut the logging system down first
    # (to flush buffers, send messages etc.), then really terminate.
    debugMessages.__quit__()
    sys.exit()
def validate(self, instance_filename):
    """Decide whether the input is JFCS service-event or client XML.

    Four tests are tried in order; the first one that passes wins:

    1. ``schemaTest`` against ``self.service_event_schema_filename``
    2. ``schemaTest`` against ``self.client_schema_filename``
    3. ``elementTest`` against ``self.service_event_elements`` (skipped
       when that attribute is ``None``)
    4. ``elementTest`` against ``self.client_elements`` (skipped when that
       attribute is ``None``)

    On success ``JFCSXMLInputReader.data_type`` is set to
    ``"service_event"`` or ``"client"``.

    Args:
        instance_filename: The instance to test; a ``copy.copy`` of it is
            handed to ``schemaTest`` / ``elementTest``.

    Returns:
        The result of the first passing test, or ``False`` when every test
        fails or any exception is raised.  On every ``False`` return a
        description of the failure is stored in ``self.issues``.
    """
    all_failed = "All the JFCS Tests Failed, returning False"
    syntax_error = ("XML Syntax Error in validate. "
                    "There appears to be malformed XML. ")

    copy_instance_stream = copy.copy(instance_filename)
    try:
        print("Determining by service event schema")
        results = self.schemaTest(copy_instance_stream,
                                  self.service_event_schema_filename)
        if results:
            fileutils.makeBlock("JFCS service event XML data found. Determined by service event schema.")
            JFCSXMLInputReader.data_type = "service_event"
            return results

        print("Determining by client schema")
        results = self.schemaTest(copy_instance_stream,
                                  self.client_schema_filename)
        if results:
            fileutils.makeBlock("JFCS client XML data found. Determined by client schema.")
            JFCSXMLInputReader.data_type = "client"
            return results

        print("Determining by service event elements.")
        if self.service_event_elements is not None:
            print(self.service_event_elements)
            results = self.elementTest(copy_instance_stream,
                                       self.service_event_elements)
            if results:
                fileutils.makeBlock("JFCS service event XML data found. Determined by service event elements.")
                JFCSXMLInputReader.data_type = "service_event"
                return results

        print("Determining by client elements.")
        if self.client_elements is not None:
            print(self.client_elements)
            results = self.elementTest(copy_instance_stream,
                                       self.client_elements)
            if results:
                fileutils.makeBlock("JFCS client XML data found. Determined by client elements.")
                JFCSXMLInputReader.data_type = "client"
                return results
            print("returning False")
        else:
            print(all_failed)

        # Record the failure on every "no test matched" path so callers
        # reading self.issues never see a missing or stale value.
        self.issues = all_failed
        return False
    except Exception as exception:
        # Deliberately broad: any failure in the helper tests is reported
        # as a validation failure rather than crashing the caller.
        print("%s %s" % (syntax_error, exception))
        self.issues = syntax_error + str(exception)
        return False
def validate(self, instance_stream):
    """Validate an Operation PAR XML document.

    The document must have exactly one root ``SourceDatabase`` element in
    the Operation PAR "ext" namespace (HUD_HMIS_2.8 XML also validates
    against the extended Operation PAR schema, so the namespace is checked
    explicitly) and must validate against ``self.schema_filename``.

    A schema-valid document is additionally checked for string length:
    elements typed ``hmis:string50`` carry no usable ``maxLength`` for the
    database, so any such element whose text is longer than 32 characters
    makes the document invalid.  This extra pass can be removed once
    ``maxLength`` is implemented for all elements in the schema.

    Args:
        instance_stream: Whatever ``etree.parse`` accepts for the instance
            document; a ``copy.copy`` of it is parsed.

    Returns:
        ``False`` when the namespace check or the length check fails;
        otherwise the value returned by ``XMLSchema.validate``.

    Raises:
        Any error raised while opening/parsing the schemas or the instance
        document is propagated to the caller.
    """
    par_namespace = "http://xsd.alexandriaconsulting.com/cgi-bin/trac.cgi/export/344/trunk/synthesis/xsd/Operation_PAR_Extend_HUD_HMIS_2_8.xsd"
    hud_hmis_namespace = "http://www.hmis.info/schema/2_8/HUD_HMIS_2_8.xsd"
    max_text_length = 32

    # Import schema for Operation PARS; the handle is closed once compiled.
    with open(self.schema_filename, "r") as schema:
        schema_parsed = etree.parse(schema)
        schema_parsed_xsd = etree.XMLSchema(schema_parsed)

    # Work on a copy of the stream rather than the caller's object.
    copy_instance_stream = copy.copy(instance_stream)
    xml_doc = etree.parse(copy_instance_stream)

    ext_namespace_check = xml_doc.xpath(
        "/ext:SourceDatabase",
        namespaces={"ext": par_namespace},
    )
    if len(ext_namespace_check) != 1:
        return False

    # Reuse the tree parsed above.  The original parsed the same stream a
    # second time, which fails once a file-like object has been consumed.
    # NOTE(review): the original also opened a ``try:`` here with no
    # visible ``except``/``finally``; no handler is invented.
    results = schema_parsed_xsd.validate(xml_doc)

    if results is None:
        print("The validator erred and couldn't determine if the xml "
              "was either valid or invalid.")
        return results

    if not results:
        print("The xml did not successfully validate against "
              "Operation PAR XML.")
        try:
            # Called only to obtain the detailed error; expected to raise
            # DocumentInvalid for a document that just failed validate().
            detailed_results = schema_parsed_xsd.assertValid(xml_doc)
            print(detailed_results)
        except etree.DocumentInvalid as error:
            print("Document Invalid Exception. Here is the detail:")
            print(error)
        return results

    # Import the original HUD HMIS 2.8 xsd that Operation PARS extended.
    schema_hudhmis_filename = settings.SCHEMA_DOCS["hud_hmis_2_8_xml"]
    with open(schema_hudhmis_filename, "r") as schema_hudhmis_raw:
        schema_hudhmis_parsed = etree.parse(schema_hudhmis_raw)

    # Collect the namespace-qualified tags of every hmis:string50 element
    # from both schemas (add further types here if needed).  A set keeps
    # the per-element membership test cheap.
    elements_maxlength = set()
    for namespace, parsed_schema in (
        (par_namespace, schema_parsed),
        (hud_hmis_namespace, schema_hudhmis_parsed),
    ):
        prefix = "{" + namespace + "}"
        for name in self.find_elements_by_type(parsed_schema, "hmis:string50"):
            elements_maxlength.add(prefix + name)

    # Check the text length of every collected element in the document.
    for element in xml_doc.getroot().iter():
        if str(element.tag) not in elements_maxlength:
            continue
        text = element.text
        # Empty elements have text None; they cannot exceed the limit.
        if text is not None and len(text) > max_text_length:
            print("XML Error. Value %s exceeds database field length." % str(element.tag))
            # Remove this return when testing and perform a manual
            # truncate in PARXMLReader().
            return False

    fileutils.makeBlock("The Operation PAR XML successfully validated.")
    return results