Пример #1
0
    def schema(self):
        '''Build the schema object used for validation (anzlic/gmd).

        Opens the file named by ANZLIC, parses it and compiles the result
        with XMLSchema. Returns the compiled schema, or None when
        compilation raises XMLSchemaParseError (the failure is printed and
        written to the check log).
        '''
        try:
            with open(ANZLIC) as xsd_file:
                xsd_tree = LXMLetree().parse(xsd_file).gettree()
                return LXMLetree().XMLSchema(xsd_tree)
        except XMLSchemaParseError as parse_err:
            failure = 'WFSMeta XMLSchemaParseError causing Failure - {0}\nThis usually indicates a libxml2<2.7.8 problem'
            print(failure.format(parse_err))
            self.checklog.error('WFSxs - {0}'.format(parse_err))

        # Only reached when the schema could not be compiled
        return None
Пример #2
0
    def parse(self, pno):
        '''Parse the date fields from an LDS layer's ISO metadata XML.

        Downloads the metadata document for layer `pno` and reads the text
        of the elements selected by the mxp paths 'fd', 'pd', 'rd' and 'ed'.
        When a 'pd' or 'rd' value is present, the matching date-type element
        ('pdt' / 'rdt') must mention 'publication' / 'revision'; otherwise a
        DateAvailabilityException is raised, logged as a warning, and the
        values read so far are returned.

        HTTP errors are logged and retried until the error count exceeds
        MAX_DVL_RETRY, after which the HTTPError propagates to the caller.

        Returns a 5-tuple (n1, n2, n3, n4, er): the four element texts in
        the order listed above (0 for any that were not reached) and er,
        which is always None.
        '''
        url = 'https://data.linz.govt.nz/layer/{0}/metadata/iso/xml'.format(
            pno)
        n1, n2, n3, n4 = 4 * (0, )
        er = None
        ecounter = 0

        def has_datetype(root, key, expected):
            # True when the element at mxp[key] has text mentioning `expected`.
            # The pattern is the expected word and the subject is the element
            # text; passing the element itself as the pattern is a TypeError.
            # NOTE(review): assumes the date-type word is in the element's
            # own text rather than an attribute - confirm against mxp.
            node = root.find(mxp[key], namespaces=NSX)
            if node is None or not node.text:
                return False
            return re.search(expected, node.text) is not None

        retry = True
        while retry:
            retry = False
            try:
                # closing() closes the response on every exit path, so no
                # separate finally/close is needed
                with closing(U2.urlopen(url)) as content:
                    root = LXMLetree().parse(content).getroot()

                    n1 = root.find(mxp['fd'], namespaces=NSX).text
                    n2 = root.find(mxp['pd'], namespaces=NSX).text
                    if n2 and not has_datetype(root, 'pdt', 'publication'):
                        raise DateAvailabilityException(
                            'Cannot associate date to datetype Pub')
                    n3 = root.find(mxp['rd'], namespaces=NSX).text
                    if n3 and not has_datetype(root, 'rdt', 'revision'):
                        raise DateAvailabilityException(
                            'Cannot associate date to datetype Rev')
                    n4 = root.find(mxp['ed'], namespaces=NSX).text

            except U2.HTTPError:
                if ecounter > MAX_DVL_RETRY:
                    raise
                # format() rather than '+' so a non-string pno cannot raise
                self.checklog.warn(
                    'LPS1 - Error parsing metadata dates {0}'.format(pno))
                ecounter += 1
                retry = True
            except DateAvailabilityException as dae:
                self.checklog.warn(
                    'LPS2 - Error identifying date {0}. {1}'.format(pno, dae))
        return n1, n2, n3, n4, er
Пример #3
0
    def metadata(self, lid):
        '''Fetch the metadata document for layer `lid` and validate it.

        Requests the URL built from self.mta using HTTP Basic credentials
        taken from self.userpass, then validates the response against
        self.schema and self.metamandatory. Every failure is printed and/or
        written to the check log; nothing is raised to the caller for the
        exception types handled below, and the method returns None.

        Retries happen in two cases: an HTTPError mentioning 429 (the API
        key is swapped first, with a sleep of SLEEP_TIME when KEYINDEX is 0)
        and a URLError caused by a socket timeout.
        NOTE(review): neither retry path has an attempt limit.
        '''
        # Declared up front; KEY is rebound in the 429 handler below
        global KEY

        # b64encode needs bytes on Python 3 and, unlike the removed
        # base64.encodestring, never inserts newlines into its output
        creds = '{0}:{1}'.format(*self.userpass)
        if not isinstance(creds, bytes):
            creds = creds.encode('utf-8')
        b64s = base64.b64encode(creds).decode('ascii')

        svr = None  # never assigned elsewhere; appears only in log messages
        url = self.mta.format(lid=lid)
        self.checklog.debug('WFS0u-' + kmask(url))

        retry = True
        while retry:
            retry = False
            try:
                #cookiesetup(url)
                req = U2.Request(url)
                req.add_header('Authorization', 'Basic {0}'.format(b64s))
                with closing(U2.urlopen(url=req,
                                        timeout=URL_TIMEOUT)) as content:
                    ct = str(content.read())
                    # A truthy result from self.content() skips parsing; with
                    # retry still False this also ends the loop
                    if self.content(lid, ct):
                        continue
                    xdoc = LXMLetree().fromstring(ct)
                    if self.schema.validate(
                            xdoc.gettree()) and self.metamandatory(xdoc):
                        self.checklog.debug('WFS0v-{0}'.format(lid))
                    else:
                        raise WFSServiceException(
                            'XML Fails validation against gmd.xsd with {0}'.
                            format(self.schema.error_log.last_error.message))
            except U2.HTTPError as he:
                # Handled before URLError, which is caught separately below
                if re.search('429', str(he)):
                    self.checklog.debug(
                        'WFS0k - Swap keys and Retry on 429. {1}'.format(
                            SLEEP_TIME, he))
                    KEY = apikey(KEY_FILE)
                    if KEYINDEX == 0:
                        self.checklog.debug(
                            'WFS04 - Wait {0}s and Retry on 429. {1}'.format(
                                SLEEP_TIME, he))
                        time.sleep(SLEEP_TIME)
                    retry = True
                    continue
                print('WFSMeta {0} Failure - {1}'.format(lid, he))
                self.checklog.error(
                    'WFSm0h - {0}/{1}\n{2} on server {3}'.format(
                        lid, he, kmask(url), svr))
            except WFSServiceException as we:
                print('WFSMeta {0} XSD Failure - {1}'.format(lid, we))
                self.checklog.error(
                    'WFSm0se - {0}/{1}\n{2} on server {3}'.format(
                        lid, we, kmask(url), svr))
            except XMLSyntaxError as xe:
                print('WFSMeta {0} XMLSyntaxError causing Parse Failure - {1}'.
                      format(lid, xe))
                # Log the first 100 characters of the body that failed to parse
                self.checklog.error(
                    'WFSm0x - {0}/{1}\n{2}\n{3} on server {4}'.format(
                        lid, xe, kmask(url), ct[:100], svr))
            except U2.URLError as ue:
                print('WFS0ue {0} URLError - {1}'.format(lid, ue))
                if isinstance(ue.reason, socket.timeout):
                    self.checklog.error(
                        'WFS0uet - {0}/{1}\n{2} on server {3}. Retry'.format(
                            lid, ue, kmask(url), svr))
                    retry = True
                    continue
                else:
                    self.checklog.error(
                        'WFS0ueo - {0}/{1}\n{2} on server {3}'.format(
                            lid, ue, kmask(url), svr))
            except Exception as ee:
                # Last-resort boundary: report and log anything unexpected
                print('WFS0ee {0} Exception - {1}'.format(lid, ee))
                self.checklog.error(
                    'WFS0ee - {0}/{1}\n{2} on server {3}'.format(
                        lid, ee, kmask(url), svr))
Пример #4
0
 def __init__(self, upd, checklog):
     '''Store the credentials and logger, then load the tree and schema.

     upd is kept as self.userpass and checklog as self.checklog. The tree
     is parsed from the source string produced by formatting self.rs3.
     '''
     self.userpass, self.checklog = upd, checklog

     # NOTE(review): `id` is not bound in this method, so it resolves to a
     # module-level name or the builtin - confirm where id[0] comes from.
     load = LXMLetree().parse
     source = self.rs3.format(id=id[0].text, csw_v=CSW, gmd=NSX['gmd'])
     self.tree = load(source)

     # Rebinds the name on the instance: self.schema now holds the value
     # returned by calling schema()
     self.schema = self.schema()