Exemplo n.º 1
0
    def parse_related_documents(self, response):
        """
        Collect relations to other documents from a related documents page.

        Builds a ``LegalAct`` loader holding the document ``_id`` (or a
        ``NONUMBER-<p_id>`` placeholder when the page gives none) and a
        ``relations`` dict mapping a relation name to the list of values
        extracted from the fourth cell of each matching table row.

        Rows whose relation type cell is missing, or whose relation type is
        not one of the known labels, are ignored.

        Yields the loaded item only when at least one relation was found.

        """
        # Lower-cased relation type label, as it appears in the sixth cell of
        # a row, mapped to the key the related document is stored under.
        relation_keys = {
            u'pakeistas dokumentas': 'amends',
            u'ankstesnė dokumento redakcija': 'amends',
            u'priimtas dokumentas': 'adopts',
            u'ryšys su taikymą nusakančiu dokumentu': 'defines_applicability',
            u'ryšys su galiojimą nusakančiu dokumentu': 'defines_validity',
            u'negalioja de jure': 'defines_as_no_longer_valid',
            u'kitas projekto variantas': 'new_draft_version',
            u'ryšys su ratifikavimo dokumentu': 'ratification',
        }

        # Both queries below are absolute, so one selector serves them both.
        page = HtmlXPathSelector(response)

        xpath = '/html/body/div/table/tr[3]/td/table/tr/td/table/tr'
        hxs = page.select(xpath)
        act = Loader(self, response, LegalAct(), hxs, required=('_id', ))
        act.add_xpath('_id', 'td[2]/b/text()')

        # Fall back to a placeholder id built from the ``p_id`` query
        # attribute of the page url.
        if not act.get_output_value('_id'):
            p_id = unicode(self._get_query_attr(response.url, 'p_id'))
            act.replace_value('_id', u'NONUMBER-%s' % p_id)

        relations = defaultdict(list)
        # NOTE(review): the ``align`` step looks unusual for an element name;
        # presumably it mirrors the site's actual markup -- confirm.
        xpath = '/html/body/div/table/tr[3]/td/table/tr/td/align/table/tr'
        for row in page.select(xpath):
            docid = get_all(row, 'td[4]/span//text()')

            rel_type_cell = row.select('td[6]/span/text()')
            if not rel_type_cell:
                # No relation type on this row, nothing to record.
                continue

            rel_type = rel_type_cell.extract()[0].strip().lower()
            key = relation_keys.get(rel_type)
            if key is not None:
                relations[key].append(docid)

        if relations:
            act.add_value('relations', dict(relations))
            yield act.load_item()
Exemplo n.º 2
0
    def _parse_law_act(self, response, hxs, base=False):
        """
        Build a law act loader filled with the document's basic details.

        The document language is read from the fourth cell of the first row.
        A language outside the known set is reported through ``self.error``.
        Any language other than Lithuanian makes the method return ``None``.

        Parameters:

        base
            When true, the loader is returned as soon as ``_id`` is set,
            without name, kind, number, date or source. Useful when a single
            law act is assembled from several law act documents.

        """
        known_languages = (u'lietuvių', u'rusų', u'anglų', u'ispanų')
        # Remaining loader fields and the xpath each one is read from.
        detail_fields = (
            ('name', 'caption/text()'),
            ('kind', 'tr[1]/td[1]/b/text()'),
            ('number', 'tr[1]/td[2]/b/text()'),
            ('date', 'tr[1]/td[3]/b/text()'),
        )

        lang_texts = hxs.select('tr[1]/td[4]/b/text()').extract()
        lang = lang_texts[0].strip().lower()

        if lang not in known_languages:
            self.error(response, 'Unknown language: %s' % lang)

        # Only Lithuanian documents produce a loader.
        if lang != u'lietuvių':
            return None

        loader = Loader(
            self, response, LegalAct(), hxs, required=REQUIRED_FIELDS)
        loader.add_xpath('_id', 'tr[1]/td[2]/b/text()')

        source = self._get_source(response.url, 'p_id')

        # No number on the page: use a placeholder built from the source id.
        if not loader.get_output_value('_id'):
            loader.replace_value('_id', u'NONUMBER-%s' % source['id'])

        if not base:
            for field, field_xpath in detail_fields:
                loader.add_xpath(field, field_xpath)

            loader.add_value('source', source)
            self._fix_name_case(loader)

        return loader