示例#1
0
    def parse(self, response):
        """Build one search request per code listed in ``code_exeption.json``.

        The JSON file is loaded through ``CapsuleParse.read_jason``; every
        entry must expose a ``'codigo'`` key.  For each entry a new ``Hts``
        item is created with its ``'code'`` field set, and a GET
        ``FormRequest`` to ``self.detail_urls[0]`` is built whose ``hscode``
        form field carries that code.  The item travels with the request in
        ``meta['hts']`` and the response is handled by ``self.hts_list``.

        Side effect: ``self.formDetail`` is reset to an empty list.

        Args:
            response: the start response; it is not inspected here.

        Returns:
            list: one ``FormRequest`` per entry of the JSON file.
        """
        cap = CapsuleParse()
        entries = cap.read_jason('code_exeption.json')
        self.formDetail = []

        # Search form template; 'hscode' is a placeholder that is replaced
        # for every entry below.
        base_form = {
            'countries': 'PE',
            'country': 'all',
            'countryid': '',
            'datacat_id': 'AT',
            'display': '20',
            'hscode': '0101',
            'langId': 'en',
            'language': 'all',
            'option': '1',
            'sector': 'all',
            'showregimes': '',
            'submit': 'Search',
            'license': 'Accept',
            'year1': '',
            'year2': '',
        }

        # NOTE: the interactive pdb breakpoint that used to sit here was
        # removed; it stopped every crawl waiting for keyboard input.
        form_requests = []
        for entry in entries:
            hts = Hts()
            hts['code'] = entry['codigo']
            # Give each request its own dict so requests never share (and
            # later observe changes to) one mutable form.
            form_data = dict(base_form, hscode=entry['codigo'])
            form_requests.append(
                FormRequest(
                    method='GET',
                    url=self.detail_urls[0],
                    formdata=form_data,
                    callback=self.hts_list,
                    meta={'hts': hts},
                )
            )
        return form_requests
示例#2
0
    def hts_tariff(self, response):
        """Replace the item's ``'tariff_all'`` field with the full tariff list.

        The item is taken from ``response.meta['hts']``.  Three fragments are
        extracted from the ``col-2`` section of the page (dumping text,
        general paragraph, insurance paragraph) and handed to the matching
        ``CapsuleParse`` helpers, together with the value already stored in
        ``item['tariff_all']``.

        The resulting list holds, in order: every element produced by
        ``parse_general``, then the ``parse_insurance`` result, then the
        ``parse_advalorem`` result.  The dumping fragment is parsed but its
        result is not stored.

        Args:
            response: detail-page response carrying the item in its meta.

        Returns:
            The same item, with ``'tariff_all'`` overwritten.

        Raises:
            IndexError: if any of the three XPath queries matches nothing.
        """
        helper = CapsuleParse()
        item = response.meta['hts']
        selector = HtmlXPathSelector(response)

        dumping_nodes = selector.select(
            "//*[@id='col-2']/div[3]/div[1]/p/text()").extract()
        general_nodes = selector.select(
            "//*[@id='col-2']/div[3]/div[2]/p[2]").extract()
        insurance_nodes = selector.select(
            "//*[@id='col-2']/div[3]/div[2]/p[4]").extract()

        ad_valorem = helper.parse_advalorem(item['tariff_all'])
        # The dumping result is not part of the output; the call is kept so
        # the helper still runs exactly as before.
        helper.parse_dumping(dumping_nodes[0])
        general_entries = helper.parse_general(general_nodes[0])
        insurance_entry = helper.parse_insurance(insurance_nodes[0])

        # General entries first, then insurance, then ad valorem.
        item['tariff_all'] = list(general_entries) + [insurance_entry, ad_valorem]
        return item
示例#3
0
    def hts_list(self, response):
        """Turn a search-result table into one tariff-detail request per row.

        The item sent along with the search request is read from
        ``response.meta['hts']``.  A table row is kept when all of these hold:

        * it yields a code (plain text or link text of the first cell) and a
          description (text of the second cell);
        * its first cell contains a link;
        * the parsed code is longer than five characters;
        * the parsed code contains the searched code ``hts['code']``.

        For every kept row an ``Hts_tariff`` item is filled in (``code``,
        ``name``, ``hs``, ``tariff_all``) and attached through
        ``meta['hts']`` to a GET ``FormRequest`` for ``self.detail_urls[2]``
        whose callback is ``self.hts_tariff``.

        Args:
            response: response of a search request.

        Returns:
            list: the detail requests that were built.  A row without an
            ad valorem image is skipped with a warning.  Any unexpected
            error is logged with its traceback (instead of opening an
            interactive debugger) and the requests collected before the
            failure are still returned.
        """
        # Local import: the module's import block is not visible from here.
        import logging

        logger = logging.getLogger(__name__)
        form_requests = []
        try:
            cap = CapsuleParse()
            hts = response.meta['hts']
            parser = HtmlXPathSelector(response)
            rows = parser.select(
                '//div[4]/div[1]/div[1]/div[2]/div[1]/table/tr[position()>2]')
            for row in rows:
                code = row.select('.//td[1]/text()').extract()
                if not code:
                    # The code may be wrapped in a link instead of plain text.
                    code = row.select('.//td[1]/a/text()').extract()
                description = row.select('.//td[2]/text()').extract()
                if not code or not description:
                    continue

                code = cap.parse_code(code[0])
                url = row.select('.//td[1]/a/@href').extract()
                # Keep only rows that carry a link and whose code is longer
                # than five characters.
                if not url or len(code) <= 5:
                    continue

                print("CODIGO A ANALISAR: " + code + " in " + hts['code'])
                # Keep only codes related to the code read from the JSON file.
                if hts['code'] not in code:
                    continue

                # The ad valorem image sits in the third cell, or in the
                # second one when the third has none.
                advalorem = row.select('.//td[3]/img/@src').extract()
                if not advalorem:
                    advalorem = row.select('.//td[2]/img/@src').extract()
                if not advalorem:
                    logger.warning(
                        "hts_list: no ad valorem image for code %s, row skipped",
                        code)
                    continue

                hts_tariff = Hts_tariff()
                hts_tariff['code'] = cap.complete_code(code)
                hts_tariff['name'] = cap.parse_description(description[0])
                hts_tariff['hs'] = cap.parse_code(code)[:6]
                hts_tariff['tariff_all'] = cap.parse_srcadvalorem(advalorem[0])
                print("CODIGO VALIDO: " + hts_tariff['code'])

                form_data = {
                    'datasetid': 'MAAT-PE12-05v001',
                    'hscode': hts_tariff['code'],
                    'countries': 'PE',
                    'datacat_id': 'AT',
                    'keyword': '',
                    'submit': '',
                    'showall': 'T',
                    'pathtoimage': 'http://madb.europa.eu/at/images/&showall=T',
                }
                form_requests.append(
                    FormRequest(
                        method='GET',
                        url=self.detail_urls[2],
                        formdata=form_data,
                        callback=self.hts_tariff,
                        meta={'hts': hts_tariff},
                    )
                )
        except Exception:
            # Never block the crawl on a debugger prompt: record the failure
            # and fall through to return whatever was collected.
            logger.exception("hts_list failed for %s", response.url)
        return form_requests