Пример #1
0
    def parse_data(self, response):
        """Yield one ``AutodataItem`` per Xcars booking block in the response.

        Each item starts with every field set to its default value; make and
        model are then read from the block heading, and price, year, mileage
        and colours from the label/value list rows.

        Args:
            response: Response object queried with XPath; its ``url`` is
                stored on the item and in the metadata.

        Yields:
            An ``AutodataItem`` with a ``meta`` dict copied from a
            ``MetaItem``.
        """
        # Fields are assigned in this exact order for every item.
        field_order = (
            "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
            "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
            "Make", "model", "Spec", "Doors", "transmission", "trim",
            "bodystyle", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "mileage_unit", "engine_unit",
            "autodata_Make", "autodata_Make_id", "autodata_model",
            "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
            "autodata_transmission", "autodata_transmission_id",
            "autodata_bodystyle", "autodata_bodystyle_id",
        )
        # Everything not listed here defaults to the empty string.
        fixed_values = {
            "Country": "KSA",
            "Seller_Type": "Large Independent Dealers",
            "Seller_Name": "Xcars",
            "vat": "yes",
            "Last_Code_Update_Date": "Thursday, June 04, 2019",
        }

        for block in response.xpath('//div[@class="booking-info zxcv abcd"]'):
            item = AutodataItem()
            item2 = MetaItem()
            for field in field_order:
                item[field] = fixed_values.get(field, "")

            make = "".join(block.xpath('h2/span[1]/text()').extract()).strip()
            model = "".join(block.xpath('h2/span[2]/text()').extract()).strip()
            item["Make"] = make
            item["model"] = model
            # Spec is taken from the same heading span as the model.
            item["Spec"] = model
            item["Car_Name"] = make + ' ' + model
            item['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

            # NOTE(review): this XPath starts with "//", so it is evaluated
            # against the whole document rather than only this block -
            # confirm that is intended if a page can hold several blocks.
            for row in block.xpath('//div[@class="col-xs-7"]/ul/li'):
                label = ''.join(row.xpath('p[1]/text()').extract()).strip()
                text = ''.join(row.xpath('p[2]/text()').extract()).strip()
                words = text.split(' ')
                if 'Price' in label:
                    # First word is stored as the amount, last as the currency.
                    item['Price_Currency'] = words[-1]
                    item['asking_price_inc_VAT'] = words[0]
                elif 'Year' in label:
                    item['Year'] = text
                elif 'Mileage' in label:
                    item['mileage'] = words[0]
                    item['mileage_unit'] = words[-1]
                elif 'Exterior' in label:
                    item["colour_exterior"] = text
                elif 'Interior' in label:
                    item["colour_interior"] = text

            for meta_key, meta_value in (
                    ('src', "xcars.co"),
                    ('ts', datetime.utcnow().isoformat()),
                    ('name', "xcars"),
                    ('url', response.url),
                    ('uid', str(uuid.uuid4())),
            ):
                item2[meta_key] = meta_value
            # The checksum is taken while Car_URL is still empty and before
            # meta/Source are attached.
            snapshot = json.dumps(dict(item), sort_keys=True)
            item2['cs'] = hashlib.md5(snapshot.encode('utf-8')).hexdigest()

            item['meta'] = dict(item2)
            item['Car_URL'] = response.url
            item['Source'] = item2['src']
            yield item
Пример #2
0
    def parse_data(self, response):
        """Build a single ``AutodataItem`` from an Al-Zayani vehicle page.

        Values are read from the title block, the "overview data" cells, the
        location block and the current-price block.

        A price text that does not split into at least two space-separated
        tokens (for example a missing price) leaves both ``Price_Currency``
        and ``asking_price_inc_VAT`` empty instead of raising ``IndexError``.

        Args:
            response: Response object to parse; its ``url`` is stored on the
                item and in the metadata.

        Yields:
            The populated ``AutodataItem``.
        """
        sel = Selector(response)
        item2 = MetaItem()
        item1 = AutodataItem()

        def text_at(xpath):
            # Join every matching text node and trim surrounding whitespace.
            return ''.join(sel.xpath(xpath).extract()).strip()

        def overview(cell):
            # Text of one named cell of the overview-data module.
            return text_at(
                '//div[@class="overview-data module overview-data-standard"]'
                '/div/div/div[@class="cell %s"]'
                '/span[@class = "value %s"]/text()' % (cell, cell))

        def title_part(css_class):
            # Text of one span of the centred title heading.
            return text_at(
                '//div[@class="title module align-center"]'
                '/h3/span[@class="%s"]/text()' % css_class)

        # Each of these nodes feeds two fields, so query them only once.
        mileage_tokens = overview('mileage').split(' ')
        engine_tokens = overview('engine-size').split(' ')
        price_tokens = text_at(
            '//div[@class="price-now"]/span[@class="value"]/text()'
        ).split(' ')
        if len(price_tokens) >= 2:
            # First token is stored as the currency, second as the amount.
            currency, amount = price_tokens[0], price_tokens[1]
        else:
            currency = amount = ''

        item1["Last_Code_Update_Date"] = ""
        item1["Scrapping_Date"] = ""
        item1["Country"] = 'Syria'
        item1["City"] = text_at(
            '//div[@class="location-address module"]/section/div'
            '/span[@class="address-city"]/text()')
        item1["Seller_Type"] = "Large Independent Dealers"
        item1["Seller_Name"] = "Al-Zayani"
        item1["Car_URL"] = response.url
        item1["Year"] = overview('reg-year')
        item1["Make"] = title_part('make')
        item1["model"] = title_part('model')
        item1["Spec"] = title_part('variant')
        item1["Car_Name"] = ' '.join(
            (item1["Make"], item1["model"], item1["Spec"]))
        item1["Doors"] = ""
        item1["transmission"] = overview('transmission')
        item1["trim"] = ""
        item1["bodystyle"] = overview('bodystyle')
        item1["other_specs_gearbox"] = ""
        item1["other_specs_seats"] = ""
        # Everything but the last token is the size; the last is the unit.
        item1["other_specs_engine_size"] = ' '.join(engine_tokens[:-1])
        item1["other_specs_horse_power"] = ""
        item1["colour_exterior"] = overview('exterior-colour')
        item1["colour_interior"] = ""
        item1["fuel_type"] = overview('fuel-type')
        item1["import_yes_no_also_referred_to_as_GCC_spec"] = ""
        item1["mileage"] = ' '.join(mileage_tokens[:-1])
        item1["condition"] = ""
        item1["warranty_untill_when"] = ""
        item1['service_contract_untill_when'] = ''
        item1['Price_Currency'] = currency
        item1['asking_price_inc_VAT'] = amount
        item1['asking_price_ex_VAT'] = ''
        item1['warranty'] = ''
        item1['service_contract'] = ''
        item1['vat'] = 'yes'
        for autodata_field in (
                'autodata_Make', 'autodata_Make_id',
                'autodata_model', 'autodata_model_id',
                'autodata_Spec', 'autodata_Spec_id',
                'autodata_transmission', 'autodata_transmission_id',
                'autodata_bodystyle', 'autodata_bodystyle_id',
        ):
            item1[autodata_field] = ''
        item1['mileage_unit'] = mileage_tokens[-1]
        item1['engine_unit'] = engine_tokens[-1]

        item2['src'] = "al-zayani.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "zayani"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum is taken while both date fields are still empty, so
        # it does not change from one scraping day to the next.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item1),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Last_Code_Update_Date'] = 'June 13, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')
        item1['Source'] = item2['src']
        yield item1
Пример #3
0
 def parse_data(self, response):
     """Build a single ``AutodataItem`` from a dubicars.com listing page.

     The price comes from the second ``strong`` element whose class contains
     ``money``; the remaining details are read by fixed position from the
     flattened text of all table cells.

     The metadata checksum is computed after the scraped fields are filled
     in, so it reflects the car's data rather than only the constant fields.
     Pages whose make and model are both blank are skipped.

     Args:
         response: Response object to parse; its ``url`` is stored on the
             item and in the metadata.

     Yields:
         The populated ``AutodataItem``, unless make and model are blank.

     Raises:
         IndexError: If the page has fewer price or table-cell text nodes
             than the fixed positions read below.
     """
     item = AutodataItem()
     item2 = MetaItem()

     field_order = (
         "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
         "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
         "Make", "model", "Spec", "Doors", "transmission", "trim",
         "bodystyle", "other_specs_gearbox", "other_specs_seats",
         "other_specs_engine_size", "other_specs_horse_power",
         "colour_exterior", "colour_interior", "fuel_type",
         "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
         "condition", "warranty_untill_when",
         "service_contract_untill_when", "Price_Currency",
         "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
         "service_contract", "vat", "mileage_unit", "engine_unit",
         "autodata_Make", "autodata_Make_id", "autodata_model",
         "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
         "autodata_transmission", "autodata_transmission_id",
         "autodata_bodystyle", "autodata_bodystyle_id",
     )
     # Everything not listed here defaults to the empty string.
     fixed_values = {
         "Last_Code_Update_Date": "Wednesday,June 19,2019",
         "Scrapping_Date": datetime.today().strftime('%A, %B %d, %Y'),
         "Country": "UAE",
         "City": "Dubai",
         "Seller_Type": "MarketPlace",
         "Seller_Name": "111 Used Cars",
         "Car_URL": response.url,
         "vat": "yes",
     }
     for field in field_order:
         item[field] = fixed_values.get(field, "")

     item['asking_price_inc_VAT'] = response.xpath("//strong[contains(@class,'money')]/text()").extract()[1].split('AED')[-1].strip()
     item['Price_Currency'] = 'AED'

     # Table cells are addressed by their position in the flattened list.
     arr = response.xpath("//tr/td/text()").extract()
     item['Year'] = str(arr[2])
     item['Make'] = str(arr[0])
     item['model'] = arr[1]
     item["Car_Name"] = item['Make'] + ' ' + item['model']
     # The joined name always contains the separating space, so compare the
     # stripped value to detect a listing with no make and no model.
     if not item['Car_Name'].strip():
         return

     item['Spec'] = arr[8].strip()
     item['transmission'] = arr[9]
     item['bodystyle'] = arr[5]
     item['colour_exterior'] = arr[4]
     item['fuel_type'] = arr[10]
     item['mileage'] = arr[6]
     item['mileage_unit'] = 'km'
     item['colour_interior'] = arr[13]
     item['other_specs_seats'] = arr[11]

     item2['src'] = "dubicars.com"
     item2['ts'] = datetime.utcnow().isoformat()
     item2['name'] = "dubi_spider"
     item2['url'] = response.url
     item2['uid'] = str(uuid.uuid4())
     # Hash the item only once every scraped field is in place.
     item2['cs'] = hashlib.md5(json.dumps(dict(item), sort_keys=True).encode('utf-8')).hexdigest()
     item['meta'] = dict(item2)
     item['Source'] = item2['src']
     yield item
Пример #4
0
    def parse_data(self, response):
        """Build a single ``AutodataItem`` from a Bentley Muscat vehicle page.

        The car name and price come from the title and price blocks; the
        specification list is walked row by row and each recognised label is
        mapped onto the matching item field.

        Args:
            response: Response object to parse; its ``url`` is stored on the
                item and in the metadata.

        Yields:
            The populated ``AutodataItem``.
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        # Fields are assigned in this exact order.
        field_order = (
            "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
            "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
            "Make", "model", "Spec", "Doors", "transmission", "trim",
            "bodystyle", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "engine_unit", "mileage_unit",
            "autodata_Make", "autodata_Make_id", "autodata_model",
            "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
            "autodata_transmission", "autodata_transmission_id",
            "autodata_bodystyle", "autodata_bodystyle_id",
            "wheel_size", "top_speed_kph", "cylinders", "acceleration",
            "torque_Nm",
        )
        # Everything not listed here defaults to the empty string.
        fixed_values = {
            "Seller_Name": "Bentley Motors Muscat",
            "vat": "yes",
        }
        for field in field_order:
            item1[field] = fixed_values.get(field, "")

        sel = Selector(response)

        item1["Car_URL"] = response.url
        title_text = ''.join(
            sel.xpath('//div[@class="vehicle-title column block"]/h1//text()')
            .extract()).strip()
        # The first two space-separated words of the title are dropped.
        item1["Car_Name"] = ' '.join(title_text.split(' ')[2:])

        price_text = ''.join(
            sel.xpath('//div[@class="vehicle-prive column block"]/div//text()')
            .extract()).strip()
        # First three characters are stored as the currency; the amount
        # starts after one more character.
        item1["Price_Currency"] = price_text[:3]
        item1["asking_price_inc_VAT"] = price_text[4:]

        spec_rows = sel.xpath(
            '//ul[@class="unstyle tiles-container-10 s-space-5 vertical-collapse"]/li'
        )
        for row in spec_rows:
            label = ''.join(
                row.xpath('div/div[@class="column s50 m50 l50 bold"]//text()')
                .extract()).strip().lower()
            value = ''.join(
                row.xpath(
                    'div/div[@class="column s50 m50 l50 vertical-top"]//text()'
                ).extract()).strip()

            if label == "body style":
                item1["bodystyle"] = value
            elif label == "paint colour":
                item1["colour_exterior"] = value
            elif label == "registration date":
                # Keep only the part after the last dot.
                item1["Year"] = value.split('.')[-1]
            elif label == "mileage":
                # The last two characters are stored as the unit.
                item1["mileage"] = value[:-2]
                item1['mileage_unit'] = value[-2:]
            elif label == "transmission":
                item1["transmission"] = value
            elif label == "engine":
                engine_words = value.split(' ')
                item1["other_specs_engine_size"] = engine_words[0]
                item1['engine_unit'] = engine_words[1]
            elif label == "torque":
                item1['torque_Nm'] = value.split(' ')[0]
            elif "acceleration" in label:
                item1['acceleration'] = value.split(' ')[0]
            elif "maximum speed" in label:
                item1['top_speed_kph'] = value.split(' ')[0]
            elif label == "power":
                # Non-breaking spaces and thousands separators are removed
                # before the leading number is scaled by 1.34102
                # (presumably kW to hp - TODO confirm).
                leading_number = value.replace(u'\xa0', u' ').replace(
                    ',', '').split(' ')[0]
                item1["other_specs_horse_power"] = int(leading_number) * 1.34102

        name_words = item1["Car_Name"].split(' ')
        item1["Make"] = 'Bentley'
        # First word is skipped, last word is the spec, the rest the model.
        item1["model"] = ' '.join(name_words[1:-1])
        item1['Spec'] = name_words[-1]

        item1["Country"] = "Oman"
        item1["City"] = "Muscat"

        item1['Last_Code_Update_Date'] = 'Thursday, June 04, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        for meta_key, meta_value in (
                ('src', "muscat.bentleymotors.com"),
                ('ts', datetime.utcnow().isoformat()),
                ('name', "bentley"),
                ('url', response.url),
                ('uid', str(uuid.uuid4())),
        ):
            item2[meta_key] = meta_value
        snapshot = json.dumps(dict(item1), sort_keys=True)
        item2['cs'] = hashlib.md5(snapshot.encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Source'] = item2['src']

        yield item1
Пример #5
0
    def parse_data(self, response):
        """Build a single ``AutodataItem`` from an Opensooq (Oman) ad page.

        Each ``li`` of the ``customP`` list is read as a label/value pair and
        mapped onto the matching item field by a case-insensitive substring
        test on the label.

        Pages on which neither make nor model was found are skipped.

        Args:
            response: Response object to parse; its ``url`` is stored on the
                item and in the metadata.

        Yields:
            The populated ``AutodataItem``, unless make and model are blank.
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        field_order = (
            "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
            "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
            "Make", "model", "Spec", "Doors", "transmission", "trim",
            "bodystyle", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "mileage_unit", "engine_unit",
            "autodata_Make", "autodata_Make_id", "autodata_model",
            "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
            "autodata_transmission", "autodata_transmission_id",
            "autodata_bodystyle", "autodata_bodystyle_id",
        )
        # Everything not listed here defaults to the empty string.
        fixed_values = {
            "Country": "Oman",
            "Seller_Type": "Market Places",
            "Seller_Name": "Opensooq",
            "Car_URL": response.url,
            "vat": "yes",
        }
        for field in field_order:
            item1[field] = fixed_values.get(field, "")

        for det in response.xpath('//div[@class="customP"]/ul/li'):
            label = ''.join(det.xpath('span/text()').extract()).strip().lower()
            value = ''.join(det.xpath('a/text()').extract()).replace('\"', '').strip()
            if 'city' in label:
                item1["City"] = value
            elif 'make' in label:
                item1["Make"] = value
            elif 'model' in label:
                item1["model"] = value
            elif 'year' in label:
                item1["Year"] = value
            elif 'condition' in label:
                item1["condition"] = value
            elif 'kilometers' in label:
                # Keep the first word and drop any "+" characters.
                item1["mileage"] = value.split(' ')[0].replace('+', '')
            elif 'transmission' in label:
                item1["transmission"] = value
            elif 'fuel' in label:
                item1["fuel_type"] = value
            elif 'color' in label:
                item1["colour_exterior"] = value
            elif 'price' in label:
                # The link text is stored as the currency, the nested
                # <strong> text as the amount.
                item1["Price_Currency"] = value
                item1['asking_price_inc_VAT'] = ''.join(det.xpath('a/strong/text()').extract()).strip()

        item1["Car_Name"] = item1["Make"] + ' ' + item1["model"]
        # The joined name always contains the separating space, so compare
        # the stripped value to detect a page with no make and no model.
        if not item1["Car_Name"].strip():
            return

        item2['src'] = "om.opensooq.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "opensooq_om"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum is taken while both date fields are still empty.
        item2['cs'] = hashlib.md5(json.dumps(dict(item1), sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Last_Code_Update_Date'] = 'Tuesday, June 18, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')
        item1['Source'] = item2['src']
        yield item1
Пример #6
0
    def parse_data(self, response):
        """Build a single ``AutodataItem`` from a Friday Market (Qatar) ad.

        Label cells and value spans are selected separately and paired by
        position; pairing stops at the shorter of the two lists, so a page
        with fewer values than labels no longer raises ``IndexError``. The
        car name is the ad heading passed through ``remove_non_ascii``.

        Args:
            response: Response object to parse; its ``url`` is stored on the
                item and in the metadata.

        Yields:
            The populated ``AutodataItem``, unless the car name is empty.
        """
        item = AutodataItem()
        item2 = MetaItem()

        field_order = (
            "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
            "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
            "Make", "model", "Spec", "Doors", "transmission", "trim",
            "bodystyle", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "mileage_unit", "engine_unit",
            "autodata_Make", "autodata_Make_id", "autodata_model",
            "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
            "autodata_transmission", "autodata_transmission_id",
            "autodata_bodystyle", "autodata_bodystyle_id",
        )
        # Everything not listed here defaults to the empty string.
        fixed_values = {
            "Last_Code_Update_Date": "June 22, 2019",
            "Scrapping_Date": datetime.today().strftime('%Y-%m-%d'),
            "Country": "Qatar",
            "Seller_Type": "Market Places",
            "Seller_Name": "Friday cars",
            "Car_URL": response.url,
            "Price_Currency": "QAR",
            "vat": "yes",
        }
        for field in field_order:
            item[field] = fixed_values.get(field, "")

        labels = response.xpath('//td[@class="viewad-label"]')
        values = response.xpath('//span[@class="viewad-data"]')
        for label_sel, value_sel in zip(labels, values):
            label = ''.join(label_sel.xpath('text()').extract()).strip()
            value = ''.join(value_sel.xpath('text()').extract()).strip()
            if 'Brand' in label:
                item['Make'] = value
            elif 'Model' in label:
                item['model'] = value
            elif 'Year' in label:
                item['Year'] = value
            elif 'Location' in label:
                item['City'] = value
            elif "Price" in label:
                # Only the first space-separated token is kept.
                item["asking_price_inc_VAT"] = value.split(' ')[0]

        item["Car_Name"] = remove_non_ascii(''.join(
            response.xpath(
                '//div[@class = "panel-body alone pad0 viewad-topinfo"]/h1/text()'
            ).extract()).strip())
        # An ad without a heading is not emitted.
        if item['Car_Name'] == '':
            return

        item2['src'] = "http://qa.fridaymarket.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "fridaymarket"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']
        yield item
Пример #7
0
    def parse_dir_contents(self, response):
        """Build a single ``AutodataItem`` from a Dhofar Automotive car page.

        The price comes from the ``omr1`` list; the remaining details are
        read by fixed position from the ``fist_sec`` and ``fist_for`` lists,
        each of which is queried once.

        The metadata checksum is computed after the scraped fields are filled
        in, so it reflects the car's data rather than only the constant
        fields.

        Args:
            response: Response object to parse; its ``url`` is stored on the
                item and in the metadata.

        Yields:
            The populated ``AutodataItem``.

        Raises:
            IndexError: If any of the three lists has fewer text nodes than
                the fixed positions read below.
        """
        item = AutodataItem()
        item2 = MetaItem()

        field_order = (
            "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
            "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
            "Make", "model", "Spec", "Doors", "transmission", "trim",
            "bodystyle", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "mileage_unit", "engine_unit",
            "autodata_Make", "autodata_Make_id", "autodata_model",
            "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
            "autodata_transmission", "autodata_transmission_id",
            "autodata_bodystyle", "autodata_bodystyle_id",
        )
        # Everything not listed here defaults to the empty string.
        fixed_values = {
            "Last_Code_Update_Date": "June 6, 2019",
            "Scrapping_Date": datetime.today().strftime('%Y-%m-%d'),
            "Seller_Type": "Large Independent Dealers",
            "Seller_Name": "Dhofar Automotive",
            "Car_URL": response.url,
            "vat": "yes",
        }
        for field in field_order:
            item[field] = fixed_values.get(field, "")

        # Each list is read several times below, so extract it only once.
        first_list = response.xpath("//ul[contains(@class, 'fist_sec')]/li/text()").extract()
        second_list = response.xpath("//ul[contains(@class, 'fist_for')]/li/text()").extract()

        item['asking_price_inc_VAT'] = response.xpath("//ul[contains(@class, 'omr1')]/li/span/text()").extract()[0]
        item['Price_Currency'] = 'OMR'
        item['Make'] = first_list[0]
        item['model'] = second_list[0]
        item['Car_Name'] = item['Make'] + ' ' + item['model']
        item['Year'] = first_list[1]
        item['colour_exterior'] = first_list[2]
        item['transmission'] = first_list[3]
        item['mileage'] = second_list[1].strip()
        item['mileage_unit'] = 'km'
        item['Doors'] = second_list[2]
        item['bodystyle'] = second_list[3]

        item2['src'] = "dhofarautomotive.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "dhofar"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # Hash the item only once every scraped field is in place.
        item2['cs'] = hashlib.md5(json.dumps(dict(item), sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']

        yield item
Пример #8
0
    def parse(self, response):
        """Turn one JSON result page into items and request the next page.

        The response body is decoded as JSON; every entry of ``results``
        becomes one ``AutodataItem``. When the page is not empty, a request
        for the next page is yielded with ``start`` advanced by 20.

        Args:
            response: Response object whose body is JSON and whose ``url``
                contains a ``start=`` parameter followed by an integer.

        Yields:
            One ``AutodataItem`` per result, then at most one ``Request``
            for the following page.
        """
        body = json.loads(response.body)
        data = body['results']
        print(len(data))

        # Fields are assigned in this exact order for every item.
        field_order = (
            "Last_Code_Update_Date", "Scrapping_Date", "trim", "Country",
            "City", "Seller_Type", "Seller_Name", "Car_URL", "bodystyle",
            "Car_Name", "Year", "Make", "Doors", "transmission", "model",
            "Spec", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "mileage_unit", "engine_unit",
            "autodata_Make", "autodata_Make_id", "autodata_model",
            "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
            "autodata_transmission", "autodata_transmission_id",
            "autodata_bodystyle", "autodata_bodystyle_id",
            "wheel_size", "top_speed_kph", "cylinders", "acceleration",
            "torque_Nm",
        )
        # Everything not listed here defaults to the empty string.
        fixed_values = {
            "Country": "Saudi Arabia",
            "Seller_Type": "Official Dealers",
            "Seller_Name": "Mohamed Yousuf Naghi - BMW",
            "vat": "yes",
            "mileage_unit": "km",
        }

        def key_suffix(node):
            # Second dot-separated segment of the node's 'key' value.
            return node['key'].split('.')[1]

        for res in data:
            item2 = MetaItem()
            item1 = AutodataItem()
            for field in field_order:
                item1[field] = fixed_values.get(field, "")

            vehicle = res['vehicle']
            item1["City"] = vehicle['location']['city']
            body_type = vehicle['bodyType']['pl_PL']
            item1["bodystyle"] = body_type
            item1["Year"] = str(vehicle['constructionYear'])
            item1["Make"] = vehicle['make']
            item1["transmission"] = key_suffix(vehicle['transmission'])
            if 'seatFabric' in vehicle:
                item1["trim"] = key_suffix(vehicle['seatFabric'])
            # The body-style text is removed from both model and version.
            item1["model"] = vehicle['model'].replace(body_type, '')
            item1["Spec"] = vehicle['vehicleVersion'].split('-')[0].replace(body_type, '').strip()
            item1["other_specs_horse_power"] = vehicle['power_hp']
            item1["colour_exterior"] = key_suffix(vehicle['bodyColor'])
            item1["colour_interior"] = key_suffix(vehicle['interiorColor'])
            item1["fuel_type"] = key_suffix(vehicle['fuel'])
            item1["mileage"] = vehicle['mileage_km']
            item1['Price_Currency'] = res["retailpricing"]["applicableCurrency"]
            item1['asking_price_inc_VAT'] = res['priceInclusiveVAT']
            item1['asking_price_ex_VAT'] = res['priceExclusiveVAT']
            if 'numberOfCylinders' in vehicle:
                item1['cylinders'] = vehicle['numberOfCylinders']
            item1['Last_Code_Update_Date'] = 'Wednesday, July 03, 2019'
            item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')
            item1["Car_URL"] = response.url
            # Model and spec are joined without a separator.
            item1["Car_Name"] = item1["Make"] + ' ' + item1["model"] + item1["Spec"]

            for meta_key, meta_value in (
                    ('src', "bmw-saudiarabia.com"),
                    ('ts', datetime.utcnow().isoformat()),
                    ('name', "bmw_sa"),
                    ('url', response.url),
                    ('uid', str(uuid.uuid4())),
            ):
                item2[meta_key] = meta_value
            snapshot = json.dumps(dict(item1), sort_keys=True)
            item2['cs'] = hashlib.md5(snapshot.encode('utf-8')).hexdigest()
            item1['meta'] = dict(item2)
            item1['Source'] = item2['src']

            yield item1

        # The offset is parsed from the current URL even for an empty page.
        next_start = int(response.url.split('start=')[1]) + 20
        if data:
            url = "https://retailcomponent.salescre8.com/v1/services/retail-component/publications/search?channelId=www_bmwsa&childQuery=&locale=en_GB&parentQuery=&profile=bmw&rows=20&sort=&sortOrder=&start="+ str(next_start)
            yield Request(url, callback=self.parse, meta={"url": url, "body": body})
Пример #9
0
    def parse_dir_contents(self, response):
        """Build one ``AutodataItem`` from an astonmartin.com vehicle page.

        Every field is first given a blank default, then the values found on
        the page are filled in, and only then is the ``meta`` record (source,
        UTC timestamp, uuid and MD5 checksum of the item) attached, so the
        checksum reflects the scraped values rather than the blank template.

        Args:
            response: page response; must expose ``xpath()`` and ``url``.

        Yields:
            The populated item.

        Raises:
            IndexError: when a node read by position, or the second token of
                the price text, is missing from the page.
        """
        def joined_text(query):
            # All text nodes matched by ``query`` glued together and trimmed.
            return ''.join(response.xpath(query).extract()).strip()

        def text_at(query, position=0):
            # Text node number ``position`` for ``query``; IndexError if absent.
            return response.xpath(query).extract()[position]

        title_span = ("//div[contains(@class, 'title module align-center')]"
                      "/h3/span[contains(@class, '{0}')]/text()")
        spec_cell = ("//div[contains(@class, 'cell {0}')]"
                     "/span[contains(@class, 'value {0}')]/text()")
        tech_spec = ("//div[contains(@class, 'span6')]"
                     "/div[contains(@class,'custom-html module align-center tech-spec')]"
                     "/table/tr/td[contains(@class,'value')]/text()")
        price_now = ("//div[contains(@class,'price-now')]"
                     "/span[contains(@class,'value')]/text()")

        item = AutodataItem()
        item2 = MetaItem()

        # Blank template first so every expected key exists on the item.
        item["Last_Code_Update_Date"] = 'June 6, 2019'
        item["Scrapping_Date"] = datetime.today().strftime('%Y-%m-%d')
        item["Country"] = joined_text(
            '//span[@class = "address-country"]/text()')
        item["City"] = joined_text('//span[@class = "address-city"]/text()')
        for field in (
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id"):
            item[field] = ""
        item['vat'] = 'yes'
        item['Car_URL'] = response.url

        # Price text is "<currency> <amount>"; query it once and split.
        price_tokens = joined_text(price_now).split(' ')
        item['asking_price_inc_VAT'] = price_tokens[1]
        item['Price_Currency'] = price_tokens[0]

        item['Year'] = text_at(title_span.format('year')).strip()
        item['Make'] = text_at(title_span.format('make')).strip()
        item['model'] = text_at(title_span.format('model')).strip()
        item['transmission'] = text_at(
            spec_cell.format('transmission')).strip()

        item['mileage'] = text_at(
            spec_cell.format('mileage')).split('km')[0].strip()
        item['mileage_unit'] = 'km'

        # The tech-spec table is read by position: cell 0 is used as the
        # engine size and cell 3 as the power figure.
        tech_values = response.xpath(tech_spec).extract()
        item['other_specs_engine_size'] = tech_values[0].split(
            'Litre')[0].strip()
        item['engine_unit'] = 'Litre'
        item['colour_exterior'] = text_at(
            spec_cell.format('exterior-colour')).strip()
        item['other_specs_horse_power'] = tech_values[3].split(
            "BHP")[0].strip()

        item['colour_interior'] = text_at(
            spec_cell.format('interior-colour')).strip()
        # Body style is taken as the last word of the variant text.
        item['bodystyle'] = text_at(title_span.format('variant')).split()[-1]
        item['Car_Name'] = (item['Make'] + ' ' + item['model'] + ' ' +
                            item['bodystyle'])

        # Meta goes last so the checksum covers the populated fields.
        item2['src'] = "astonmartin.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "astonmartin_kw"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']

        yield item
Пример #10
0
    def parse(self, response):
        """Convert the Nissan Dubai pre-owned stock JSON feed into items.

        The response body is parsed as JSON and every entry stored under a
        numeric string key ("0", "1", ...) becomes an ``AutodataItem``, in
        ascending numeric order. Non-numeric keys are ignored, so an extra
        non-record key in the payload can no longer keep the loop spinning.

        Args:
            response: response whose ``body`` is the JSON document.

        Yields:
            One ``AutodataItem`` per record, with ``meta`` attached.

        Raises:
            KeyError: if a record lacks one of the fields read below.
        """
        data = json.loads(response.body)
        if not isinstance(data, dict):
            # Records are addressed by string key; nothing usable otherwise.
            return

        # isdecimal() (not isdigit()) guarantees int() accepts the key.
        record_keys = sorted((key for key in data if key.isdecimal()),
                             key=int)

        blank_fields = (
            "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
            "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
            "Make", "model", "Spec", "Doors", "transmission", "trim",
            "bodystyle", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "mileage_unit", "engine_unit",
            "autodata_Make", "autodata_Make_id", "autodata_model",
            "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
            "autodata_transmission", "autodata_transmission_id",
            "autodata_bodystyle", "autodata_bodystyle_id")

        for key in record_keys:
            record = data[key]
            item2 = MetaItem()
            item1 = AutodataItem()

            # Blank template so every expected key exists on the item.
            for field in blank_fields:
                item1[field] = ""
            item1["mileage_unit"] = 'km'

            item1["Seller_Type"] = record["SALE_TYPE"]
            item1["Seller_Name"] = record["SELLER_NAME"]
            item1["Car_Name"] = record["DESCRIPTION"]
            item1["Year"] = str(record["MODEL_YEAR"])
            item1["Make"] = record["MAKE_DESC"]
            item1["model"] = record["MODEL_DESC"]
            item1["bodystyle"] = record["BODY_TYPE_DESC"]

            # ENGINE_SIZE is "<size> <unit>"; tolerate a missing unit.
            engine_tokens = record["ENGINE_SIZE"].split(' ')
            item1["other_specs_engine_size"] = engine_tokens[0]
            item1["engine_unit"] = (engine_tokens[1]
                                    if len(engine_tokens) > 1 else '')

            item1["colour_exterior"] = record["COLOR"]
            item1["fuel_type"] = record["FUEL_TYPE_DESC"]
            item1["mileage"] = record["MILEAGE"]
            item1["Spec"] = record["VARIANT"]
            item1["trim"] = record["TRIM_TYPE"]
            item1["colour_interior"] = record["INTERIOR_COLOR"]
            item1["City"] = "Dubai"
            item1["asking_price_inc_VAT"] = record["PRICE"]
            item1["Price_Currency"] = "AED"
            # NOTE(review): City is "Dubai" and the currency is "AED", yet
            # Country says "Saudi Arabia" - looks inconsistent; confirm the
            # expected country label before changing it.
            item1["Country"] = "Saudi Arabia"
            item1["vat"] = "yes"
            item1["Car_URL"] = "https://en.nissan-dubai.com/certified-preowned-cars/buy-a-car.html"

            # The feed abbreviates automatic gearboxes as "A/T".
            if record["TRNS_TYPE_DESC"] == "A/T":
                item1["transmission"] = "Automatic"
            else:
                item1["transmission"] = record["TRNS_TYPE_DESC"]

            item1["warranty_untill_when"] = record["WAR_EXP_DATE"]
            if item1["warranty_untill_when"] != "":
                item1["warranty"] = "yes"

            item2['src'] = "en.nissan-dubai.com"
            item2['ts'] = datetime.utcnow().isoformat()
            item2['name'] = "nissan"
            item2['url'] = response.url
            item2['uid'] = str(uuid.uuid4())
            # The checksum is taken while both date fields are still blank.
            item2['cs'] = hashlib.md5(json.dumps(dict(item1), sort_keys=True).encode('utf-8')).hexdigest()
            item1['meta'] = dict(item2)
            item1['Source'] = item2['src']
            item1['Last_Code_Update_Date'] = 'June 7, 2019'
            item1['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')
            yield item1
Пример #11
0
    def parse_data(self, response):
        """Scrape one alfarooqautomotive.com listing page into an item.

        Most values are read from the ``listing_category_*`` table rows
        (second cell of each row); the car name and price come from the two
        header blocks. A node that is absent yields an empty string.

        Args:
            response: listing page response exposing ``xpath()`` and ``url``.

        Yields:
            A single ``AutodataItem`` with ``meta`` attached.
        """
        def joined_text(query):
            # All text nodes matched by ``query`` glued together and trimmed.
            return "".join(response.xpath(query).extract()).strip()

        def row_value(category):
            # Second cell of the table row tagged listing_category_<category>.
            return joined_text(
                '//tr[@class="listing_category_%s"]/td[2]/text()' % category)

        item2 = MetaItem()
        item1 = AutodataItem()

        item1["Last_Code_Update_Date"] = ""
        item1["Scrapping_Date"] = ""
        item1["Country"] = "Oman"
        # Location reads "<city>,<rest>"; keep only the part before the comma.
        item1["City"] = row_value('location').split(',')[0]
        item1["Seller_Type"] = "Large Independent Dealers"
        item1["Seller_Name"] = "Alfarooq Automotive"
        item1["Car_URL"] = response.url
        item1["Car_Name"] = joined_text(
            '//div[@class="col-lg-9 col-md-9 col-sm-9 col-xs-12 xs-padding-none"]/h2/text()'
        )
        item1["Year"] = row_value('year')
        item1["Make"] = row_value('make')
        item1["model"] = row_value('model')
        item1["Spec"] = ""
        item1["Doors"] = ""
        # Only the last word of the transmission text is kept.
        item1["transmission"] = row_value('transmission').split(' ')[-1]
        item1["trim"] = ""
        item1["bodystyle"] = row_value('body-style')
        item1["other_specs_gearbox"] = ""
        item1["other_specs_seats"] = ""
        # Engine text is cut at the first "L"; the unit is stored separately.
        item1["other_specs_engine_size"] = row_value('engine').split('L')[0]
        item1["other_specs_horse_power"] = ""
        item1["colour_exterior"] = row_value('exterior-color')
        item1["colour_interior"] = row_value('interior-color')
        item1["fuel_type"] = ""
        item1["import_yes_no_also_referred_to_as_GCC_spec"] = ""
        item1["mileage"] = row_value('mileage')
        item1["condition"] = row_value('condition')
        item1["warranty_untill_when"] = ""
        item1['service_contract_untill_when'] = ''
        item1['Price_Currency'] = 'OMR'
        item1['asking_price_inc_VAT'] = joined_text(
            '//div[@class="col-lg-3 col-md-3 col-sm-3 text-right xs-padding-none"]/h2/text()'
        )
        item1['asking_price_ex_VAT'] = ''
        item1['warranty'] = ''
        item1['service_contract'] = ''
        item1['vat'] = 'yes'
        item1['mileage_unit'] = 'km'
        item1['engine_unit'] = 'L'
        for field in ('autodata_Make', 'autodata_Make_id', 'autodata_model',
                      'autodata_model_id', 'autodata_Spec',
                      'autodata_Spec_id', 'autodata_transmission',
                      'autodata_transmission_id', 'autodata_bodystyle',
                      'autodata_bodystyle_id'):
            item1[field] = ''

        item2['src'] = "alfarooqautomotive.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "alfarooq"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum is taken while both date fields are still blank.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item1),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Last_Code_Update_Date'] = 'Tuesday, June 18, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')
        item1['Source'] = item2['src']
        yield item1
Пример #12
0
    def parse_data(self, response):
        """Parse the Maserati stock payload and yield one item per row.

        The body is expected to hold a JSON object surrounded by extra text
        (presumably a JSONP-style wrapper - confirm against the endpoint).
        Everything from the first ``{`` to the last ``}`` is decoded, and
        each entry of ``result.rows`` becomes an ``AutodataItem``.

        Args:
            response: response whose ``body`` carries the wrapped JSON.

        Yields:
            One ``AutodataItem`` per row, with ``meta`` attached.

        Raises:
            ValueError: if the body has no ``{``/``}`` pair or is not JSON.
            KeyError: if the payload or a row lacks an expected field.
        """
        raw_body = response.body
        if isinstance(raw_body, bytes):
            # Decode instead of str(): str() on bytes gives the b'...' repr,
            # which escapes non-ASCII characters and backslashes and thereby
            # corrupts the JSON text.
            text = raw_body.decode('utf-8', 'replace')
        else:
            text = raw_body
        payload = text[text.index('{'):text.rindex('}') + 1]
        try:
            d = json.loads(payload)
        except ValueError:
            # Keep the previous tolerance for single-quoted pseudo-JSON.
            d = json.loads(payload.replace("'", "\""))

        blank_fields = (
            "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
            "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
            "Make", "model", "Spec", "Doors", "transmission", "trim",
            "bodystyle", "other_specs_gearbox", "other_specs_seats",
            "other_specs_engine_size", "other_specs_horse_power",
            "colour_exterior", "colour_interior", "fuel_type",
            "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
            "condition", "warranty_untill_when",
            "service_contract_untill_when", "Price_Currency",
            "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
            "service_contract", "vat", "engine_unit", "autodata_Make",
            "autodata_Make_id", "autodata_model", "autodata_model_id",
            "autodata_Spec", "autodata_Spec_id", "autodata_transmission",
            "autodata_transmission_id", "autodata_bodystyle",
            "autodata_bodystyle_id")

        for row in d['result']['rows']:
            item2 = MetaItem()
            item1 = AutodataItem()

            # Blank template so every expected key exists on the item.
            for field in blank_fields:
                item1[field] = ""

            item1['model'] = row['modelName']
            item1['Year'] = str(row['modelYear'])

            # Mileage is "<value> <unit>"; tolerate a missing unit.
            mileage_tokens = row['mileage'].split(' ')
            item1['mileage'] = mileage_tokens[0]
            item1['mileage_unit'] = (mileage_tokens[1]
                                     if len(mileage_tokens) > 1 else '')

            # The kW figure is multiplied by 1.34102 to obtain horsepower.
            item1['other_specs_horse_power'] = int(
                row['maxPowerKw'].split(' ')[0]) * 1.34102
            item1['Seller_Name'] = row['dealer']['name']
            item1['Country'] = row['dealer']['country']
            item1['City'] = row['dealer']['city']
            item1['asking_price_inc_VAT'] = row['price']
            # The formatted price starts with the currency token.
            item1['Price_Currency'] = row['formattedPrice'].split(' ')[0]
            item1['bodystyle'] = row['bodyStyle']
            item1["colour_exterior"] = row['exterior']
            item1["colour_interior"] = row['interior']
            item1['Last_Code_Update_Date'] = 'June 6, 2019'
            item1['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')
            item1['Make'] = 'Maserati'
            item1['vat'] = 'yes'
            item1["Car_URL"] = response.url
            item1["Car_Name"] = "Maserati" + ' ' + item1['model']

            item2['src'] = "maserati.com"
            item2['ts'] = datetime.utcnow().isoformat()
            item2['name'] = "maserati"
            item2['url'] = response.url
            item2['uid'] = str(uuid.uuid4())
            item2['cs'] = hashlib.md5(
                json.dumps(dict(item1),
                           sort_keys=True).encode('utf-8')).hexdigest()
            item1['meta'] = dict(item2)
            item1['Source'] = item2['src']
            yield item1
Пример #13
0
    def parse_data(self, response):
        """Scrape one audiapproved.com vehicle page, for dealers in "QA" only.

        The dealership address is split on commas; its last part (minus the
        first character) is the country code. Pages whose code is not "QA"
        produce nothing. Otherwise every field is given a blank default and
        then filled from the title/price block, the spec list and the
        dealership block.

        Args:
            response: vehicle page response.

        Yields:
            A single ``AutodataItem`` with ``meta`` attached, or nothing when
            the dealer's country code is not "QA".
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        sel = Selector(response)
        dealership = sel.xpath('//div[@class="dealership"]')
        address_parts = ''.join(
            dealership.xpath('div[@class="address"]/p/text()').extract()
        ).strip().split(',')
        # Dropping the first character presumably removes the space that
        # follows the comma - TODO confirm against a live page.
        item1['Country'] = address_parts[len(address_parts) - 1][1:]
        if item1['Country'] != 'QA':
            return

        # Blank template so every expected key exists on the item;
        # mileage_unit / engine_unit are included so they are present even
        # when the page has no matching spec row.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id"):
            item1[field] = ""
        item1['vat'] = 'yes'

        item1['Car_URL'] = response.url
        item1['Make'] = "Audi"

        headline = sel.xpath('//div[@id="vehicle_cta"]/div[1]')
        item1['Car_Name'] = ''.join(
            headline.xpath('div[@class="title"]/text()').extract()).strip()
        # The model is taken as the first word of the title.
        item1['model'] = item1['Car_Name'].split(' ')[0]

        # Cash price: first three characters are stored as the currency,
        # everything from index 4 on as the amount.
        cash_price = ''.join(
            headline.xpath(
                'div[@class="price"]/div[@class="cashprice"]/text()').
            extract()).strip()
        item1['Price_Currency'] = cash_price[:3]
        item1['asking_price_inc_VAT'] = cash_price[4:]

        for det in sel.xpath('//div[@class="specs"]/div[@class="item"]'):
            label = ''.join(
                det.xpath(
                    'div[@class="spec_item"]/text()').extract()).strip().lower()
            spec_data = ''.join(
                det.xpath(
                    'div[@class="spec_data"]/text()').extract()).strip()
            if 'year' in label:
                item1['Year'] = spec_data
            elif 'colour' in label:
                item1['colour_exterior'] = spec_data
            elif 'transmission' in label:
                item1['transmission'] = spec_data
            elif 'engine size' in label:
                # The last two characters are stored as the unit.
                item1['other_specs_engine_size'] = spec_data[:-2]
                item1['engine_unit'] = spec_data[-2:]
            elif 'mileage' in label:
                # The last two characters are stored as the unit.
                item1['mileage'] = spec_data[:-2]
                item1['mileage_unit'] = spec_data[-2:]
            elif 'fuel type' in label:
                item1['fuel_type'] = spec_data
            elif 'bhp' in label:
                item1['other_specs_horse_power'] = spec_data[:3]

        item1['Seller_Name'] = ''.join(
            dealership.xpath(
                'div[@class="wrapper"]/h2/text()').extract()).strip()
        # City is the address part just before the country code.
        item1['City'] = address_parts[len(address_parts) - 2]
        item1['Country'] = address_parts[len(address_parts) - 1][1:]

        item2['src'] = "audiapproved.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "audi"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum is taken while both date fields are still blank.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item1),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Last_Code_Update_Date'] = 'June 6, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')
        item1['Source'] = item2['src']
        yield item1
Пример #14
0
    def parse_dir_contents(self, response):
        """Scrape one zmotors.com offer page into an ``AutodataItem``.

        Pages whose heading is exactly "Ramadan Deals" yield nothing. For
        any other page the heading is used as the car name, make/model are
        derived from it, and year, mileage and fuel type are read from the
        first three entries of the offer-data list.

        Args:
            response: offer page response exposing ``xpath()`` and ``url``.

        Yields:
            A single ``AutodataItem`` with ``meta`` attached, or nothing for
            the "Ramadan Deals" page.

        Raises:
            IndexError: if the heading, the price or one of the first three
                offer-data entries is missing.
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        # Blank template so every expected key exists on the item.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id"):
            item1[field] = ""
        item1['vat'] = 'yes'

        car_name = response.xpath(
            "//div[contains(@class,'col_1_01')]/h1/text()").extract()[0]
        if car_name == "Ramadan Deals":
            # Promotional page, not a vehicle listing.
            return

        item1['Last_Code_Update_Date'] = "17 June 2019"
        item1['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')
        item1['Country'] = 'Bahrain'
        item1['City'] = 'Manama'
        item1['Seller_Type'] = 'Large Independent Dealers'
        item1['Seller_Name'] = 'Zayani Motors'
        item1['Car_URL'] = response.url
        item1['Car_Name'] = car_name

        # Brands that appear as the first word of the heading.
        leading_word = car_name.split()[0]
        for brand in ("MG", "Lexus"):
            if leading_word == brand:
                item1['Make'] = brand
                item1['model'] = car_name.replace(brand, '').strip()

        # Mitsubishi headings are recognised by model keyword; when several
        # keywords match, the last one in this list wins.
        for keyword in ("Lancer", "L200", "Attrage", "Pajero", "Outlander",
                        "Montero", "Mast"):
            if keyword in car_name:
                item1['Make'] = "Mitsubishi"
                item1['model'] = keyword

        # Offer-data list entries are read by position: year, mileage, fuel.
        offer_lines = response.xpath(
            "//div[contains(@class,'offer_data')]/ul/li/text()").extract()
        item1['Year'] = offer_lines[0]
        item1['asking_price_inc_VAT'] = response.xpath(
            "//div[contains(@class,'offer_price')]/strong/text()"
        ).extract()[0].split()[0]

        item1['Price_Currency'] = 'BHD'
        item1['colour_exterior'] = ''
        # A leading "KM" token means no figure precedes the unit.
        mileage_token = offer_lines[1].split()[0]
        item1['mileage'] = '' if mileage_token == "KM" else mileage_token
        item1['mileage_unit'] = 'km'
        item1['fuel_type'] = offer_lines[2]

        item2['src'] = "zmotors.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "zmotors"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item1), sort_keys=True).encode('utf-8')
        ).hexdigest()
        item1['meta'] = dict(item2)
        item1['Source'] = item2['src']
        yield item1
    def parse_dir_contents(self, response):
        #item = CadillacItem()

        #item2 = MetaItem()
        #       getting make

        item2 = MetaItem()
        item = AutodataItem()
        item["Last_Code_Update_Date"] = ""
        item["Scrapping_Date"] = ""
        item["Country"] = "Qatar"
        item["City"] = ""
        item["Seller_Type"] = "Large Independent Dealers"
        item["Seller_Name"] = "Cadillac Mannai Autos"
        item["Car_URL"] = response.url
        item["Car_Name"] = ""
        item["Year"] = ""
        item["Make"] = ""
        item["model"] = ""
        item["Spec"] = ""
        item["Doors"] = ""
        item["transmission"] = ""
        item["trim"] = ""
        item["bodystyle"] = ""
        item["other_specs_gearbox"] = ""
        item["other_specs_seats"] = ""
        item["other_specs_engine_size"] = ""
        item["other_specs_horse_power"] = ""
        item["colour_exterior"] = ""
        item["colour_interior"] = ""
        item["fuel_type"] = ""
        item["import_yes_no_also_referred_to_as_GCC_spec"] = ""
        item["mileage"] = ""
        item["condition"] = ""
        item["warranty_untill_when"] = ""
        item['service_contract_untill_when'] = ''
        item['Price_Currency'] = ''
        item['asking_price_inc_VAT'] = ''
        item['asking_price_ex_VAT'] = ''
        item['warranty'] = ''
        item['service_contract'] = ''
        item['vat'] = 'yes'
        item['mileage_unit'] = ''
        item['engine_unit'] = ''
        item['autodata_Make'] = ''
        item['autodata_Make_id'] = ''
        item['autodata_model'] = ''
        item['autodata_model_id'] = ''
        item['autodata_Spec'] = ''
        item['autodata_Spec_id'] = ''
        item['autodata_transmission'] = ''
        item['autodata_transmission_id'] = ''
        item['autodata_bodystyle'] = ''
        item['autodata_bodystyle_id'] = ''

        item['Last_Code_Update_Date'] = 'June 6, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')

        item2['src'] = "cadillac.mannaiautos.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "cadillac_mannaiautos"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']

        item['asking_price_inc_VAT'] = (response.xpath(
            "//div[contains(@class,'price-now')]/span[contains(@class,'value')]/text()"
        ).extract()[0]).split('QAR')[-1].strip()
        item['Price_Currency'] = 'QAR'
        item['Year'] = response.xpath(
            "//div[contains(@class, 'title module align-center overview-title')]/h3/span[contains(@class, 'year')]/text()"
        ).extract()[0].strip()
        item['Make'] = response.xpath(
            "//div[contains(@class, 'title module align-center overview-title')]/h3/span[contains(@class, 'make')]/text()"
        ).extract()[0].strip()
        item['model'] = response.xpath(
            "//div[contains(@class, 'title module align-center overview-title')]/h3/span[contains(@class, 'model')]/text()"
        ).extract()[0].strip()
        item['Spec'] = response.xpath(
            "//div[contains(@class, 'title module align-center overview-title')]/h3/span[contains(@class, 'variant')]/text()"
        ).extract()[0].strip()
        item['Car_Name'] = item['Make'] + ' ' + item['model'] + ' ' + item[
            'Spec']
        item['transmission'] = response.xpath(
            "//div[contains(@class, 'cell transmission')]/span[contains(@class, 'value transmission')]/text()"
        ).extract()[0].strip()
        item['bodystyle'] = response.xpath(
            "//div[contains(@class, 'cell bodystyle')]/span[contains(@class, 'value bodystyle')]/text()"
        ).extract()[0].strip()
        item['mileage'] = ((response.xpath(
            "//div[contains(@class, 'cell mileage')]/span[contains(@class, 'value mileage')]/text()"
        ).extract()[0]).split('miles')[0]).strip()
        item['mileage_unit'] = 'miles'
        item['other_specs_engine_size'] = ((response.xpath(
            "//div[contains(@class, 'cell engine-size')]/span[contains(@class, 'value engine-size')]/text()"
        ).extract()[0]).split('l')[0]).strip()
        item['engine_unit'] = 'litre'
        item['colour_exterior'] = response.xpath(
            "//div[contains(@class, 'cell colour')]/span[contains(@class, 'value colour')]/text()"
        ).extract()[0].strip()
        item['fuel_type'] = response.xpath(
            "//div[contains(@class, 'cell fuel-type')]/span[contains(@class, 'value fuel-type')]/text()"
        ).extract()[0].strip()
        item['Doors'] = response.xpath(
            "//div[contains(@class, 'cell doors')]/span[contains(@class, 'value doors')]/text()"
        ).extract()[0].strip()
        item['colour_interior'] = response.xpath(
            "//div[contains(@class, 'cell interior-colour')]/span[contains(@class, 'value interior-colour')]/text()"
        ).extract()[0].strip()
        #item['PREVIOUS_OWNERS']=response.xpath("//div[contains(@class, 'cell previous-owners')]/span[contains(@class, 'value previous-owners')]/text()").extract()[0].strip()
        item['other_specs_seats'] = response.xpath(
            "//div[contains(@class, 'cell num-seats')]/span[contains(@class, 'value num-seats')]/text()"
        ).extract()[0].strip()
        #item['Stock_No']=response.xpath("//div[contains(@class, 'cell stock-no')]/span[contains(@class, 'value stock-no')]/text()").extract()[0].strip()
        #item['ADDITIONAL_INFO']=response.xpath("//div[contains(@class, 'inner')]/div[contains(@class,'description-text')]/text()").extract()[0].strip()
        #item2['src'] = "cadillac.mannaiautos.com"
        #item2['ts'] = datetime.datetime.utcnow().isoformat()
        #item2['name'] = "cadillac"
        #item2['url'] = url
        #item2['uid'] = str(uuid.uuid4())
        #item2['cs'] = hashlib.md5(json.dumps(dict(item), sort_keys=True)).hexdigest()

        #extras=response.xpath("//div[contains(@class, 'extras')]/p/text()").extract()
        #n=len(extras)

        #for i in range(0,n):
        #   a=extras[i]
        #   item['ADDITIONAL_INFO'].append(a)

        yield item
Пример #16
0
    def parse_dir_contents(self, response):
        """Build one ``AutodataItem`` from a dasweltauto.me vehicle page.

        Every field is first set to a blank default, the record metadata
        (``item['meta']``) is attached, and only then are the scraped values
        filled in.  An item is yielded only when the dealership heading on
        the page contains " Oman"; any other page yields nothing.

        Args:
            response: the downloaded detail page; ``response.xpath`` and
                ``response.url`` are used.

        Yields:
            The populated ``AutodataItem`` for an Oman dealership page.

        Raises:
            IndexError: if the title, the cash price, or one of the
                positional spec cells is missing from an Oman page.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Start every field at the empty string.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id", "wheel_size",
                "top_speed_kph", "cylinders", "acceleration", "torque_Nm",
        ):
            item[field] = ""
        item["Seller_Type"] = "Large Independent Dealers"
        item["Seller_Name"] = "Dasweltauto"
        item['Last_Code_Update_Date'] = 'Thursday, June 04, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        item2['src'] = "dasweltauto.me"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "dasweltauto_oman"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # NOTE(review): the checksum is taken here, before any scraped value
        # is written, so it only covers the defaults above and the scrape
        # date -- confirm whether it was meant to cover the scraped fields.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']
        item['Car_URL'] = response.url

        # .get() returns None when the heading is absent; treat that the same
        # as "not an Oman dealership" instead of failing on the `in` test.
        dealer = response.xpath(
            "//div[contains(@class,'dealership')]/div[contains(@class,'wrapper')]/h2/text()"
        ).get() or ""
        if " Oman" not in dealer:
            return

        title = response.xpath(
            "//div[contains(@id,'vehicle_cta')]/div/div[contains(@class,'title')]/text()"
        ).extract()[0]
        item['Make'] = 'Volkswagen'
        item['model'] = title.split()[0].strip()  # first word of the title
        item['Car_Name'] = item['Make'] + ' ' + item['model']

        cash_price = response.xpath(
            "//div[contains(@class,'price')]/div[contains(@class,'cashprice')]/text()"
        ).extract()[0]
        item['asking_price_inc_VAT'] = cash_price.split('USD')[-1].strip()
        item['Price_Currency'] = 'USD'

        # The spec cells are read by position, so evaluate the query once and
        # index into the result instead of re-running it for every field.
        specs = response.xpath(
            "//div[contains(@class,'item')]/div[contains(@class,'spec_data')]/text()"
        ).extract()
        item['Year'] = specs[0]
        item['colour_exterior'] = specs[1]
        item['transmission'] = specs[2].strip()
        item['other_specs_engine_size'] = specs[3].split('cc')[0].strip()
        item['engine_unit'] = 'cc'
        item['mileage'] = specs[4].split('km')[0].strip()
        item['mileage_unit'] = 'km'
        item['fuel_type'] = specs[5]
        item['acceleration'] = specs[6].split(' ')[0].strip()
        item['other_specs_horse_power'] = specs[7].split('bhp')[0].strip()

        item['Country'] = 'Oman'
        item['City'] = 'Muscat'
        item['Seller_Name'] = 'Wattayah Motors Oman VW'
        # Seller_Type keeps the default set above ("Large Independent
        # Dealers"); it was previously overwritten here with a differently
        # capitalised spelling of the same label.

        yield item
Пример #17
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a gmc.uma.com.sa vehicle page.

        Every field is first set to a default, the record metadata
        (``item['meta']``) is attached, and then the scraped values are
        filled in.  Each scraped value is the stripped concatenation of the
        text nodes matched by its XPath, so a missing node gives an empty
        string rather than an error.

        Args:
            response: the downloaded detail page; it is wrapped in a
                ``Selector`` and ``response.url`` is read.

        Yields:
            The populated ``AutodataItem``.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Start every field at the empty string, then set the fixed values.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id",
        ):
            item[field] = ""
        item["Country"] = "KSA"
        item["Seller_Type"] = "Official Dealers"
        item["Seller_Name"] = "Universal Motor Agencies"
        item['warranty'] = 'yes'
        item['vat'] = 'yes'
        item['Last_Code_Update_Date'] = 'Thursday, June 04, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        item2['src'] = "gmc.uma.com.sa"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "gmc_uma"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # NOTE(review): the checksum is taken here, before any scraped value
        # is written, so it only covers the defaults above and the scrape
        # date -- confirm whether it was meant to cover the scraped fields.
        item2['cs'] = hashlib.md5(json.dumps(dict(item), sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Car_URL'] = response.url
        item['Source'] = item2['src']
        path = Selector(response)

        def joined_text(query):
            # Stripped concatenation of every text node matched by `query`.
            return ''.join(path.xpath(query).extract()).strip()

        def cell(css_name):
            # Text of one "cell <name>" / "value <name>" spec entry.
            return joined_text(
                '//div[@class="cell %s"]/span[@class="value %s"]/text()'
                % (css_name, css_name))

        def heading(css_name):
            # Text of one span of the title heading.
            return joined_text(
                '//div[@class="title module"]/h3/span[@class= "%s"]//text()'
                % css_name)

        def value_and_unit(text):
            # "<value> <unit>" -> (value, unit); the unit is '' when the text
            # has no second token, instead of failing on the missing index.
            tokens = text.split(' ')
            return tokens[0], (tokens[1] if len(tokens) > 1 else '')

        (item['other_specs_engine_size'],
         item['engine_unit']) = value_and_unit(cell('engine-size'))
        item['colour_exterior'] = cell('colour')
        item['bodystyle'] = cell('bodystyle')
        item['mileage'], item['mileage_unit'] = value_and_unit(cell('mileage'))

        item['model'] = heading('model')
        # The year comes from the title heading; the "reg-year" cell that was
        # read earlier in the old code was always overwritten by this value.
        item['Year'] = heading('year')
        item['Make'] = heading('make')
        item["Spec"] = heading('variant')

        price_parts = joined_text(
            '//*[@id="content-wrap"]/div[2]/div/div[1]/div/div[2]/div/div/span[2]/text()'
        ).split('SAR')
        # Text after the first "SAR"; when the currency marker is absent the
        # whole (possibly empty) text is kept rather than raising IndexError.
        item['asking_price_inc_VAT'] = (
            price_parts[1] if len(price_parts) > 1 else price_parts[0])
        item['Price_Currency'] = 'SAR'

        item['fuel_type'] = cell('fuel-type')
        item['colour_interior'] = cell('interior-colour')
        item['transmission'] = cell('transmission')
        item['Doors'] = cell('doors')
        item['Car_Name'] = (item['Make'] + ' ' + item['model'] + ' ' +
                            item["Spec"] + ' ' + item['Year']).strip()
        # Warranty end date is set to 24 months after the scrape date.
        item['warranty_untill_when'] = (
            datetime.today() + relativedelta(months=+24)).strftime('%Y-%m-%d')

        yield item
Пример #18
0
    def getdata(self, response):
        """Build one ``AutodataItem`` from an approved.me.jaguar.com page.

        Every field is first set to a default, the record metadata
        (``item['meta']``) is attached, and then the scraped values are
        filled in from the page's ``th``/``td`` table, its title and its
        price box.

        Args:
            response: the downloaded detail page; ``response.xpath`` and
                ``response.url`` are used.

        Yields:
            The populated ``AutodataItem``.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Start every field at the empty string, then set the fixed values.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id",
        ):
            item[field] = ""
        # These three values are replaced further down, but they are what the
        # checksum below is computed over, so they are kept as they were.
        item["Country"] = "KSA"
        item["Seller_Type"] = "Official Dealers"
        item["Seller_Name"] = "Universal Motor Agencies"
        item['warranty'] = 'yes'
        item['vat'] = 'yes'
        item['Last_Code_Update_Date'] = 'Thursday, June 07, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        item2['src'] = "approved.me.jaguar.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "jaguar_sa"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # NOTE(review): the checksum is taken here, before any scraped value
        # is written, so it only covers the defaults above and the scrape
        # date -- confirm whether it was meant to cover the scraped fields.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Car_URL'] = response.url
        item['Source'] = item2['src']

        item["Country"] = "Saudi Arabia"
        item["Seller_Type"] = "Large Independent Dealers"
        item["Seller_Name"] = "MOHAMED YOUSUF NAGHI MOTORS"

        values = response.xpath("//tr/td/text()").extract()
        labels = response.xpath("//tr/th/text()").extract()

        # Raw Engine / Bodystyle cell texts, remembered so they can be cut
        # out of the page title below.  They stay None when the row is
        # absent, so nothing unrelated is removed from the title.
        engine_text = None
        body_text = None

        # Labels and values are paired by position; zip() stops at the
        # shorter list instead of indexing past the end of the values.
        for label, value in zip(labels, values):
            if 'Model Year' in label:
                item["Year"] = value
            elif 'Exterior' in label:
                item["colour_exterior"] = value
            elif 'Interior' in label:
                item["colour_interior"] = value
            elif 'Kilometers' in label:
                tokens = value.split(' ')
                item['mileage'] = tokens[0]
                item['mileage_unit'] = tokens[-1]
            elif 'Transmission' in label:
                item["transmission"] = value.split(' ')[-1]
            elif 'Bodystyle' in label:
                item["bodystyle"] = value.split(' ')[-1]
                body_text = value
            elif 'Engine' in label:
                tokens = value.split(' ')
                item["other_specs_engine_size"] = tokens[0]
                # Second token of the engine cell; blank when there is none.
                item['cylinders'] = tokens[1] if len(tokens) > 1 else ''
                engine_text = value
                item['engine_unit'] = 'l'
            elif 'Fuel Type' in label:
                item["fuel_type"] = value
            elif 'Location' in label:
                item["City"] = value

        item["Make"] = "Jaguar"
        # Car name = make + page title with the upper-cased engine and
        # bodystyle texts removed.
        title = ''.join(
            response.xpath(
                '//hgroup/h1[@class="section-title"]/text()').extract())
        if engine_text:
            title = title.replace(engine_text.upper() + ' ', '')
        if body_text:
            title = title.replace(body_text.upper(), '')
        item["Car_Name"] = item["Make"] + ' ' + title.strip()

        item['Price_Currency'] = 'SAR'
        # .get() returns None when the price box is absent; fall back to ''
        # so the price is left blank instead of failing on None.split().
        price_text = response.xpath(
            "//strong[contains(@class,'price-box')]/text()").get() or ''
        item['asking_price_inc_VAT'] = price_text.split('SAR')[-1].strip()
        yield item
Пример #19
0
    def parse_dir_contents(self, response):
        """Build one ``AutodataItem`` from an oman.pe-mb.com vehicle page.

        Every field is first set to a default, the record metadata
        (``item['meta']``) is attached, and then the scraped values are
        filled in.  Most values are read by position from the page's "left"
        and "right" ``<ul>`` lists, keeping the text after the last ':' of
        each entry.

        Args:
            response: the downloaded detail page; ``response.xpath`` and
                ``response.url`` are used.

        Yields:
            The populated ``AutodataItem``.

        Raises:
            IndexError: if the heading, the price, or one of the positional
                list entries is missing from the page.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Start every field at the empty string, then set the fixed values.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id", "wheel_size",
                "top_speed_kph", "cylinders", "acceleration", "torque_Nm",
        ):
            item[field] = ""
        item["Country"] = "Oman"
        item["Car_URL"] = response.url
        item['vat'] = 'yes'
        item['Last_Code_Update_Date'] = 'Thursday, June 04, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        item2['src'] = "oman.pe-mb.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "omanpe"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # NOTE(review): the checksum is taken here, before any scraped value
        # is written, so it only covers the defaults above, the page URL and
        # the scrape date -- confirm whether it was meant to cover the
        # scraped fields.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']

        def after_colon(text):
            # "Label : value" -> "value"
            return text.split(':')[-1].strip()

        def tab_text(position):
            # Concatenated text of the n-th <li> of the second tab panel.
            return ''.join(
                response.xpath(
                    '//div[@class="resp-tabs-container"]/div[2]/ul/li[%d]/text()'
                    % position).extract())

        # Both lists are read by position several times; evaluate each query
        # once and index into the result.
        left = response.xpath(
            "//ul[contains(@class, 'left')]/li/text()").extract()
        right = response.xpath(
            "//ul[contains(@class, 'right')]/li/text()").extract()

        item['Year'] = after_colon(left[1])
        item['Make'] = 'Mercedes-Benz'
        heading_words = response.xpath(
            "//div[contains(@class, 'col_8 content car-detail')]/h2/text()"
        ).extract()[0].split()
        # Model is the first two words of the heading joined without a space;
        # slicing also copes with a heading that has fewer than two words.
        item['model'] = ''.join(heading_words[:2])
        item['Car_Name'] = item['Make'] + ' ' + item['model']
        item['transmission'] = after_colon(left[5])
        item['trim'] = after_colon(right[2])
        item['bodystyle'] = after_colon(left[4])
        if '/' in item['bodystyle']:
            # Any slash-separated body style is stored under one fixed label.
            item['bodystyle'] = 'Coupe/Cabriolet'

        item['wheel_size'] = tab_text(4).strip().split(' ')[-1]
        item['warranty'] = tab_text(1).split(
            'Warranty : ')[-1].strip().split(',')[0]
        item['service_contract'] = tab_text(2).strip().split(' ')[-1]

        item['colour_exterior'] = after_colon(left[3])
        item['mileage'] = after_colon(left[2])
        item['mileage_unit'] = 'km'
        item['asking_price_inc_VAT'] = response.xpath(
            "//div[contains(@class,'price')]/h3/text()"
        ).extract()[0].split('OMR')[-1].strip()
        item['Price_Currency'] = 'OMR'
        item['Country'] = after_colon(right[0])
        item['Seller_Name'] = after_colon(right[1])

        yield item
Пример #20
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a bmw-dubai.com vehicle page.

        Every field is first set to a default, the fixed and scraped values
        are filled in, and the record metadata (``item1['meta']``, including
        an MD5 checksum of the item as populated at that point) is attached
        last.

        Args:
            response: the downloaded detail page; it is wrapped in a
                ``Selector`` and ``response.url`` is read.

        Yields:
            The populated ``AutodataItem``.

        Raises:
            ValueError: if the "Warranty in Months" or "Cylinder Capacity"
                value does not start with an integer.
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        # Start every field at the empty string, then set the fixed values.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id", "wheel_size",
                "top_speed_kph", "cylinders", "acceleration", "torque_Nm",
        ):
            item1[field] = ""
        item1['vat'] = 'yes'  # was misspelled 'yess'

        sel = Selector(response)
        item1['Last_Code_Update_Date'] = 'Thursday, June 04, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')
        item1["Car_URL"] = response.url
        item1["Country"] = "UAE"
        item1["City"] = "Dubai"
        item1["Make"] = "BMW"
        item1["Car_Name"] = "BMW " + ''.join(
            sel.xpath(
                '//div[@class="col-flex pdp-details-top-title"]/h2/text()').
            extract()).strip()

        def first_two_tokens(text):
            # Split on single spaces after turning non-breaking spaces into
            # plain ones; the second token is '' when the text has none.
            tokens = text.replace('\xa0', ' ').split(' ')
            return tokens[0], (tokens[1] if len(tokens) > 1 else '')

        # The price text is "<currency> <amount>"; read it once and split it.
        price_text = ''.join(
            sel.xpath(
                '//div[@class="pdp-details-top-info-price"]/h3/span//text()').
            extract()).strip()
        (item1["Price_Currency"],
         item1["asking_price_inc_VAT"]) = first_two_tokens(price_text)

        # Panel labels (lower-cased) whose value is copied unchanged.
        copied_as_is = {
            "model year": 'Year',
            "transmission": 'transmission',
            "basic paintwork": 'colour_exterior',
            "number of doors": 'Doors',
            "model": 'model',
            "fuel type": 'fuel_type',
            "number of seats": 'other_specs_seats',
            "upholstery type": 'trim',
            # Matched case-insensitively like every other label.
            "number of cylinders": 'cylinders',
        }

        details_panels = sel.xpath(
            '//div[@class="vehicle-details-panel"]/div[@class="vehicle-details-panel-feature"]'
        )
        for det in details_panels:
            label = ''.join(
                det.xpath('div[1]//text()').extract()).strip().lower()
            value = ''.join(det.xpath('div[2]//text()').extract()).strip()

            if label in copied_as_is:
                item1[copied_as_is[label]] = value
            elif label == "mileage":
                (item1['mileage'],
                 item1['mileage_unit']) = first_two_tokens(value)
            elif label == "body type":
                item1['bodystyle'] = value.replace('Coupé', 'Coupe')
            elif label == "engine power(hp)":
                item1['other_specs_horse_power'] = first_two_tokens(value)[0]
            elif label == "warranty in months":
                # Warranty end date = scrape date + the stated months.
                item1['warranty_untill_when'] = (
                    datetime.today() +
                    relativedelta(months=+int(value))).strftime('%Y-%m-%d')
            elif label == "cylinder capacity":
                # Stored as an int, with the unit fixed to cc.
                item1['other_specs_engine_size'] = int(
                    first_two_tokens(value)[0])
                item1['engine_unit'] = 'cc'

        if item1['warranty_untill_when'] != "":
            item1['warranty'] = "yes"
        item1["Seller_Name"] = ''.join(
            sel.xpath(
                '//p[@class="plan-route-map-container-dealer-info-name"]/text()'
            ).extract()).strip()

        item2['src'] = "bmw-dubai.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "bmw"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # Checksum of the item as populated so far ('meta' and 'Source' are
        # added afterwards and are therefore not part of it).
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item1),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Source'] = item2['src']
        yield item1
Пример #21
0
    def parse_data(self, response):
        item = AutodataItem()
        item2 = MetaItem()
        item["Last_Code_Update_Date"] = ""
        item["Scrapping_Date"] = ""
        item["Country"] = ""
        item["City"] = ""
        item["Seller_Type"] = ""
        item["Seller_Name"] = ""
        item["Car_URL"] = ""
        item["Car_Name"] = ""
        item["Year"] = ""
        item["Make"] = ""
        item["model"] = ""
        item["Spec"] = ""
        item["Doors"] = ""
        item["transmission"] = ""
        item["trim"] = ""
        item["bodystyle"] = ""
        item["other_specs_gearbox"] = ""
        item["other_specs_seats"] = ""
        item["other_specs_engine_size"] = ""
        item["other_specs_horse_power"] = ""
        item["colour_exterior"] = ""
        item["colour_interior"] = ""
        item["fuel_type"] = ""
        item["import_yes_no_also_referred_to_as_GCC_spec"] = ""
        item["mileage"] = ""
        item["condition"] = ""
        item["warranty_untill_when"] = ""
        item['service_contract_untill_when'] = ''
        item['Price_Currency'] = ''
        item['asking_price_inc_VAT'] = ''
        item['asking_price_ex_VAT'] = ''
        item['warranty'] = ''
        item['service_contract'] = ''
        item['vat'] = 'yes'
        item['mileage_unit'] = ''
        item['engine_unit'] = ''
        item['autodata_Make'] = ''
        item['autodata_Make_id'] = ''
        item['autodata_model'] = ''
        item['autodata_model_id'] = ''
        item['autodata_Spec'] = ''
        item['autodata_Spec_id'] = ''
        item['autodata_transmission'] = ''
        item['autodata_transmission_id'] = ''
        item['autodata_bodystyle'] = ''
        item['autodata_bodystyle_id'] = ''
        item["Last_Code_Update_Date"] = "Wednesday,June 19,2019"
        item["Scrapping_Date"] = datetime.today().strftime('%A, %B %d, %Y')
        item["Country"] = "UAE"
        item["City"] = "Dubai"
        item["Seller_Type"] = "Large Independent Dealers"
        item["Seller_Name"] = "Pearl Motors"
        item["Car_URL"] = response.url
        item['asking_price_inc_VAT'] = response.xpath(
            "//div[contains(@class,'car-price')]/h2/text()").extract(
            )[0].split('AED')[-1].strip().replace(',', '')
        item['Price_Currency'] = 'AED'
        item["Car_Name"] = response.xpath(
            "//span[contains(@class,'underline text-black')]/text()").extract(
            )[0].strip()

        if "ROLLS ROYCE" in item["Car_Name"]:
            item["Make"] = "ROLLS ROYCE"
            item["model"] = item["Car_Name"].replace('ROLLS ROYCE', '').strip()
        if "FERRARI" in item["Car_Name"]:
            item["Make"] = "FERRARI"
            item["model"] = item["Car_Name"].replace('FERRARI', '').strip()
        if "MERCEDES" in item["Car_Name"]:
            item["Make"] = "MERCEDES"
            item["model"] = item["Car_Name"].replace('MERCEDES BENZ',
                                                     '').strip()
        if "MASERATI" in item["Car_Name"]:
            item["Make"] = "MASERATI"
            item["model"] = item["Car_Name"].replace('MASERATI', '').strip()
        if "BENTLEY" in item["Car_Name"]:
            item["Make"] = "BENTLEY"
            item["model"] = item["Car_Name"].replace('BENTLEY', '').strip()
        if "LAMBORGHINI" in item["Car_Name"]:
            item["Make"] = "LAMBORGHINI"
            item["model"] = item["Car_Name"].replace('LAMBORGHINI', '').strip()
        if "MCLAREN" in item["Car_Name"]:
            item["Make"] = "MCLAREN"
            item["model"] = item["Car_Name"].replace('MCLAREN', '').strip()
        if "ASTON MARTIN" in item["Car_Name"]:
            item["Make"] = "ASTON MARTIN"
            item["model"] = item["Car_Name"].replace('ASTON MARTIN',
                                                     '').strip()
        if "RANGE ROVER" in item["Car_Name"]:
            item["Make"] = "LAND ROVER"
            item["model"] = "RANGE ROVER"
            item["Spec"] = item["Car_Name"].replace('RANGE ROVER', '').strip()

        arr = response.xpath("//tr/td/text()").extract()
        '''item["Year"] = arr[1]
        item["mileage"] = arr[7]
        item['mileage_unit'] = 'KM'
        item["other_specs_engine_size"] = arr[9].split('L')[0]
        item['engine_unit'] ='L'''

        arrs = list(OrderedDict.fromkeys(arr))
        for i in range(0, len(arrs)):
            if arrs[i] == 'Year':
                item["Year"] = arr[i + 1]
            if arr[i] == 'Kilometers':
                item["mileage"] = arr[i + 1]
                item['mileage_unit'] = 'KM'
            if arrs[i] == 'Engine':
                item["other_specs_engine_size"] = arrs[i + 1].split('L')[0]
                item['engine_unit'] = 'L'
            if arr[i] == 'Horsepower':
                item["other_specs_horse_power"] = arr[i + 1]

            if arrs[i] == 'Fuel Type':
                item["fuel_type"] = arrs[i + 1]
            if arrs[i] == 'Warranty':
                item['warranty'] = arrs[i + 1]
            if arrs[i] == 'Motors Trim':
                item['bodystyle'] = arrs[i + 1]

        item2['src'] = "pearl-motors.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "pearl"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']
        yield item
Пример #22
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a www.mascars.net car detail page.

        Every item field is first set to an empty string and then overwritten
        with the constants used for this seller and with the values found in
        the ``internal_details`` list of the page.

        Args:
            response: response for the detail page; must provide ``xpath()``
                and ``url``.

        Yields:
            The populated ``AutodataItem``. Its ``meta`` entry holds the
            ``MetaItem`` fields as a plain dict.
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        # Blank defaults, assigned in a fixed order so that the key order of
        # the item does not depend on what the page happens to contain.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id",
        ):
            item1[field] = ""

        item1["Country"] = "Saudi Arabia"
        item1["Seller_Type"] = "Large Independent Dealers"
        item1["Seller_Name"] = "Mas Cars"
        item1["Car_URL"] = response.url
        item1['vat'] = 'yes'

        item1["model"] = ''.join(response.xpath('//div[@class="internal_details"]//ul//span[@id = "get-my-model"]/text()').extract()).strip()
        item1["Make"] = ''.join(response.xpath('//div[@class="internal_details"]//ul//span[@id = "get-my-manufac"]/text()').extract()).strip()

        # Each <li> is a "<strong>label</strong><span>value</span>" pair.
        details = response.xpath('//div[@class="internal_details"]//ul/li')
        for det in details:
            key = ''.join(det.xpath('strong[1]/text()').extract()).strip()
            value = ''.join(det.xpath('span[1]/text()').extract()).strip()
            if key == "Year:":
                item1['Year'] = value
            elif key == "Car Color:":
                item1["colour_exterior"] = value
            elif key == "Internal Color:":
                item1["colour_interior"] = value
            elif key == "Transmission:":
                item1["transmission"] = value
            elif key == "Engine:":
                # NOTE(review): the "Engine:" row is stored as mileage in km;
                # confirm against the live page that this label really
                # carries the odometer reading.
                item1["mileage"] = value
                item1["mileage_unit"] = 'km'
            elif key == "Price:":
                # Split "<amount> <currency>". A price without a currency
                # token leaves the currency blank instead of raising
                # IndexError.
                price_parts = value.split(' ')
                item1["asking_price_inc_VAT"] = price_parts[0]
                item1["Price_Currency"] = (
                    price_parts[1] if len(price_parts) > 1 else '')
            elif key == "Warrenty:":  # label spelling as matched on the site
                item1["warranty"] = value

        # The display name is always rebuilt from make and model.
        item1["Car_Name"] = item1['Make'] + ' ' + item1['model']

        item2['src'] = "www.mascars.net"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "mascars"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum is taken while both date fields are still empty, so it
        # covers only the scraped content and the constants above.
        item2['cs'] = hashlib.md5(json.dumps(dict(item1), sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Last_Code_Update_Date'] = 'Tuesday, June 18, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')
        item1['Source'] = item2['src']
        yield item1
Пример #23
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a www.abisayara.com listing page.

        Every item field is first set to an empty string, then filled from
        the page: seller name, heading, price and the ``label: value`` rows
        of the ``vif_info`` block.

        Args:
            response: response for the listing page; must provide ``xpath()``
                and ``url``.

        Yields:
            The populated ``AutodataItem``. Its ``meta`` entry holds the
            ``MetaItem`` fields as a plain dict.
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        # Blank defaults, assigned in a fixed order so that the key order of
        # the item does not depend on what the page happens to contain.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id",
        ):
            item1[field] = ""

        item1["Country"] = "Saudi Arabia"
        item1["Seller_Type"] = "Market Places"
        item1["Car_URL"] = response.url
        item1['vat'] = 'yes'

        # Seller: text before the first '-', without the "Dealer:" prefix,
        # with every run of non-alphanumeric characters collapsed to a space.
        seller = "".join(
            response.xpath('//div[@class="seller_logo_main clearfix"]/text()').
            extract()).strip().split('-')[0].replace('Dealer:', '').strip()
        item1["Seller_Name"] = re.sub('[^0-9a-zA-Z]+', ' ', seller).strip()

        item1["Car_Name"] = ''.join(
            response.xpath(
                '//h1[@class="vif_heading"]/text()').extract()).strip()

        details = response.xpath('//div[@class="vif_info"]')
        item1['Price_Currency'] = 'SAR'
        item1['asking_price_inc_VAT'] = ''.join(
            details.xpath('h3/text()').extract()).strip().split(
                'SAR')[0].strip()

        for det in details.xpath('ul/li'):
            # Rows read "label: value". A row without a colon carries no
            # value, so it is skipped instead of raising IndexError.
            parts = ''.join(det.xpath('text()').extract()).strip().split(':')
            if len(parts) < 2:
                continue
            key = parts[0]
            value = parts[1].strip()
            if key == 'Make':
                item1['Make'] = value
            elif key == 'Model':
                item1['model'] = value
            elif key == 'Year':
                item1['Year'] = value
            elif key == 'Mileage':
                # Keep only the first space-separated token of the value.
                item1['mileage'] = value.split(' ')[0]
                item1['mileage_unit'] = 'km'
            elif key == 'City':
                item1['City'] = value
            elif key == 'Color':
                item1['colour_exterior'] = value
            elif key == 'Engine size':
                item1['other_specs_engine_size'] = value
            elif key == 'Gearbox':
                item1['transmission'] = value

        item2['src'] = "www.abisayara.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "abisayara"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum is taken while both date fields are still empty, so it
        # covers only the scraped content and the constants above.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item1),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Last_Code_Update_Date'] = 'Tuesday, June 18, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')
        item1['Source'] = item2['src']
        yield item1
Пример #24
0
    def parse_dir_contents(self, response):
        """Build one ``AutodataItem`` from an abu-dhabi.pe-mb.com car page.

        Most values are read by position from two ``<ul>`` lists whose class
        contains ``left`` / ``right``; each entry is ``label : value`` and
        only the text after the last colon is kept.

        Args:
            response: response for the detail page; must provide ``xpath()``
                and ``url``.

        Yields:
            The populated ``AutodataItem``. Its ``meta`` entry holds the
            ``MetaItem`` fields as a plain dict.

        Raises:
            IndexError: if the heading, the price or one of the positional
                list entries read below is missing from the page.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Blank defaults, assigned in a fixed order so that the key order of
        # the item does not depend on what the page happens to contain.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id", "wheel_size",
        ):
            item[field] = ""
        item['vat'] = 'yes'
        item['Last_Code_Update_Date'] = 'June 15, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')
        item['Car_URL'] = response.url

        def after_colon(text):
            # "label : value" -> "value"
            return text.split(':')[-1].strip()

        # Evaluate each list once instead of once per field.
        left = response.xpath(
            "//ul[contains(@class, 'left')]/li/text()").extract()
        right = response.xpath(
            "//ul[contains(@class, 'right')]/li/text()").extract()

        item['Year'] = after_colon(left[1])
        item['Make'] = 'Mercedes-Benz'
        # The model is the first two words of the heading, concatenated
        # without a separator.
        mod = (response.xpath(
            "//div[contains(@class, 'col_8 content car-detail')]/h2/text()").
               extract()[0]).split()
        item['model'] = mod[0] + mod[1]
        item['Car_Name'] = item['Make'] + ' ' + item['model']
        # Transmission is the only positional entry treated as optional.
        if len(left) > 5:
            item['transmission'] = after_colon(left[5])
        item['trim'] = after_colon(right[2])
        item['bodystyle'] = after_colon(left[4])
        item['colour_exterior'] = after_colon(left[3])
        item['mileage'] = after_colon(left[2])
        item['mileage_unit'] = 'km'
        item['asking_price_inc_VAT'] = ((
            response.xpath("//div[contains(@class,'price')]/h3/text()"
                           ).extract()[0]).split('AED')[-1]).strip()
        item['Price_Currency'] = 'AED'
        item['Country'] = 'UAE'
        item['City'] = after_colon(right[0])
        item['Seller_Name'] = after_colon(right[1])

        for text in response.xpath("//ul/li/text()").extract():
            if "Warranty :" in text:
                # "Warranty : <warranty>, <term>": the part before the first
                # comma is the warranty, the part after the last comma its
                # term.
                war = text.split('Warranty :')[-1].strip()
                item['warranty'] = war.split(',')[0].strip()
                item['warranty_untill_when'] = war.split(',')[-1].strip()
            elif "Wheel Size" in text:
                item['wheel_size'] = text.split('Wheel Size :')[-1].strip()
            elif "With a service package" in text:
                item['service_contract'] = text.split(
                    'With a service package :')[-1].strip()

        # Turn a leading number of years into an end date counted from today.
        # When no warranty line was found, or its term does not start with an
        # integer, keep the text as scraped instead of raising ValueError.
        try:
            warranty_years = int(item['warranty_untill_when'].split(' ')[0])
        except ValueError:
            warranty_years = None
        if warranty_years is not None:
            item['warranty_untill_when'] = (
                datetime.today() + relativedelta(years=+warranty_years)
            ).strftime('%Y-%m-%d')

        item2['src'] = "abu-dhabi.pe-mb.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "abudhabi"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # Hash the item only after it has been filled in; hashing it right
        # after the blank defaults would give every car scraped on the same
        # day the same checksum.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']
        yield item
Пример #25
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a Sun City Motors car page.

        Make, model and spec are derived from the upper-case page title by a
        sequence of brand checks; technical data is read from the text of
        the ``<td>`` cells of the page.

        Args:
            response: response for the detail page; must provide ``xpath()``
                and ``url``.

        Yields:
            The populated ``AutodataItem``. Its ``meta`` entry holds the
            ``MetaItem`` fields as a plain dict.

        Raises:
            IndexError: if the price or title element is missing, or if the
                page has fewer than ten table cells.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Blank defaults, assigned in a fixed order so that the key order of
        # the item does not depend on what the page happens to contain.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id",
        ):
            item[field] = ""
        item['vat'] = 'yes'

        item["Last_Code_Update_Date"] = "Wednesday,June 19,2019"
        item["Scrapping_Date"] = datetime.today().strftime('%A, %B %d, %Y')
        item["Country"] = "UAE"
        item["City"] = "Dubai"
        item["Seller_Type"] = "Large Independent Dealers"
        item["Seller_Name"] = "Sun City Motors"
        item["Car_URL"] = response.url
        item['asking_price_inc_VAT'] = response.xpath("//span[contains(@class,'price_figure ')]/text()").extract()[0].split('AED')[-1].strip()
        item['Price_Currency'] = 'AED'
        item["Car_Name"] = response.xpath("//div[contains(@class,'single_page_title hidden-sm hidden-xs')]/h1/text()").extract()[0].strip()

        name = item["Car_Name"]
        words = name.split()

        def word(index):
            # Title word at `index`, or "" when the title is shorter, so a
            # two-word title no longer aborts the whole item.
            return words[index] if index < len(words) else ""

        # The checks are independent and run in this order on purpose: a
        # later match overwrites what an earlier one stored.
        if "BMW" in name:
            item["Make"] = "BMW"
            item["model"] = word(1)
        if "AUDI" in name:
            item["Make"] = "AUDI"
            item["model"] = word(1)
            item["Spec"] = name.replace('AUDI', '').replace(
                item["model"], '').strip()
        if "RANGE ROVER" in name:
            item["Make"] = "LAND ROVER"
            item["model"] = "RANGE ROVER"
            item["Spec"] = name.replace('RANGE ROVER', '').strip()
        if "DODGE" in name:
            item["Make"] = "DODGE"
            item["model"] = name.replace('DODGE', '').strip()
        if "LAND ROVER" in name:
            item["Make"] = "LAND ROVER"
            item["model"] = name.replace('LAND ROVER', '').strip()
        if "MASERATI" in name:
            item["Make"] = "MASERATI"
            item["model"] = word(1)
            # Only the alphabetic runs of the third word are kept as spec.
            item["Spec"] = " ".join(re.findall("[a-zA-Z]+", word(2)))
        if "PORSCHE" in name:
            item["Make"] = "PORSCHE"
            item["model"] = word(1)
            item["Spec"] = name.replace('PORSCHE', '').replace(
                item["model"], '').strip()
        if "CHEVROLET" in name:
            item["Make"] = "CHEVROLET"
            item["model"] = word(1)
            item["Spec"] = word(2)
        if "JAGUAR" in name:
            item["Make"] = "JAGUAR"
            item["model"] = name.replace('JAGUAR', '').strip()
        if "FORD" in name:
            item["Make"] = "FORD"
            item["model"] = word(1)
            item["Spec"] = name.replace('FORD', '').replace(
                item["model"], '').strip()
        if "JEEP" in name:
            item["Make"] = "JEEP"
            item["model"] = word(1)
            item["Spec"] = name.replace('JEEP', '').replace(
                item["model"], '').strip()
        if "MERCEDES-BENZ" in name:
            item["Make"] = "MERCEDES"
            item["model"] = name.replace('MERCEDES-BENZ', '').strip()
        if "NISSAN" in name:
            item["Make"] = "NISSAN"
            item["model"] = name.replace('NISSAN', '').strip()

        arr = response.xpath("//tr/td/text()").extract()
        # Year, mileage and engine size are read from fixed cell positions.
        # NOTE(review): assumes the table layout is identical on every page.
        item["Year"] = arr[1]
        item["mileage"] = arr[7]
        item['mileage_unit'] = 'KM'
        item["other_specs_engine_size"] = arr[9].split('L')[0]
        item['engine_unit'] = 'L'

        # Horsepower: the value is the cell right after its label in the raw
        # cell list. The whole list is scanned; the previous loop stopped at
        # the length of the de-duplicated list and could miss the label.
        for pos, cell in enumerate(arr[:-1]):
            if cell == 'Horsepower':
                item["other_specs_horse_power"] = arr[pos + 1]

        # The remaining labels are looked up in the order-preserving,
        # de-duplicated cell list. A label in the very last position has no
        # value cell and is ignored instead of raising IndexError.
        arrs = list(OrderedDict.fromkeys(arr))
        for pos, cell in enumerate(arrs[:-1]):
            if cell == 'Exterior Color':
                item["colour_exterior"] = arrs[pos + 1]
            elif cell == 'Fuel Type':
                item["fuel_type"] = arrs[pos + 1]
            elif cell == 'Warranty':
                item['warranty'] = arrs[pos + 1]

        item2['src'] = "suncity.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "suncity_spider"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(json.dumps(dict(item), sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']
        yield item
Пример #26
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a bahrain.yallamotor.com car page.

        Make and model are the first two space-separated words of the page
        heading (after removing ``'Used '``); the remaining attributes come
        from the icon/value pairs in the ``pull-left text-left`` blocks.

        Args:
            response: response for the detail page; must provide ``xpath()``
                and ``url``.

        Yields:
            The populated ``AutodataItem``. Its ``meta`` entry holds the
            ``MetaItem`` fields as a plain dict.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Blank defaults, assigned in a fixed order so that the key order of
        # the item does not depend on what the page happens to contain.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id",
        ):
            item[field] = ""

        item["Country"] = "Bahrain"
        item["Seller_Type"] = "Market Places"
        item["Seller_Name"] = "Yallamotors"
        item["Car_URL"] = response.url
        item["Car_Name"] = ''.join(
            response.xpath(
                '//*[@id="mainContent"]/section[3]/div/div/div[1]/h1/text()').
            extract()).replace('Used ', '').strip()

        # A missing or one-word heading leaves the model blank instead of
        # raising IndexError.
        name_words = item["Car_Name"].split(' ')
        item["Make"] = name_words[0]
        item["model"] = name_words[1] if len(name_words) > 1 else ""

        item['Price_Currency'] = 'BHD'
        item['asking_price_inc_VAT'] = ''.join(
            response.xpath(
                '//span[@class="price-count h3 green bold block"]/text()').
            extract()).strip()
        # NOTE(review): this path has no leading "//", unlike the other
        # queries in this method; confirm that it matches anything.
        item['asking_price_ex_VAT'] = ''.join(
            response.xpath(
                "div[@class = 'col-md-3 used-car-user-info']/span[@class = 'price-count h3 green bold block']/span[@class ='price-count_small']/text()"
            ).extract()).strip()
        item['vat'] = 'yes'
        item['mileage_unit'] = 'km'
        item['Last_Code_Update_Date'] = 'Thursday, June 07, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        # Each block pairs the text inside <i> (the label) with the text of
        # <strong class="block"> (the value).
        for s in response.xpath('//div[@class="pull-left text-left"]'):
            key = ''.join(s.xpath("i//text()").extract()).strip()
            value = ''.join(
                s.xpath("strong[@class='block']//text()").extract()).strip()
            if key == "Location":
                item['City'] = value
            elif key == "Model Year":
                item['Year'] = value
            elif key == "Car Driven":
                item["mileage"] = value
            elif key == "Transmission:":
                item["transmission"] = value
            elif key == "Fuel Type:":
                item["fuel_type"] = value
            elif key == "Number of Doors":
                item["Doors"] = value.replace('Door', '').strip()
            elif key == "Number of Cylinders":
                item['cylinders'] = value
            elif key == "Body Style:":
                item['bodystyle'] = value
            elif key == "Exterior Color":
                item['colour_exterior'] = value

        item2['src'] = "bahrain.yallamotor.com"
        # Timestamp added so this meta record carries the same keys as the
        # ones built by the other callbacks in this file.
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "yallamotor"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']
        yield item
Пример #27
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a chevrolet.uma.com.sa car page.

        Year, make, model and variant come from the title module; technical
        data from the ``overview-data`` cells; city from the address block;
        currency and price from the ``price-now`` value.

        Args:
            response: response for the detail page; it is wrapped in a
                ``Selector`` and must provide ``url``.

        Yields:
            The populated ``AutodataItem``. Its ``meta`` entry holds the
            ``MetaItem`` fields as a plain dict.
        """
        item2 = MetaItem()
        item1 = AutodataItem()

        # Blank defaults, assigned in a fixed order so that the key order of
        # the item does not depend on what the page happens to contain.
        for field in (
                "Last_Code_Update_Date", "Scrapping_Date", "Country", "City",
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id", "wheel_size",
                "top_speed_kph", "cylinders", "acceleration", "torque_Nm",
        ):
            item1[field] = ""
        item1['vat'] = 'yes'

        sel = Selector(response)
        details1 = sel.xpath('//div[@class = "title module align-center"]')
        details2 = sel.xpath(
            '//div[@class="overview-data module overview-data-standard"]/div/div'
        )
        details3 = sel.xpath('//div[@class="address"]')
        details4 = sel.xpath(
            '//div[@class="price module u-hidden-sm-only"]/div[@class="price-now"]'
        )

        def text_of(selection, path):
            # Joined, stripped text of every node matched by `path`.
            return ''.join(selection.xpath(path).extract()).strip()

        def title_part(css_class):
            # Text of one <span> of the title heading.
            return text_of(
                details1, 'h3/span[@class= "%s"]//text()' % css_class)

        def overview(name):
            # Text of the overview cell named `name`; the cell and its value
            # span share that name in their class attributes.
            return text_of(
                details2,
                './/div[@class="cell %s"]/span[@class="value %s"]//text()'
                % (name, name))

        def number_and_unit(text):
            # "<number> <unit>" -> (number, unit). The unit is "" when the
            # cell is empty or has no unit, instead of raising IndexError.
            parts = text.split(' ')
            return parts[0], (parts[1] if len(parts) > 1 else '')

        item1['Car_URL'] = response.url
        item1['Year'] = title_part('year')
        item1['Make'] = title_part('make')
        item1['model'] = title_part('model')
        item1['Spec'] = title_part('variant')
        item1['Car_Name'] = item1['Make'] + ' ' + item1['model'] + ' ' + item1[
            'Spec']

        item1['mileage'], item1['mileage_unit'] = number_and_unit(
            overview('mileage'))
        item1['fuel_type'] = overview('fuel-type')
        item1['Doors'] = overview('doors')
        item1['other_specs_engine_size'], item1['engine_unit'] = (
            number_and_unit(overview('engine-size')))
        # The interior colour cell is stored in the `trim` field.
        item1['trim'] = overview('interior-colour')
        item1['transmission'] = overview('transmission')
        item1['bodystyle'] = overview('bodystyle')
        item1['colour_exterior'] = overview('exterior-colour')

        item1['City'] = text_of(details3, 'span[@class="address-city"]//text()')
        item1['Country'] = 'Saudi Arabia'
        # The first three characters of the price text are taken as the
        # currency and the remainder as the amount.
        price_text = text_of(details4, 'span[@class="value"]//text()')
        item1['Price_Currency'] = price_text[:3]
        item1['asking_price_inc_VAT'] = price_text[3:]

        item2['src'] = "chevrolet.uma.com.sa"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "spider"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum is taken before the dates and the seller name are
        # filled in, so it covers those three fields as empty strings.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item1),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Last_Code_Update_Date'] = 'June 6, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%Y-%m-%d')
        item1['Seller_Name'] = 'Universal Motors Agencies'
        item1['Source'] = item2['src']
        yield item1
Пример #28
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from a Dubizzle used-car listing page.

        Every field starts out blank (``vat`` defaults to ``'yes'``), the
        ``meta`` record is attached, and the rows of the
        ``listing-details-list`` block are then mapped onto item fields by
        matching each row's label text.

        Args:
            response: Response for the listing page; only ``response.url``
                and ``response.xpath`` are used.

        Yields:
            The populated item, only when a make was found (``Car_Name``
            stays empty otherwise and nothing is yielded).
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Blank defaults for everything the page may or may not provide.
        for field in (
                "Car_Name", "Year", "Make", "model", "Spec", "Doors",
                "transmission", "trim", "bodystyle", "other_specs_gearbox",
                "other_specs_seats", "other_specs_engine_size",
                "other_specs_horse_power", "colour_exterior",
                "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id", "wheel_size",
                "top_speed_kph", "cylinders", "acceleration", "torque_Nm"):
            item[field] = ""
        item['vat'] = 'yes'

        # The checksum below is taken over the item as it stands at this
        # point, so these values (some of which are replaced afterwards) are
        # kept exactly as they were to leave the stored checksum unchanged.
        item["Last_Code_Update_Date"] = "Wednesday,June 19,2019"
        item["Scrapping_Date"] = datetime.today().strftime('%A, %B %d, %Y')
        item["Country"] = "UAE"
        item["City"] = "Dubai"
        item["Seller_Type"] = "MarketPlace"
        item["Seller_Name"] = "111 Used Cars"
        item["Car_URL"] = response.url

        item2['src'] = "dubizzle.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "dubizzle_spider"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Source'] = item2['src']

        # Values actually emitted with the item.
        item["Last_Code_Update_Date"] = "Friday,June 21,2019"
        item["City"] = ""
        item["Seller_Type"] = "Marketplaces"
        item["Seller_Name"] = "Dubizzle"

        item['asking_price_inc_VAT'] = response.xpath(
            "//span[contains(@id,'actualprice')]/text()").get()
        item['Price_Currency'] = 'AED'

        values = response.xpath(
            '//div[@id="listing-details-list"]//li/strong/text()').extract()
        labels = [
            text.strip() for text in response.xpath(
                '//div[@id="listing-details-list"]//li/span/text()').extract()
        ]
        labels = [text for text in labels if text]

        # zip() pairs each label with its value and stops at the shorter
        # list, so a page with fewer labels than values no longer raises
        # IndexError half-way through.
        for label, raw in zip(labels, values):
            value = raw.strip()
            if 'Year' in label:
                item["Year"] = value
            elif 'Kilometers' in label:
                item["mileage"] = value
                item['mileage_unit'] = 'KM'
            elif 'Color' in label:
                item['colour_exterior'] = value
            elif 'Doors' in label:
                # Keep only the leading token without a '+' (e.g. "5+ doors"
                # becomes "5"); a blank value leaves the field empty.
                tokens = raw.split()
                if tokens:
                    item['Doors'] = tokens[0].replace('+', '').strip()
            elif 'Warranty' in label:
                item['warranty'] = value
            elif 'Specs' in label:
                item["Spec"] = value
            elif 'Transmission' in label:
                item['transmission'] = raw.replace('Transmission',
                                                   '').strip()
            elif 'Body Type' in label:
                item['bodystyle'] = value
            elif 'Fuel Type' in label:
                item["fuel_type"] = value
            elif 'Trim' in label:
                # Use the value of this row (was a hard-coded arr[11]).
                item['trim'] = value
            elif 'Cylinders' in label:
                if 'Unknown' not in raw:
                    item['cylinders'] = value
            elif 'Make' in label:
                item['Make'] = remove_non_ascii(value)
            elif 'Model' in label:
                # Store the cleaned model in 'model'; it used to overwrite
                # 'Make', which also corrupted Car_Name.
                item['model'] = remove_non_ascii(value)
            elif 'Horsepower' in label:
                if 'unknown' not in value.lower():
                    item["other_specs_horse_power"] = raw.split(
                        'HP')[0].strip()

        if item['Make'] != '':
            item["Car_Name"] = item["Make"] + ' ' + item["model"]

        if item['Car_Name'] != '':
            yield item
Пример #29
0
    def parse_dir_contents(self, response):
        """Build one ``AutodataItem`` from a behbehaniusedcars.com car page.

        The item is initialised with blank/default values, the ``meta``
        record (including a checksum of that initial state) is attached, and
        the page's heading, ``specs`` list and price heading are then mapped
        onto item fields.

        Args:
            response: Response for the car detail page; only
                ``response.url`` and ``response.xpath`` are used.

        Yields:
            The populated item.

        Raises:
            IndexError: If the ``specs`` list has fewer than eight text
                entries or the price heading is missing.
        """
        item = AutodataItem()
        item2 = MetaItem()

        # Blank defaults for everything not fixed for this seller.
        for field in (
                "Seller_Type", "Seller_Name", "Car_URL", "Car_Name", "Year",
                "Make", "model", "Spec", "Doors", "transmission", "trim",
                "bodystyle", "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "vat", "mileage_unit", "autodata_Make",
                "autodata_Make_id", "autodata_model", "autodata_model_id",
                "autodata_Spec", "autodata_Spec_id", "autodata_transmission",
                "autodata_transmission_id", "autodata_bodystyle",
                "autodata_bodystyle_id", "wheel_size", "top_speed_kph",
                "cylinders", "acceleration", "torque_Nm"):
            item[field] = ""
        item["Country"] = "Bahrain"
        item["City"] = "Sitra"
        item['engine_unit'] = 'l'
        item['Last_Code_Update_Date'] = 'Thursday, June 07, 2019'
        item['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        item2['src'] = "behbehaniusedcars.com"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "behbaniused"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        # The checksum covers the item as initialised above, before any
        # page data (or Car_URL) is filled in.
        item2['cs'] = hashlib.md5(
            json.dumps(dict(item),
                       sort_keys=True).encode('utf-8')).hexdigest()
        item['meta'] = dict(item2)
        item['Car_URL'] = response.url
        item['Source'] = item2['src']

        # Both lookups were re-evaluated for every field; query them once.
        heading = ''.join(
            response.xpath(
                "//div[contains(@id,'listings')]/h2/text()").extract())
        specs = response.xpath(
            "//ul[contains(@class,'specs')]/li/text()").extract()

        item['Car_Name'] = heading.replace('(Approved)', '').replace(
            '“', '').replace('”', '').replace("Approved", '').strip()
        item['Year'] = specs[1].strip()

        item['Make'] = ''.join(
            response.xpath('//p[@class="showroom"]/text()').extract()).split(
                'Showroom')[0].strip()
        if 'used car' in item['Make'].lower():
            # The showroom line names no brand here; fall back to the first
            # word of the heading once the year is removed.
            item['Make'] = heading.replace(item['Year'],
                                           '').strip().split(' ')[0]
        if 'alfa romeo' in item['Car_Name'].lower():
            item['Make'] = 'Alfa Romeo'
        if 'jetta' in item['Car_Name'].lower():
            item['Make'] = 'Volkswagen'
            item['Car_Name'] = item['Make'] + ' ' + item['Car_Name']

        item['transmission'] = specs[3].strip()
        item['cylinders'] = specs[6].strip().split(' ')[0]
        item['bodystyle'] = specs[7].strip()
        item['other_specs_engine_size'] = specs[4].strip().replace(
            'L', '').replace('TC', '').replace('SC', '').replace('T', '')
        # More than three characters left switches the unit from the
        # default 'l' to 'cc'.
        if len(item['other_specs_engine_size']) > 3:
            item['engine_unit'] = 'cc'
        item['mileage'] = specs[2].strip()
        item['mileage_unit'] = 'km'

        price = response.xpath(
            "//div[contains(@id,'listings')]/h3/text()").extract()[0]
        item['asking_price_inc_VAT'] = ''.join(re.findall(r'\d+', price))
        item['Price_Currency'] = 'BD'
        item['Seller_Name'] = 'Behbehani Brothers'

        # A brand name from the footer menu that occurs in the car name
        # overrides the make found above (the last matching entry wins).
        for brand_node in response.xpath('//ul[@id="menu-footer-brands"]/li'):
            make = ''.join(brand_node.xpath('a/text()').extract()).strip()
            # An entry without anchor text yields '', which is "in" every
            # string and used to blank out the make; skip it.
            if make and make in item['Car_Name']:
                item['Make'] = make
        yield item
Пример #30
0
    def parse_data(self, response):
        """Build one ``AutodataItem`` from an olx.com.om car advert page.

        The item is initialised with blank/default values, the ``meta``
        record (including a checksum of that initial state) is attached, and
        the rows of the ``item`` details table plus the price, location and
        breadcrumb elements are then mapped onto item fields.

        Args:
            response: Response for the advert page; ``response.url`` is
                stored and the body is queried through a ``Selector``.

        Yields:
            The populated item.
        """
        item2 = MetaItem()
        item1 = AutodataItem()
        sel = Selector(response)

        def text_of(query):
            # Concatenated, stripped text of every node matched by *query*.
            return ''.join(sel.xpath(query).extract()).strip()

        # Blank defaults; the two date fields are still blank when the
        # checksum is computed and are filled in right after it.
        for name in (
                "Last_Code_Update_Date", "Scrapping_Date", "City",
                "Seller_Name", "Car_Name", "Year", "Make", "model", "Spec",
                "Doors", "transmission", "trim", "bodystyle",
                "other_specs_gearbox", "other_specs_seats",
                "other_specs_engine_size", "other_specs_horse_power",
                "colour_exterior", "colour_interior", "fuel_type",
                "import_yes_no_also_referred_to_as_GCC_spec", "mileage",
                "condition", "warranty_untill_when",
                "service_contract_untill_when", "Price_Currency",
                "asking_price_inc_VAT", "asking_price_ex_VAT", "warranty",
                "service_contract", "mileage_unit", "engine_unit",
                "autodata_Make", "autodata_Make_id", "autodata_model",
                "autodata_model_id", "autodata_Spec", "autodata_Spec_id",
                "autodata_transmission", "autodata_transmission_id",
                "autodata_bodystyle", "autodata_bodystyle_id"):
            item1[name] = ""
        item1["Country"] = "Oman"
        item1["Seller_Type"] = "Market Places"
        item1["Car_URL"] = response.url
        item1['vat'] = 'yes'

        item2['src'] = "olx.com.om"
        item2['ts'] = datetime.utcnow().isoformat()
        item2['name'] = "olx_om"
        item2['url'] = response.url
        item2['uid'] = str(uuid.uuid4())
        snapshot = json.dumps(dict(item1), sort_keys=True)
        item2['cs'] = hashlib.md5(snapshot.encode('utf-8')).hexdigest()
        item1['meta'] = dict(item2)
        item1['Source'] = item2['src']
        item1['Last_Code_Update_Date'] = 'Tuesday, June 18, 2019'
        item1['Scrapping_Date'] = datetime.today().strftime('%A, %B %d, %Y')

        # Table headers whose cell text is copied into a field unchanged.
        direct_fields = {
            'Model': "model",
            'Transmission Type': "transmission",
            'Year': "Year",
            'Color': "colour_exterior",
            'Body Type': "bodystyle",
        }
        for row in sel.xpath('//table[@class="item"]//tr'):
            key = ''.join(row.xpath('th/text()').extract()).strip()
            value = ''.join(row.xpath('td//text()').extract()).strip()
            if key in direct_fields:
                item1[direct_fields[key]] = value
            elif key == 'Kilometers':
                # Only the text before the first space is kept.
                item1["mileage"] = value.split(' ')[0]
            elif key == 'Warranty':
                item1['warranty'] = (
                    '' if value == 'Does Not Apply' else value)

        price_text = text_of('//div[@class="pricelabel tcenter"]//text()')
        item1['asking_price_inc_VAT'] = price_text.split(' ')[0]
        item1['Price_Currency'] = 'OMR'

        location_text = text_of('//strong[@class="c2b small"]//text()')
        item1["City"] = location_text.split(' ')[-1]

        # First word of the fourth breadcrumb entry, minus its first
        # character.
        crumb_text = text_of('//ul[@class="clearfix"]/li[4]//span/text()')
        item1['Make'] = crumb_text.split(' ')[0][1:]
        item1['Car_Name'] = item1['Make'] + ' ' + item1['model']

        yield item1