Python ItemLoader.strip примеры использования

Язык программирования: Python

Пространство имен/Пакет: scrapy.loader

Класс/Тип: ItemLoader

Метод/Функция: strip

Примеров на hotexamples.com: 1

Python ItemLoader.strip - 1 пример найден. Это лучшие примеры Python кода для scrapy.loader.ItemLoader.strip, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ItemLoader(30)

add_xpath(30)

load_item(30)

get_xpath(30)

default_output_processor(30)

default_input_processor(30)

get_collected_values(30)

add_css(30)

add_value(30)

replace_value(28)

get_output_value(28)

nested_css(14)

nested_xpath(11)

_add_value(8)

get_css(6)

selector(6)

__init__(6)

get_value(4)

items(2)

values(2)

price_in(2)

number_of_reviews_in(1)

strip(1)

add_xpath_string(1)

address_out(1)

replace_css(1)

replace(1)

originCity_in(1)

features_in(1)

TakeFirst(1)

ad_value(1)

load_items(1)

default_onput_processor(1)

default_ouput_processor(1)

_local_item(1)

defualt_output_processor(1)

destinationCity_in(1)

deafult_input_processor(1)

Пример #1

Показать файл

Файл: lists_of_cities.py Проект: igorkruglyak/place_to_live

    def parse_item(self, response):
        """Scrape the quality-of-life indices from a country page.

        Populates a ``CityItem`` through an ``ItemLoader`` built on
        ``response``, then adds the "Aggregate Score" read from the
        country's 2018 Freedom House report, which is fetched with a
        separate ``requests`` call.

        Args:
            response: Page response exposing ``xpath()`` and ``css()``.

        Yields:
            The item returned by ``ItemLoader.load_item()``.
            ``freedomhouse_score`` is added as ``None`` when the report
            cannot be fetched or no numeric score can be parsed from it.

        Raises:
            IndexError: If the page has no ``itemprop='name'`` span text,
                or fewer than three text nodes under ``.table_indices``.
        """
        import logging

        loader = ItemLoader(item=CityItem(), response=response)

        country = response.xpath("//span[@itemprop='name']/text()").extract()[-1]
        loader.add_value("country", country)

        # The loop variable is deliberately not named like the loader: on
        # Python 2 a list-comprehension variable leaks into the enclosing
        # scope and would overwrite it.
        quality_list = response.css(".table_indices ::text").extract()
        quality_of_life_index = [text.strip() for text in quality_list][-3]
        loader.add_value("quality_of_life_index", quality_of_life_index)

        # Each index is the text of the cell following the one that holds a
        # link with the given label.
        cell_xpath = (
            "//a[contains(text(), "
            "'{}')]/parent::td/following-sibling::td/text()"
        )
        labelled_fields = (
            ("purchasing_power_index", "Purchasing Power Index"),
            ("safety_index", "Safety Index"),
            ("health_care_index", "Health Care Index"),
            ("climate_index", "Climate Index"),
            ("cost_of_living_index", "Cost of Living Index"),
            ("property_price_to_income_ratio", "Property Price to Income Ratio"),
            ("traffic_commute_time_index", "Traffic Commute Time Index"),
            ("pollution_index", "Pollution Index"),
        )
        for field_name, label in labelled_fields:
            value = response.xpath(cell_xpath.format(label)).extract_first()
            loader.add_value(field_name, value)

        # The Freedom House score is best-effort: any failure to fetch or
        # parse it leaves the score as None instead of dropping the item.
        # NOTE(review): requests.get is a synchronous call made from inside
        # the callback; the timeout bounds how long it can stall.
        score = None
        base_url = "https://freedomhouse.org/report/freedom-world/2018/{}"
        try:
            res = requests.get(base_url.format(country), timeout=10)
        except requests.RequestException:
            logging.getLogger(__name__).warning(
                "Could not fetch Freedom House report for %r", country
            )
        else:
            soup_text = BeautifulSoup(res.text, "lxml").text
            match = re.search(r"Aggregate Score:(.{0,9})", soup_text)
            # The capture may be empty or whitespace-only, and its first
            # token may contain no digits; both cases mean "no score".
            tokens = match.group(1).split() if match else []
            digits = ""
            if tokens:
                digits = "".join(ch for ch in tokens[0] if ch.isdigit())
            if digits:
                score = float(digits)

        loader.add_value("freedomhouse_score", score)
        yield loader.load_item()