Python body_or_str примеры, scrapy.utils.response.body_or_str Python примеры использования

Пример #1

0

Показать файл

Файл: iterators.py Проект: 1012/scrapy

def csviter(obj, delimiter=None, headers=None, encoding=None):
    """Return an iterator of dictionaries built from the given csv object.

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8

    delimiter is the character used to separate fields in the given obj;
    when it is not given, ``csv.reader`` is created without one.

    headers is an iterable that, when provided, offers the keys for the
    returned dictionaries; if not, the first row is used.

    encoding is used to decode every field. It is taken from obj when obj
    is a TextResponse; otherwise the given value is used, falling back to
    'utf-8'.

    Rows whose length differs from the number of headers are logged at
    WARNING level and skipped. Input without any row yields nothing.
    """
    encoding = obj.encoding if isinstance(obj, TextResponse) else encoding or 'utf-8'

    def _decode(raw_row):
        return [str_to_unicode(field, encoding) for field in raw_row]

    lines = StringIO(body_or_str(obj, unicode=False))
    if delimiter:
        csv_r = csv.reader(lines, delimiter=delimiter)
    else:
        csv_r = csv.reader(lines)

    if not headers:
        # Fetch the header row explicitly. A StopIteration must not leak out
        # of a generator body (PEP 479 turns it into RuntimeError), so input
        # without rows simply finishes the generator.
        try:
            first_row = next(csv_r)
        except StopIteration:
            return
        headers = _decode(first_row)

    # Iterating the reader ends cleanly at EOF, unlike ``while True`` combined
    # with the Python-2-only ``csv_r.next()``.
    for raw_row in csv_r:
        row = _decode(raw_row)
        if len(row) != len(headers):
            log.msg(format="ignoring row %(csvlnum)d (length: %(csvrow)d, should be: %(csvheader)d)",
                    level=log.WARNING, csvlnum=csv_r.line_num, csvrow=len(row), csvheader=len(headers))
            continue
        yield dict(zip(headers, row))

Пример #2

0

Показать файл

def csviter(obj, delimiter=None, headers=None, encoding=None):
    """Return an iterator of dictionaries built from the given csv object.

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8

    delimiter is the character used to separate fields in the given obj;
    when it is not given, ``csv.reader`` is created without one.

    headers is an iterable that, when provided, offers the keys for the
    returned dictionaries; if not, the first row is used.

    encoding is used to decode every field. It is taken from obj when obj
    is a TextResponse; otherwise the given value is used, falling back to
    'utf-8'.

    Rows whose length differs from the number of headers are logged at
    WARNING level and skipped. Input without any row yields nothing.
    """
    encoding = obj.encoding if isinstance(obj, TextResponse) else encoding or 'utf-8'

    def _decode(raw_row):
        return [str_to_unicode(field, encoding) for field in raw_row]

    lines = StringIO(body_or_str(obj, unicode=False))
    if delimiter:
        csv_r = csv.reader(lines, delimiter=delimiter)
    else:
        csv_r = csv.reader(lines)

    if not headers:
        # Fetch the header row explicitly. A StopIteration must not leak out
        # of a generator body (PEP 479 turns it into RuntimeError), so input
        # without rows simply finishes the generator.
        try:
            first_row = next(csv_r)
        except StopIteration:
            return
        headers = _decode(first_row)

    # Iterating the reader ends cleanly at EOF, unlike ``while True`` combined
    # with the Python-2-only ``csv_r.next()``.
    for raw_row in csv_r:
        row = _decode(raw_row)
        if len(row) != len(headers):
            log.msg("ignoring row %d (length: %d, should be: %d)" % (csv_r.line_num, len(row), len(headers)), log.WARNING)
            continue
        yield dict(zip(headers, row))

Пример #3

0

Показать файл

Файл: test2.py Проект: Helen-ChenHan/AljazeeraSpider

 def parseList(self, response):
     """Yield one request per ``loc`` element found in the response text.

     The text captured between the opening ``<loc`` match and the closing
     ``</loc>`` tag is used as the request URL; ``self.parse_items`` is
     passed as the second argument of each request.
     """
     tag = 'loc'
     body = body_or_str(response)
     loc_pattern = re.compile(r"(<%s[\s>])(.*?)(</%s>)" % (tag, tag),
                              re.DOTALL)
     for found in loc_pattern.finditer(body):
         # Group 2 is the lazily matched text between opening and closing tag.
         yield scrapy.Request(found.group(2), self.parse_items)

Пример #4

0

Показать файл

Файл: mytimeSpider.py Проект: digger5212/mytime

	def parse(self, response):
		"""Yield a request for every non-empty ``loc`` entry in the response text.

		Each request uses ``self.parse_page`` as its callback.
		"""
		tag = 'loc'
		body = body_or_str(response)
		loc_pattern = re.compile(r"(<%s[\s>])(.*?)(</%s>)" % (tag, tag), re.DOTALL)
		for found in loc_pattern.finditer(body):
			target = found.group(2)
			# Nothing to request when the element has no content.
			if target == '':
				continue
			yield Request(target, callback=self.parse_page)

Пример #5

0

Показать файл

Файл: test_utils_response.py Проект: 00gpowe/scrapy

    def test_body_or_str_encoding(self):
        """Check the type returned by body_or_str for both ``unicode`` flags.

        The dummy response, a plain string literal and a u-prefixed literal
        must each come back as ``str`` with unicode=False and as ``unicode``
        with unicode=True.
        """
        for source in (self.dummy_response, 'text', u'text'):
            as_str = body_or_str(source, unicode=False)
            self.assertTrue(isinstance(as_str, str))
            as_unicode = body_or_str(source, unicode=True)
            self.assertTrue(isinstance(as_unicode, unicode))

Пример #6

0

Показать файл

Файл: test_utils_response.py Проект: reenvs/self-summary

    def test_body_or_str_encoding(self):
        """Check the type returned by body_or_str for both ``unicode`` flags.

        Every input is converted twice: first with unicode=False (expecting
        ``str``), then with unicode=True (expecting ``unicode``).
        """
        def converted(value, as_unicode):
            return body_or_str(value, unicode=as_unicode)

        for given in (self.dummy_response, 'text', u'text'):
            self.assertTrue(isinstance(converted(given, False), str))
            self.assertTrue(isinstance(converted(given, True), unicode))

Пример #7

0

Показать файл

 def parse(self, response):
     """Request every ``loc`` entry of the response whose text contains 'breed'.

     All ``loc`` contents are collected first; afterwards one request per
     matching entry is yielded with ``self.parse_detail`` as callback and
     ``dont_filter=True``.
     """
     tag = 'loc'
     body = body_or_str(response)
     loc_pattern = re.compile(r"(<%s[\s>])(.*?)(</%s>)" % (tag, tag), re.DOTALL)
     # Collect every captured entry before any request is produced.
     candidates = [found.group(2) for found in loc_pattern.finditer(body)]
     for candidate in candidates:
         # Only entries mentioning 'breed' are handed over for download.
         if not re.search('breed', candidate):
             continue
         yield Request(url=candidate, callback=self.parse_detail, dont_filter=True)

Пример #8

0

Показать файл

Файл: iterators.py Проект: myli-cn/scrapy

def xmliter(obj, nodename):
    """Return an iterator of XPathSelector's over all nodes of an XML document,
       given the name of the node to iterate. Useful for parsing XML feeds.

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8

    Every matched node is re-wrapped with the text that precedes the first
    ``nodename`` opening tag and the text taken from the closing-tag position
    reported by ``re_rsearch``, and a selector for that node is yielded.
    """
    # nodename is meant to be matched literally: escape it so that characters
    # that are legal in XML names but special in regexes (e.g. ".") cannot
    # change what the patterns below match.
    nodename_patt = re.escape(nodename)

    HEADER_START_RE = re.compile(r"^(.*?)<\s*%s(?:\s|>)" % nodename_patt, re.S)
    HEADER_END_RE = re.compile(r"<\s*/%s\s*>" % nodename_patt, re.S)
    text = body_or_str(obj)

    # Everything before the first opening tag of nodename.
    header_start = re.search(HEADER_START_RE, text)
    header_start = header_start.group(1).strip() if header_start else ""
    # NOTE(review): re_rsearch presumably returns the (start, end) span of the
    # last match; only index 1 is used here - confirm against its definition.
    header_end = re_rsearch(HEADER_END_RE, text)
    header_end = text[header_end[1] :].strip() if header_end else ""

    r = re.compile(r"<%s[\s>].*?</%s>" % (nodename_patt, nodename_patt), re.DOTALL)
    for match in r.finditer(text):
        nodetext = header_start + match.group() + header_end
        yield XmlXPathSelector(text=nodetext).select("//" + nodename)[0]

Пример #9

0

Показать файл

Файл: iterators.py Проект: reprior123/TraderSoftwareRP

def xmliter(obj, nodename):
    """Return an iterator of Selector's over all nodes of an XML document,
       given the name of the node to iterate. Useful for parsing XML feeds.

    obj can be:
    - a Response object
    - a unicode string
    - a string encoded as utf-8

    Every matched node is re-wrapped with the text that precedes the first
    ``nodename`` opening tag and the text taken from the closing-tag position
    reported by ``re_rsearch``, and a selector for that node is yielded.
    """
    # nodename is meant to be matched literally: escape it so that characters
    # that are legal in XML names but special in regexes (e.g. ".") cannot
    # change what the patterns below match.
    nodename_patt = re.escape(nodename)

    HEADER_START_RE = re.compile(r'^(.*?)<\s*%s(?:\s|>)' % nodename_patt, re.S)
    HEADER_END_RE = re.compile(r'<\s*/%s\s*>' % nodename_patt, re.S)
    text = body_or_str(obj)

    # Everything before the first opening tag of nodename.
    header_start = re.search(HEADER_START_RE, text)
    header_start = header_start.group(1).strip() if header_start else ''
    # NOTE(review): re_rsearch presumably returns the (start, end) span of the
    # last match; only index 1 is used here - confirm against its definition.
    header_end = re_rsearch(HEADER_END_RE, text)
    header_end = text[header_end[1]:].strip() if header_end else ''

    r = re.compile(r"<%s[\s>].*?</%s>" % (nodename_patt, nodename_patt), re.DOTALL)
    for match in r.finditer(text):
        nodetext = header_start + match.group() + header_end
        yield Selector(text=nodetext, type='xml').xpath('//' + nodename)[0]

Пример #10

0

Показать файл

Файл: test_utils_response.py Проект: 00gpowe/scrapy

 def test_body_or_str_extraction(self):
     """body_or_str gives 'dummy_response' for the dummy response and 'text' for 'text'."""
     from_response = body_or_str(self.dummy_response)
     self.assertEqual(from_response, 'dummy_response')
     from_string = body_or_str('text')
     self.assertEqual(from_string, 'text')

Пример #11

0

Показать файл

Файл: test_utils_response.py Проект: 00gpowe/scrapy

 def test_body_or_str_input(self):
     """body_or_str returns a basestring for accepted inputs and raises for the integer 2."""
     for accepted in (self.dummy_response, 'text'):
         self.assertTrue(isinstance(body_or_str(accepted), basestring))
     # An integer is passed to check that an exception is raised.
     self.assertRaises(Exception, body_or_str, 2)

Пример #12

0

Показать файл

Файл: test_utils_response.py Проект: reenvs/self-summary

 def test_body_or_str_extraction(self):
     """Compare body_or_str output against the expected text for each input."""
     expectations = (
         (self.dummy_response, 'dummy_response'),
         ('text', 'text'),
     )
     for given, wanted in expectations:
         self.assertEqual(body_or_str(given), wanted)

Пример #13

0

Показать файл

Файл: test_utils_response.py Проект: reenvs/self-summary

 def test_body_or_str_input(self):
     """Check the return type for accepted inputs and the error for the integer 2."""
     response_result = body_or_str(self.dummy_response)
     self.assertTrue(isinstance(response_result, basestring))
     string_result = body_or_str('text')
     self.assertTrue(isinstance(string_result, basestring))
     # An integer is passed to check that an exception is raised.
     self.assertRaises(Exception, body_or_str, 2)

Пример #14

0

Показать файл

Файл: test_utils_response.py Проект: radjosh/scrapy

 def test_body_or_str_extraction(self):
     """body_or_str must return the expected text for the dummy response and for a string."""
     sources = [self.dummy_response, "text"]
     wanted_texts = ["dummy_response", "text"]
     for source, wanted in zip(sources, wanted_texts):
         self.assertEqual(body_or_str(source), wanted)

Python body_or_str примеры использования