# Example #1
0
def _first_match(selector, query):
    """Return the first string *query* extracts from *selector*, or None.

    None is also returned when *query* itself is empty/None, so callers can
    pass the result of ``dict.get`` for optional XPath expressions.
    """
    if not query:
        return None
    matches = selector.xpath(query).extract()
    return matches[0] if matches else None


def load_author(response, author):
    """Yield one populated ``ItemLoader`` per author node in *response*.

    Args:
        response: object exposing ``.xpath()``; it is also handed to
            ``ItemLoader`` as its response.
        author: mapping of XPath expressions (assumed to be a dict).
            ``'auth'``, ``'fn'`` and ``'ln'`` are required.  ``'email'``,
            ``'fid'``, ``'address'``, ``'href'``, ``'vitae'`` and
            ``'avatar'`` are optional; ``'address'``, ``'vitae'`` and
            ``'avatar'`` are ``%``-format templates filled with the
            reference extracted via ``'fid'`` / ``'href'``.

    Yields:
        An ``ItemLoader`` wrapping a fresh ``AuthorItem`` for each node
        matched by ``author['auth']``.  Optional fields that cannot be
        found are simply left unset.

    Raises:
        KeyError: if ``'auth'``, ``'fn'`` or ``'ln'`` is missing from *author*.
        IndexError: if an author node has no first or last name.
    """
    for auth in response.xpath(author['auth']):
        l = ItemLoader(item=AuthorItem(), response=response)
        # Spelled correctly so the loader really collapses each field to its
        # first value (the original misspelt attribute was silently ignored).
        l.default_output_processor = TakeFirst()

        # First and last name are mandatory: let a missing one raise.
        fn = auth.xpath(author['fn']).extract()[0]
        ln = auth.xpath(author['ln']).extract()[0]
        l.add_value('fname', fn)
        l.add_value('lname', ln)

        # E-mail: the first 7 characters are dropped -- presumably a
        # "mailto:" prefix; confirm against the scraped markup.
        email_ref = _first_match(auth, author.get('email'))
        if email_ref is not None:
            l.add_value('email', email_ref[7:])

        # Address and institution: the extracted reference loses its first
        # character (presumably a leading '#') before being substituted
        # into the address template.
        fid_ref = _first_match(auth, author.get('fid'))
        address_query = author.get('address')
        if fid_ref is not None and address_query:
            address = l.get_xpath(address_query % fid_ref[1:])
            if address:
                l.add_value('address', address)
                # Computed fresh for every author so a value from a previous
                # iteration can never leak into this one.
                institution = next(
                    (part for part in address[0].split(', ')
                     if 'niversity' in part),
                    None,
                )
                if institution is not None:
                    l.add_value('institution', institution)

        # Vitae and avatar share the same reference; look it up once.
        href_ref = _first_match(auth, author.get('href'))
        if href_ref is not None:
            anchor = href_ref[1:]

            vitae_query = author.get('vitae')
            if vitae_query:
                vitae = _first_match(response, vitae_query % anchor)
                if vitae is not None:
                    l.add_value('vitae', fn + ' ' + ln + vitae)

            avatar_query = author.get('avatar')
            if avatar_query:
                avatar = _first_match(response, avatar_query % anchor)
                if avatar is not None:
                    l.add_value('avatar', avatar)

        yield l