示例#1
0
    def process_item(self, item, spider):
        """Persist a scraped politician item and its legislative membership.

        Items produced by spiders other than ``politician`` and ``plinks``
        are passed through untouched. For the handled spiders this looks up
        the ``Legislative`` by roman code, fetches or creates the
        ``Politician`` (downloading its photo when a new record is created),
        the ``Party`` and the ``House``, and finally saves the
        ``LegislativePolitician`` row linking them.

        Args:
            item: Mapping-like scraped item. Reads ``legislative_id``,
                ``politician_id``, ``first_name``, ``last_name``,
                ``profile_url``, ``profile_id``, ``party`` and ``role``;
                ``email``, ``photo_url`` and ``state`` are optional.
            spider: Spider that produced the item; only ``name`` is read.

        Returns:
            The same ``item`` object, so that any later pipeline stage
            receives it. This holds even when persisting failed: errors are
            printed together with the offending item and are not re-raised.
        """
        if spider.name not in ('politician', 'plinks'):
            return item
        try:
            legislative = Legislative.objects.get(roman_code=item['legislative_id'])

            first_name = item['first_name'].title().strip()
            last_name = item['last_name'].title().strip()
            try:
                politician = Politician.objects.get(politician_id=item['politician_id'])
            except Politician.DoesNotExist:
                politician = Politician(
                    politician_id=item['politician_id'],
                    first_name=first_name,
                    last_name=last_name,
                    email=item.get('email', ''),
                    profile_url=item['profile_url'],
                    # The first two characters of the scraped id are dropped.
                    profile_id=item['profile_id'][2:])

                # A photo is optional: a missing or empty URL must not
                # discard the whole item.
                photo_url = item.get('photo_url')
                if photo_url:
                    filename = photo_url.split('/')[-1]

                    response = urllib2.urlopen(photo_url)
                    try:
                        photo_data = response.read()
                    finally:
                        # Python 2 urllib2 responses are not context managers.
                        response.close()

                    # The temporary file only has to live until the storage
                    # backend has copied its content; close (and delete) it
                    # right after instead of leaking the handle.
                    with NamedTemporaryFile(delete=True) as img_temp:
                        img_temp.write(photo_data)
                        img_temp.flush()
                        politician.photo.save(filename, File(img_temp))

                politician.save()

            party_name = item['party'].title().strip()
            # Party code: initials of every non-empty word except the first.
            initials = [word[0] for word in party_name.split(' ') if word]
            party_code = ''.join(initials[1:])
            party, party_created = Party.objects.get_or_create(name=party_name, code=party_code)

            try:
                leg_pol = LegislativePolitician.objects.get(
                    legislative=legislative, politician=politician, party=party)
            except LegislativePolitician.DoesNotExist:
                leg_pol = LegislativePolitician(
                    date=legislative.start_date, legislative=legislative,
                    politician=politician, party=party)

            # Houses are matched on the first word of the scraped role.
            role_key = item['role'].split()[0]
            try:
                house = House.objects.get(rol_name__icontains=role_key)
            except House.DoesNotExist:
                house = House(name=item['role'], rol_name=role_key)
                house.save()

            leg_pol.house = house
            leg_pol.state = item.get('state', '').title()
            leg_pol.save()
            print(u'%s - Perfil #%s: %s' % (legislative.code, politician.pk, politician))
        except Exception as e:
            # Best effort: report the failing item and keep the crawl going.
            print(item)
            print(e)
        # Scrapy expects process_item to hand the item on to the next stage.
        return item