def process_item(self, item, spider):
    """Persist a scraped politician profile and its legislative membership.

    Items coming from spiders other than ``politician`` and ``plinks`` are
    returned untouched. For those two spiders the item is used to look up
    or create the ``Politician``, ``Party``, ``House`` and
    ``LegislativePolitician`` records it refers to.

    Any exception raised while processing is printed together with the
    offending item and then swallowed (best effort).

    Args:
        item: Mapping-like scraped item. Reads the keys ``legislative_id``,
            ``first_name``, ``last_name``, ``politician_id``,
            ``profile_url``, ``profile_id``, ``photo_url``, ``party`` and
            ``role``; ``email`` and ``state`` are optional.
        spider: The spider that produced the item; only ``spider.name`` is
            read.

    Returns:
        The same ``item`` in every case, so that later pipeline stages
        keep receiving it.
    """
    if spider.name not in ['politician', 'plinks']:
        return item

    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost. Confirm that (a) the photo is fetched only for
    # newly created politicians and (b) house/state are meant to be
    # refreshed for already existing memberships as well.
    try:
        legislative = Legislative.objects.get(
            roman_code=item['legislative_id'])
        first_name = item['first_name'].title().strip()
        last_name = item['last_name'].title().strip()

        try:
            politician = Politician.objects.get(
                politician_id=item['politician_id'])
        except Politician.DoesNotExist:
            politician = Politician(
                politician_id=item['politician_id'],
                first_name=first_name,
                last_name=last_name,
                email=item.get('email', ''),
                profile_url=item['profile_url'],
                # The first two characters of the scraped id are dropped.
                profile_id=item['profile_id'][2:])
            photo_url = item['photo_url']
            if photo_url:
                filename = photo_url.split('/')[-1]
                # The context manager guarantees the temporary file is
                # closed (and therefore deleted) even if the download or
                # the storage save fails.
                with NamedTemporaryFile(delete=True) as img_temp:
                    response = urllib2.urlopen(photo_url)
                    try:
                        img_temp.write(response.read())
                    finally:
                        response.close()
                    img_temp.flush()
                    # Must run while the temporary file is still open.
                    politician.photo.save(filename, File(img_temp))
            politician.save()

        party_name = item['party'].title().strip()
        # Party code: initials of every word except the first one.
        initials = [word[0] for word in party_name.split(' ') if word]
        party_code = ''.join(initials[1:])
        party, _ = Party.objects.get_or_create(
            name=party_name, code=party_code)

        try:
            leg_pol = LegislativePolitician.objects.get(
                legislative=legislative, politician=politician, party=party)
        except LegislativePolitician.DoesNotExist:
            leg_pol = LegislativePolitician(
                date=legislative.start_date,
                legislative=legislative,
                politician=politician,
                party=party)

        # Houses are matched on the first word of the scraped role.
        role_key = item['role'].split()[0]
        try:
            house = House.objects.get(rol_name__icontains=role_key)
        except House.DoesNotExist:
            house = House(name=item['role'], rol_name=role_key)
            house.save()

        leg_pol.house = house
        leg_pol.state = item.get('state', '').title()
        leg_pol.save()
        print(u'%s - Perfil #%s: %s' % (
            legislative.code, politician.pk, politician))
    except Exception as e:
        # Deliberate best effort: report the failing item and keep going.
        print(item)
        print(e)

    # Item pipelines must hand the item on; falling off the end would pass
    # None to the next stage.
    return item