def get_votesmart_id(self, candidates, name, state, district, seat):
    """Return the VoteSmart candidateId matching this politician, or None.

    `candidates` are first narrowed to those whose electionDistrictName is
    either str(district) or 'At-Large'.  `name` is parsed with
    PoliticianNameCleaver and the remaining candidates are compared against
    it case-insensitively:

      * the candidate's lastName must equal the parsed last name, or (when
        a middle name was parsed) "<middle> <last>" or "<middle>-<last>";
      * the parsed first name must equal the candidate's firstName,
        preferredName or nickName.

    Exactly one match returns that candidate's candidateId.  Otherwise the
    parsed name and the district-filtered candidates are recorded on
    self.too_many (several matches) or self.no_match (none), and None is
    returned.

    `state` and `seat` are accepted but not consulted here.
    """
    # Narrow down by district first.
    possibilities = []
    for candidate in candidates:
        if candidate.electionDistrictName in (str(district), 'At-Large'):
            possibilities.append(candidate)

    name_obj = PoliticianNameCleaver(name).parse()
    if isinstance(name_obj, RunningMatesNames):
        # Just use the governor, not the lt. governor (this is the only
        # case where the parse result is a list of names).
        name_obj = name_obj.mates()[0]

    def _last_name_matches(candidate):
        # Plain last-name match first, then the compound forms built from
        # the middle name (space- or hyphen-joined).
        if candidate.lastName.lower() == name_obj.last.lower():
            return True
        if not name_obj.middle:
            return False
        if candidate.lastName.lower() == ' '.join(
                [name_obj.middle.lower(), name_obj.last.lower()]):
            return True
        return candidate.lastName.lower() == '-'.join(
            [name_obj.middle.lower(), name_obj.last.lower()])

    def _first_name_matches(candidate):
        wanted = name_obj.first.lower()
        known_as = [
            candidate.firstName.lower(),
            candidate.preferredName.lower(),
            candidate.nickName.lower(),
        ]
        return wanted in known_as

    # The last-name test runs first so that the first-name fields are only
    # touched for candidates whose last name already matched.
    name_possibilities = [
        candidate for candidate in possibilities
        if _last_name_matches(candidate) and _first_name_matches(candidate)
    ]

    if len(name_possibilities) == 1:
        return name_possibilities[0].candidateId

    # Ambiguous or missing: remember what was searched for, together with
    # every candidate that survived the district filter (not only the
    # name matches), for later inspection.
    bucket = self.too_many if name_possibilities else self.no_match
    searched_for = (name_obj.first, name_obj.middle, name_obj.last)
    available = [
        (c.firstName, c.preferredName, c.nickName, c.lastName)
        for c in possibilities
    ]
    bucket.append([searched_for, available])
    return None
def handle(self, *args, **options):
    """Find VoteSmart ids for politicians that do not have one yet.

    Selects every row of politician_metadata_latest_cycle_view (joined to
    matchbox_entity) whose entity_id is not already present in
    matchbox_votesmartinfo and whose seat is governor, federal house or
    federal senate.  Each row's name is parsed with PoliticianNameCleaver
    and handed to self.process_politician; a running-mates name is
    processed once per mate.

    When the loop finishes, the accumulated self.too_many and
    self.no_match lists are pretty-printed (via self.pp) to two report
    files.

    `args` and `options` are accepted but not consulted here.
    """
    self.log.info("Starting...")

    cursor = connection.cursor()

    # get count
    cursor.execute("select count(*) from politician_metadata_latest_cycle_view")
    # NOTE(review): the count is fetched but never used below; the query is
    # kept so that database interaction is unchanged.
    cursor.fetchone()
    transaction.rollback()

    # Adjacent literals are concatenated into a single one-line statement.
    select_sql = (
        " select entity_id, name, state, district, seat, cycle"
        " from politician_metadata_latest_cycle_view m"
        " inner join matchbox_entity e on e.id = m.entity_id"
        " where entity_id not in (select entity_id from matchbox_votesmartinfo)"
        " and seat in ('state:governor', 'federal:house', 'federal:senate')"
        " order by entity_id "
    )

    self.log.debug(select_sql)
    cursor.execute(select_sql)
    politicians = cursor.fetchall()
    transaction.rollback()

    self.log.info("{0} federal politicians located to find VoteSmart ids for".format(len(politicians)))

    for (entity_id, name, state, district, seat, cycle) in politicians:
        name_obj = PoliticianNameCleaver(name).parse()

        if isinstance(name_obj, RunningMatesNames):
            for mate in name_obj.mates():
                try:
                    self.process_politician(cursor, entity_id, name, state, district, seat, cycle, mate)
                except django.db.utils.IntegrityError:
                    # NOTE(review): presumably raised when a second mate
                    # maps onto the same entity_id -- confirm.  The mate is
                    # skipped and processing continues.
                    continue
        else:
            self.process_politician(cursor, entity_id, name, state, district, seat, cycle, name_obj)

    self.log.info("Done.")

    # NOTE(review): both report paths are hard-coded to one developer's
    # home directory.
    # The `with` blocks close each file even if pformat() or write() raises.
    self.log.info("Names with too many matches:")
    with open("/home/akr/work/datacommons/too_many_matches.txt", "w") as too_many_file:
        too_many_file.write(self.pp.pformat(self.too_many))

    self.log.info("Names with no matches:")
    with open("/home/akr/work/datacommons/no_match.txt", "w") as no_match_file:
        no_match_file.write(self.pp.pformat(self.no_match))