def handle(self, **options):
    for collection in ('organization', 'person'):
        api_collection = getattr(self.api, collection + 's')
        message = "{titled} {base_url}{plural}/{id}"
        for item in popit_unwrap_pagination(
                api_collection, embed='', per_page=100
        ):
            print message.format(
                titled=collection.title(),
                base_url=get_base_url(),
                plural=(collection + "s"),
                id=item['id']
            )
            for image in item.get('images', []):
                print " Image with URL:", image['url']
                fix_image(image)
                # Some images have an empty 'created' field, which
                # causes an Elasticsearch indexing error, so change it
                # to null if that's the case:
                if not image.get('created'):
                    image['created'] = None
            fix_dates(item)
            try:
                api_collection(item['id']).put(item)
            except HttpClientError as e:
                print "HttpClientError", e.content
                sys.exit(1)
            # If this is a person, make sure that the
            # corresponding cache entries are invalidated:
            if collection == 'person':
                person = PopItPerson.create_from_dict(item)
                person.invalidate_cache_entries()
def handle(self, **options):
    all_people = []
    for person_dict in popit_unwrap_pagination(
            self.api.persons,
            embed="membership.organization",
            per_page=100,
    ):
        if person_dict.get('standing_in') \
           and person_dict['standing_in'].get(options['year']):
            person = PopItPerson.create_from_dict(person_dict)
            all_people.append(person.as_dict(year=options['year']))
    csv = list_to_csv(all_people)
    # Write to stdout if no output filename is specified, or if it
    # is '-'. (Don't use a with-statement here: that would close
    # sys.stdout on exit from the block.)
    if options['output_filename'] in (None, '-'):
        sys.stdout.write(csv)
    else:
        # Otherwise write to a temporary file and atomically
        # rename into place:
        ntf = NamedTemporaryFile(
            delete=False,
            dir=dirname(options['output_filename'])
        )
        ntf.write(csv)
        # Close to flush the buffer before changing permissions
        # and renaming:
        ntf.close()
        chmod(ntf.name, 0o644)
        rename(ntf.name, options['output_filename'])
def get_existing_popit_person(vi_person_id):
    from candidates.models import PopItPerson
    from candidates.popit import get_search_url
    # See if this person already exists by searching for the
    # ID they were imported with:
    query_format = \
        'identifiers.identifier:"{id}" AND ' + \
        'identifiers.scheme:"{scheme}"'
    search_url = get_search_url(
        'persons',
        query_format.format(
            id=vi_person_id,
            scheme='import-id'
        ),
        embed='membership.organization'
    )
    results = requests.get(search_url).json()
    total = results['total']
    if total > 1:
        message = "Multiple matches for CI ID {0}"
        raise Exception(message.format(vi_person_id))
    if total == 0:
        return None
    # Otherwise there was exactly one result:
    return PopItPerson.create_from_dict(results['result'][0])
def handle(self, **options):
    for person_data in popit_unwrap_pagination(
            self.api.persons, embed='', per_page=100
    ):
        msg = "Person {0}persons/{1}"
        print msg.format(get_base_url(), person_data['id'])
        strip_bogus_fields(
            person_data,
            [
                'founding_date',
                'dissolution_date',
                'start_date',
                'end_date'
            ]
        )
        for image in person_data.get('images', []):
            strip_bogus_fields(
                image,
                [
                    'birth_date',
                    'death_date',
                    'founding_date',
                    'dissolution_date',
                    'start_date',
                    'end_date'
                ]
            )
        person = PopItPerson.create_from_dict(person_data)
        person.save_to_popit(self.api)
        person.invalidate_cache_entries()
def parse_data(self, json_file):
    with open(json_file) as f:
        for ec_party in json.load(f):
            ec_party_id = ec_party['ECRef'].strip()
            # We're only interested in political parties:
            if not ec_party_id.startswith('PP'):
                continue
            party_id = self.clean_id(ec_party_id)
            if ec_party['RegulatedEntityTypeName'] == 'Minor Party':
                register = ec_party['RegisterNameMinorParty'].replace(
                    ' (minor party)', ''
                )
            else:
                register = ec_party['RegisterName']
            party_name, party_dissolved = self.clean_name(
                ec_party['RegulatedEntityName'])
            party_founded = self.clean_date(ec_party['ApprovedDate'])
            party_data = {
                'id': party_id,
                'name': party_name,
                'slug': slugify(party_name),
                'classification': 'Party',
                'descriptions': get_descriptions(ec_party),
                'founding_date': party_founded,
                'dissolution_date': party_dissolved,
                'register': register,
                'identifiers': [{
                    'identifier': ec_party_id,
                    'scheme': 'electoral-commission',
                }]
            }
            try:
                self.api.organizations.post(party_data)
                self.upload_images(ec_party['PartyEmblems'], party_id)
            except HttpServerError as e:
                if 'E11000' in e.content:
                    # Duplicate party found, so update it instead:
                    self.api.organizations(party_id).put(party_data)
                    self.upload_images(ec_party['PartyEmblems'], party_id)
                else:
                    raise
            organization_with_memberships = \
                self.api.organizations(party_id).get(
                    embed='membership.person')['result']
            # Make sure any members of these parties are
            # invalidated from the cache so that the embedded
            # party information when getting posts and persons is
            # up-to-date:
            for membership in organization_with_memberships.get(
                    'memberships', []
            ):
                person = PopItPerson.create_from_dict(
                    membership['person_id'])
                person.invalidate_cache_entries()
def handle(self, **options):
    for o in popit_unwrap_pagination(
            self.api.organizations,
            per_page=100,
            embed='membership.person'
    ):
        if o['classification'] != 'Party':
            continue
        print o['name']
        for image in o.get('images', []):
            print " DELETE", image['_id']
            self.api.organizations(o['id']).image(image['_id']).delete()
        # The person pages get party images via the
        # membership.organization embed, so invalidate the cache
        # entries for any person who's a member of this party:
        for membership in o.get('memberships', []):
            person = PopItPerson.create_from_dict(membership['person_id'])
            person.invalidate_cache_entries()
def handle(self, *args, **options):
    from candidates.models import PopItPerson
    from candidates.popit import create_popit_api_object
    self.verbosity = int(options.get('verbosity', 1))
    api = create_popit_api_object()
    if len(args) != 1:
        raise CommandError("You must provide a person.js URL")
    person_js_url = args[0]
    people_data = requests.get(person_js_url).json()
    for person_data in people_data['persons']:
        twfy_person = PopItPerson.create_from_dict(person_data)
        ynmp_id = twfy_person.get_identifier('yournextmp')
        if not ynmp_id:
            continue
        parlparse_id = twfy_person.id
        ynmp_person = PopItPerson.create_from_popit(api, ynmp_id)
        existing_parlparse_id = ynmp_person.get_identifier(
            'uk.org.publicwhip')
        if existing_parlparse_id:
            if existing_parlparse_id == parlparse_id:
                # That's fine, there's already the right parlparse ID
                pass
            else:
                # Otherwise there's a mismatch, which needs investigation
                msg = "Warning: parlparse ID mismatch between YNMP {0} "
                msg += "and TWFY {1} for YNMP person {2}\n"
                self.stderr.write(
                    msg.format(
                        existing_parlparse_id,
                        parlparse_id,
                        ynmp_id,
                    )
                )
            continue
        msg = "Updating the YourNextMP person {0} with parlparse_id {1}\n"
        self.stdout.write(msg.format(ynmp_id, parlparse_id))
        ynmp_person.set_identifier(
            'uk.org.publicwhip',
            parlparse_id,
        )
        change_metadata = get_change_metadata(
            None, "Fetched a new parlparse ID"
        )
        ynmp_person.record_version(change_metadata)
        ynmp_person.save_to_popit(api)
        ynmp_person.invalidate_cache_entries()
def handle(self, **options):
    for person_data in popit_unwrap_pagination(
            self.api.persons, embed='', per_page=100
    ):
        needs_update = False
        for version in person_data.get('versions', []):
            data = version['data']
            if data.get('last_party'):
                needs_update = True
                msg = "Fixing person {0}persons/{1}"
                print msg.format(get_base_url(), person_data['id'])
                del data['last_party']
        if not needs_update:
            continue
        person = PopItPerson.create_from_dict(person_data)
        person.save_to_popit(self.api)
        person.invalidate_cache_entries()
def handle(self, *args, **options):
    if len(args) != 1:
        msg = "You must supply the prefix for output filenames"
        raise CommandError(msg)
    output_prefix = args[0]
    all_people = []
    election_to_people = defaultdict(list)
    for person_dict in popit_unwrap_pagination(
            self.api.persons,
            embed="membership.organization",
            per_page=100,
    ):
        standing_in = person_dict.get('standing_in')
        if not standing_in:
            continue
        for election in standing_in.keys():
            if not standing_in[election]:
                continue
            person = PopItPerson.create_from_dict(person_dict)
            person_as_csv_dict = person.as_dict(election=election)
            all_people.append(person_as_csv_dict)
            election_to_people[election].append(person_as_csv_dict)
    elections = election_to_people.keys() + [None]
    for election in elections:
        if election is None:
            output_filename = output_prefix + '-all.csv'
            people_data = all_people
        else:
            output_filename = output_prefix + '-' + election + '.csv'
            people_data = election_to_people[election]
        csv = list_to_csv(people_data)
        # Write to a temporary file and atomically
        # rename into place:
        ntf = NamedTemporaryFile(
            delete=False,
            dir=dirname(output_filename)
        )
        ntf.write(csv)
        # Close to flush the buffer before changing permissions
        # and renaming:
        ntf.close()
        chmod(ntf.name, 0o644)
        rename(ntf.name, output_filename)
def test_age_full_obvious(self, mock_date):
    mock_date.today.return_value = date(1977, 9, 3)
    mock_date.side_effect = \
        lambda *args, **kwargs: date(*args, **kwargs)
    p = PopItPerson.create_from_dict({'birth_date': '1976-09-01'})
    self.assertEqual(p.age, '1')
def test_age_month_early_in_year(self, mock_date):
    mock_date.today.return_value = date(1977, 8, 15)
    mock_date.side_effect = \
        lambda *args, **kwargs: date(*args, **kwargs)
    p = PopItPerson.create_from_dict({'birth_date': '1976-09'})
    self.assertEqual(p.age, '0')
def test_age_year_ambiguous(self, mock_date):
    mock_date.today.return_value = date(1977, 9, 10)
    mock_date.side_effect = \
        lambda *args, **kwargs: date(*args, **kwargs)
    p = PopItPerson.create_from_dict({'birth_date': '1975'})
    self.assertEqual(p.age, '1 or 2')
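# Each of the three tests above takes a `mock_date` argument, which implies
# the enclosing test class (not shown here) patches the `date` class used by
# PopItPerson's age calculation. A minimal sketch of that wiring, assuming a
# hypothetical patch target of 'candidates.models.popit.date' -- the real
# target must be whichever module the age property looks `date` up in:

from datetime import date
from unittest import TestCase

from mock import patch

from candidates.models import PopItPerson


@patch('candidates.models.popit.date')  # assumed path, for illustration only
class TestPersonAge(TestCase):

    def test_age_full_obvious(self, mock_date):
        # Freeze "today", but let date(...) still construct real dates:
        mock_date.today.return_value = date(1977, 9, 3)
        mock_date.side_effect = \
            lambda *args, **kwargs: date(*args, **kwargs)
        p = PopItPerson.create_from_dict({'birth_date': '1976-09-01'})
        self.assertEqual(p.age, '1')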