def handle(self, **options):
     for collection in ('organization', 'person'):
         api_collection = getattr(self.api, collection + 's')
         message = "{titled} {base_url}{plural}/{id}"
         for item in popit_unwrap_pagination(api_collection,
                                             embed='',
                                             per_page=100):
             print message.format(titled=collection.title(),
                                  base_url=get_base_url(),
                                  plural=(collection + "s"),
                                  id=item['id'])
             for image in item.get('images', []):
                 print "  Image with URL:", image['url']
                 fix_image(image)
                 # Some images have an empty 'created' field, which
                 # causes an Elasticsearch indexing error, so change it
                 # to null if that's the case:
                 if not image.get('created'):
                     image['created'] = None
             fix_dates(item)
             try:
                 api_collection(item['id']).put(item)
             except HttpClientError as e:
                 print "HttpClientError", e.content
                 sys.exit(1)
             # If this is a person, make sure that the
             # corresponding cache entries are invalidated:
             if collection == 'person':
                 person = PopItPerson.create_from_dict(item)
                 person.invalidate_cache_entries()
 def handle(self, **options):
     """Export everyone standing in options['year'] as CSV.

     People are fetched from PopIt with their membership organizations
     embedded; those with a truthy 'standing_in' entry for the year are
     converted with PopItPerson.as_dict() and rendered by list_to_csv().

     The CSV is written to standard output when
     options['output_filename'] is None or '-'.  Otherwise it is written
     to a temporary file in the destination directory, which is closed,
     made mode 0644 and renamed over options['output_filename'].
     """
     all_people = []
     for person_dict in popit_unwrap_pagination(
             self.api.persons,
             embed="membership.organization",
             per_page=100,
     ):
         standing_in = person_dict.get('standing_in')
         if standing_in and standing_in.get(options['year']):
             person = PopItPerson.create_from_dict(person_dict)
             all_people.append(person.as_dict(year=options['year']))
     csv = list_to_csv(all_people)
     output_filename = options['output_filename']
     # Write to stdout if no output filename is specified, or if it
     # is '-'
     if output_filename in (None, '-'):
         # Do not use sys.stdout as a context manager: leaving the
         # 'with' block would close it for the rest of the process.
         sys.stdout.write(csv)
         sys.stdout.flush()
         return
     # Otherwise write to a temporary file and atomically
     # rename into place:
     ntf = NamedTemporaryFile(delete=False, dir=dirname(output_filename))
     # The file must be closed (and therefore flushed) before the
     # rename, or the destination could appear empty or truncated.
     with ntf:
         ntf.write(csv)
     chmod(ntf.name, 0o644)
     rename(ntf.name, output_filename)
 def handle(self, **options):
     """Export everyone standing in options['year'] as CSV.

     People are fetched from PopIt with their membership organizations
     embedded; those with a truthy 'standing_in' entry for the year are
     converted with PopItPerson.as_dict() and rendered by list_to_csv().

     The CSV is written to standard output when
     options['output_filename'] is None or '-'.  Otherwise it is written
     to a temporary file in the destination directory, which is closed,
     made mode 0644 and renamed over options['output_filename'].
     """
     all_people = []
     for person_dict in popit_unwrap_pagination(
             self.api.persons,
             embed="membership.organization",
             per_page=100,
     ):
         standing_in = person_dict.get('standing_in')
         if standing_in and standing_in.get(options['year']):
             person = PopItPerson.create_from_dict(person_dict)
             all_people.append(person.as_dict(year=options['year']))
     csv = list_to_csv(all_people)
     output_filename = options['output_filename']
     # Write to stdout if no output filename is specified, or if it
     # is '-'
     if output_filename in (None, '-'):
         # Do not use sys.stdout as a context manager: leaving the
         # 'with' block would close it for the rest of the process.
         sys.stdout.write(csv)
         sys.stdout.flush()
         return
     # Otherwise write to a temporary file and atomically
     # rename into place:
     ntf = NamedTemporaryFile(
         delete=False,
         dir=dirname(output_filename)
     )
     # The file must be closed (and therefore flushed) before the
     # rename, or the destination could appear empty or truncated.
     with ntf:
         ntf.write(csv)
     chmod(ntf.name, 0o644)
     rename(ntf.name, output_filename)
def get_existing_popit_person(vi_person_id):
    """Find the PopIt person that was imported with the given ID.

    Searches PopIt persons (with membership organizations embedded) for
    an identifier whose scheme is 'import-id' and whose value is
    vi_person_id.

    Returns a PopItPerson built from the single search result, or None
    when the search reports no results.  Raises Exception when the
    search reports more than one result.
    """
    from candidates.models import PopItPerson
    from candidates.popit import get_search_url
    # The person is located by the ID they were imported with:
    query = (
        'identifiers.identifier:"{id}" AND '
        'identifiers.scheme:"{scheme}"'
    ).format(id=vi_person_id, scheme='import-id')
    response = requests.get(
        get_search_url('persons', query, embed='membership.organization')
    )
    payload = response.json()

    match_count = payload['total']
    if match_count == 0:
        return None
    if match_count > 1:
        raise Exception(
            "Multiple matches for CI ID {0}".format(vi_person_id)
        )
    # Exactly one result remains:
    only_match = payload['result'][0]
    return PopItPerson.create_from_dict(only_match)
 def handle(self, **options):
     for collection in ('organization', 'person'):
         api_collection = getattr(self.api, collection + 's')
         message = "{titled} {base_url}{plural}/{id}"
         for item in popit_unwrap_pagination(
                 api_collection,
                 embed='',
                 per_page=100
         ):
             print message.format(
                 titled=collection.title(),
                 base_url=get_base_url(),
                 plural=(collection + "s"),
                 id=item['id']
             )
             for image in item.get('images', []):
                 print "  Image with URL:", image['url']
                 fix_image(image)
                 # Some images have an empty 'created' field, which
                 # causes an Elasticsearch indexing error, so change it
                 # to null if that's the case:
                 if not image.get('created'):
                     image['created'] = None
             fix_dates(item)
             try:
                 api_collection(item['id']).put(item)
             except HttpClientError as e:
                 print "HttpClientError", e.content
                 sys.exit(1)
             # If this is a person, make sure that the
             # corresponding cache entries are invalidated:
             if collection == 'person':
                 person = PopItPerson.create_from_dict(item)
                 person.invalidate_cache_entries()
示例#6
0
 def handle(self, **options):
     for person_data in popit_unwrap_pagination(
             self.api.persons,
             embed='',
             per_page=100
     ):
         msg = "Person {0}persons/{1}"
         print msg.format(get_base_url(), person_data['id'])
         strip_bogus_fields(
             person_data,
             [
                 'founding_date',
                 'dissolution_date',
                 'start_date',
                 'end_date'
             ]
         )
         for image in person_data.get('images', []):
             strip_bogus_fields(
                 image,
                 [
                     'birth_date',
                     'death_date',
                     'founding_date',
                     'dissolution_date',
                     'start_date',
                     'end_date'
                 ]
             )
         person = PopItPerson.create_from_dict(person_data)
         person.save_to_popit(self.api)
         person.invalidate_cache_entries()
 def parse_data(self, json_file):
     """Create or update a PopIt organization for each party in json_file.

     json_file is the path of a JSON file holding a sequence of
     Electoral Commission records.  Records whose stripped 'ECRef' does
     not start with 'PP' are skipped.  Each remaining record is POSTed
     as an organization and its emblems are uploaded; if the server
     answers with an error mentioning 'E11000' the organization is PUT
     instead.  Any other HttpServerError propagates.  Finally the cache
     entries of every member of the organization are invalidated.
     """
     with open(json_file) as source:
         for record in json.load(source):
             ec_ref = record['ECRef'].strip()
             # Only political parties are of interest:
             if not ec_ref.startswith('PP'):
                 continue
             party_id = self.clean_id(ec_ref)
             is_minor_party = \
                 record['RegulatedEntityTypeName'] == 'Minor Party'
             if is_minor_party:
                 minor_register = record['RegisterNameMinorParty']
                 register = minor_register.replace(' (minor party)', '')
             else:
                 register = record['RegisterName']
             cleaned_name = self.clean_name(record['RegulatedEntityName'])
             party_name, party_dissolved = cleaned_name
             party_founded = self.clean_date(record['ApprovedDate'])
             party_data = {
                 'id': party_id,
                 'name': party_name,
                 'slug': slugify(party_name),
                 'classification': 'Party',
                 'descriptions': get_descriptions(record),
                 'founding_date': party_founded,
                 'dissolution_date': party_dissolved,
                 'register': register,
                 'identifiers': [
                     {
                         'identifier': ec_ref,
                         'scheme': 'electoral-commission',
                     },
                 ],
             }
             try:
                 self.api.organizations.post(party_data)
                 self.upload_images(record['PartyEmblems'], party_id)
             except HttpServerError as error:
                 if 'E11000' not in error.content:
                     raise
                 # Duplicate party found, so update it in place:
                 self.api.organizations(party_id).put(party_data)
                 self.upload_images(record['PartyEmblems'], party_id)
             response = self.api.organizations(party_id).get(
                 embed='membership.person'
             )
             organization = response['result']
             # Members of this party are invalidated from the cache
             # so that the party information embedded when getting
             # posts and persons is up-to-date:
             for membership in organization.get('memberships', []):
                 member = PopItPerson.create_from_dict(
                     membership['person_id']
                 )
                 member.invalidate_cache_entries()
 def parse_data(self, json_file):
     """Create or update a PopIt organization for each party in json_file.

     json_file is the path of a JSON file holding a sequence of
     Electoral Commission records.  Records whose stripped 'ECRef' does
     not start with 'PP' are skipped.  Each remaining record is POSTed
     as an organization and its emblems are uploaded; if the server
     answers with an error mentioning 'E11000' the organization is PUT
     instead.  Any other HttpServerError propagates.  Finally the cache
     entries of every member of the organization are invalidated.
     """
     with open(json_file) as source:
         for record in json.load(source):
             ec_ref = record['ECRef'].strip()
             # Only political parties are of interest:
             if not ec_ref.startswith('PP'):
                 continue
             party_id = self.clean_id(ec_ref)
             is_minor_party = \
                 record['RegulatedEntityTypeName'] == 'Minor Party'
             if is_minor_party:
                 minor_register = record['RegisterNameMinorParty']
                 register = minor_register.replace(' (minor party)', '')
             else:
                 register = record['RegisterName']
             cleaned_name = self.clean_name(record['RegulatedEntityName'])
             party_name, party_dissolved = cleaned_name
             party_founded = self.clean_date(record['ApprovedDate'])
             party_data = {
                 'id': party_id,
                 'name': party_name,
                 'slug': slugify(party_name),
                 'classification': 'Party',
                 'descriptions': get_descriptions(record),
                 'founding_date': party_founded,
                 'dissolution_date': party_dissolved,
                 'register': register,
                 'identifiers': [
                     {
                         'identifier': ec_ref,
                         'scheme': 'electoral-commission',
                     },
                 ],
             }
             try:
                 self.api.organizations.post(party_data)
                 self.upload_images(record['PartyEmblems'], party_id)
             except HttpServerError as error:
                 if 'E11000' not in error.content:
                     raise
                 # Duplicate party found, so update it in place:
                 self.api.organizations(party_id).put(party_data)
                 self.upload_images(record['PartyEmblems'], party_id)
             response = self.api.organizations(party_id).get(
                 embed='membership.person'
             )
             organization = response['result']
             # Members of this party are invalidated from the cache
             # so that the party information embedded when getting
             # posts and persons is up-to-date:
             for membership in organization.get('memberships', []):
                 member = PopItPerson.create_from_dict(
                     membership['person_id']
                 )
                 member.invalidate_cache_entries()
示例#9
0
 def handle(self, **options):
     for o in popit_unwrap_pagination(self.api.organizations,
                                      per_page=100,
                                      embed='membership.person'):
         if o['classification'] != 'Party':
             continue
         print o['name']
         for image in o.get('images', []):
             print "  DELETE", image['_id']
             self.api.organizations(o['id']).image(image['_id']).delete()
         # The person pages get party images via the
         # membership.organization embed, so invalidate the cache
         # entries for any person who's a member of this party:
         for membership in o.get('memberships', []):
             person = PopItPerson.create_from_dict(membership['person_id'])
             person.invalidate_cache_entries()
    def handle(self, *args, **options):
        """Copy parlparse IDs from a person.js document onto YNMP people.

        Expects exactly one positional argument, the URL of a person.js
        document, and raises CommandError otherwise.  For each person in
        it that carries a 'yournextmp' identifier, the matching PopIt
        person is loaded.  If that person already has a
        'uk.org.publicwhip' identifier nothing is saved, and a warning
        is written to stderr when it differs from the person.js ID.
        Otherwise the identifier is set, a new version is recorded, and
        the person is saved and has its cache entries invalidated.
        """
        from candidates.models import PopItPerson
        from candidates.popit import create_popit_api_object

        self.verbosity = int(options.get('verbosity', 1))
        api = create_popit_api_object()
        if len(args) != 1:
            raise CommandError("You must provide a person.js URL")
        (person_js_url,) = args
        mismatch_format = (
            "Warning: parlparse ID mismatch between YNMP {0} "
            "and TWFY {1} for YNMP person {2}\n"
        )
        update_format = \
            "Updating the YourNextMP person {0} with parlparse_id {1}\n"
        document = requests.get(person_js_url).json()
        for person_data in document['persons']:
            twfy_person = PopItPerson.create_from_dict(person_data)
            ynmp_id = twfy_person.get_identifier('yournextmp')
            if not ynmp_id:
                continue
            parlparse_id = twfy_person.id
            ynmp_person = PopItPerson.create_from_popit(api, ynmp_id)
            current_id = ynmp_person.get_identifier('uk.org.publicwhip')
            if current_id:
                if current_id == parlparse_id:
                    # The right parlparse ID is already present
                    continue
                # A mismatch needs investigation rather than a save
                self.stderr.write(
                    mismatch_format.format(current_id, parlparse_id, ynmp_id)
                )
                continue
            self.stdout.write(update_format.format(ynmp_id, parlparse_id))
            ynmp_person.set_identifier('uk.org.publicwhip', parlparse_id)
            ynmp_person.record_version(
                get_change_metadata(None, "Fetched a new parlparse ID")
            )
            ynmp_person.save_to_popit(api)
            ynmp_person.invalidate_cache_entries()
    def handle(self, *args, **options):
        """Copy parlparse IDs from a person.js document onto YNMP people.

        Expects exactly one positional argument, the URL of a person.js
        document, and raises CommandError otherwise.  For each person in
        it that carries a 'yournextmp' identifier, the matching PopIt
        person is loaded.  If that person already has a
        'uk.org.publicwhip' identifier nothing is saved, and a warning
        is written to stderr when it differs from the person.js ID.
        Otherwise the identifier is set, a new version is recorded, and
        the person is saved and has its cache entries invalidated.
        """
        from candidates.models import PopItPerson
        from candidates.popit import create_popit_api_object

        self.verbosity = int(options.get('verbosity', 1))
        api = create_popit_api_object()
        if len(args) != 1:
            raise CommandError("You must provide a person.js URL")
        (person_js_url,) = args
        mismatch_format = (
            "Warning: parlparse ID mismatch between YNMP {0} "
            "and TWFY {1} for YNMP person {2}\n"
        )
        update_format = \
            "Updating the YourNextMP person {0} with parlparse_id {1}\n"
        document = requests.get(person_js_url).json()
        for person_data in document['persons']:
            twfy_person = PopItPerson.create_from_dict(person_data)
            ynmp_id = twfy_person.get_identifier('yournextmp')
            if not ynmp_id:
                continue
            parlparse_id = twfy_person.id
            ynmp_person = PopItPerson.create_from_popit(api, ynmp_id)
            current_id = ynmp_person.get_identifier('uk.org.publicwhip')
            if current_id:
                if current_id == parlparse_id:
                    # The right parlparse ID is already present
                    continue
                # A mismatch needs investigation rather than a save
                self.stderr.write(
                    mismatch_format.format(current_id, parlparse_id, ynmp_id)
                )
                continue
            self.stdout.write(update_format.format(ynmp_id, parlparse_id))
            ynmp_person.set_identifier('uk.org.publicwhip', parlparse_id)
            ynmp_person.record_version(
                get_change_metadata(None, "Fetched a new parlparse ID")
            )
            ynmp_person.save_to_popit(api)
            ynmp_person.invalidate_cache_entries()
 def handle(self, **options):
     for o in popit_unwrap_pagination(
             self.api.organizations,
             per_page=100,
             embed='membership.person'
     ):
         if o['classification'] != 'Party':
             continue
         print o['name']
         for image in o.get('images', []):
             print "  DELETE", image['_id']
             self.api.organizations(o['id']).image(image['_id']).delete()
         # The person pages get party images via the
         # membership.organization embed, so invalidate the cache
         # entries for any person who's a member of this party:
         for membership in o.get('memberships', []):
             person = PopItPerson.create_from_dict(membership['person_id'])
             person.invalidate_cache_entries()
示例#13
0
def get_existing_popit_person(vi_person_id):
    """Find the PopIt person that was imported with the given ID.

    Searches PopIt persons (with membership organizations embedded) for
    an identifier whose scheme is 'import-id' and whose value is
    vi_person_id.

    Returns a PopItPerson built from the single search result, or None
    when the search reports no results.  Raises Exception when the
    search reports more than one result.
    """
    # The person is located by the ID they were imported with:
    query = (
        'identifiers.identifier:"{id}" AND '
        'identifiers.scheme:"{scheme}"'
    ).format(id=vi_person_id, scheme='import-id')
    response = requests.get(
        get_search_url('persons', query, embed='membership.organization')
    )
    payload = response.json()
    match_count = payload['total']
    if match_count == 0:
        return None
    if match_count > 1:
        raise Exception(
            "Multiple matches for CI ID {0}".format(vi_person_id)
        )
    # Exactly one result remains:
    only_match = payload['result'][0]
    return PopItPerson.create_from_dict(only_match)
示例#14
0
 def handle(self, **options):
     for person_data in popit_unwrap_pagination(
             self.api.persons,
             embed='',
             per_page=100
     ):
         needs_update = False
         for version in person_data.get('versions', []):
             data = version['data']
             if data.get('last_party'):
                 needs_update = True
                 msg = "Fixing person {0}persons/{1}"
                 print msg.format(get_base_url(), person_data['id'])
                 del data['last_party']
         if not needs_update:
             continue
         person = PopItPerson.create_from_dict(person_data)
         person.save_to_popit(self.api)
         person.invalidate_cache_entries()
 def handle(self, *args, **options):
     """Write one CSV file per election, plus one containing everybody.

     Expects exactly one positional argument, the prefix for the output
     filenames, and raises CommandError otherwise.  For every election
     in which a person has a truthy 'standing_in' entry, a row from
     PopItPerson.as_dict(election=...) is collected.  The rows for each
     election go to '<prefix>-<election>.csv' and all rows go to
     '<prefix>-all.csv'.

     Each file is first written to a temporary file in the destination
     directory, which is closed, made mode 0644 and renamed into place.
     """
     if len(args) != 1:
         msg = "You must supply the prefix for output filenames"
         raise CommandError(msg)
     output_prefix = args[0]
     all_people = []
     election_to_people = defaultdict(list)
     for person_dict in popit_unwrap_pagination(
             self.api.persons,
             embed="membership.organization",
             per_page=100,
     ):
         standing_in = person_dict.get('standing_in')
         if not standing_in:
             continue
         for election in standing_in:
             if not standing_in[election]:
                 continue
             person = PopItPerson.create_from_dict(person_dict)
             person_as_csv_dict = person.as_dict(election=election)
             all_people.append(person_as_csv_dict)
             election_to_people[election].append(person_as_csv_dict)
     # One (filename, rows) pair per election, followed by the combined
     # file.  Building the list explicitly avoids adding a list to
     # dict.keys(), which only works when keys() returns a list.
     outputs = [
         (output_prefix + '-' + election + '.csv', people)
         for election, people in election_to_people.items()
     ]
     outputs.append((output_prefix + '-all.csv', all_people))
     for output_filename, people_data in outputs:
         csv = list_to_csv(people_data)
         # Write to a temporary file and atomically rename into place:
         ntf = NamedTemporaryFile(
             delete=False,
             dir=dirname(output_filename)
         )
         # The file must be closed (and therefore flushed) before the
         # rename, or the destination could appear empty or truncated.
         with ntf:
             ntf.write(csv)
         chmod(ntf.name, 0o644)
         rename(ntf.name, output_filename)
示例#16
0
 def test_age_full_obvious(self, mock_date):
     # With a full birth date of 1976-09-01 and "today" pinned to
     # 1977-09-03, the age must be reported as exactly '1'.
     def build_real_date(*args, **kwargs):
         # Calls to the mock itself still produce genuine dates.
         return date(*args, **kwargs)
     mock_date.side_effect = build_real_date
     mock_date.today.return_value = date(1977, 9, 3)
     person = PopItPerson.create_from_dict({'birth_date': '1976-09-01'})
     self.assertEqual(person.age, '1')
示例#17
0
 def test_age_month_early_in_year(self, mock_date):
     # With a month-precision birth date of 1976-09 and "today" pinned
     # to 1977-08-15, the age must be reported as '0'.
     def build_real_date(*args, **kwargs):
         # Calls to the mock itself still produce genuine dates.
         return date(*args, **kwargs)
     mock_date.side_effect = build_real_date
     mock_date.today.return_value = date(1977, 8, 15)
     person = PopItPerson.create_from_dict({'birth_date': '1976-09'})
     self.assertEqual(person.age, '0')
示例#18
0
 def test_age_year_ambiguous(self, mock_date):
     # With a year-only birth date of 1975 and "today" pinned to
     # 1977-09-10, the age must be reported as '1 or 2'.
     def build_real_date(*args, **kwargs):
         # Calls to the mock itself still produce genuine dates.
         return date(*args, **kwargs)
     mock_date.side_effect = build_real_date
     mock_date.today.return_value = date(1977, 9, 10)
     person = PopItPerson.create_from_dict({'birth_date': '1975'})
     self.assertEqual(person.age, '1 or 2')
 def test_age_full_obvious(self, mock_date):
     # With a full birth date of 1976-09-01 and "today" pinned to
     # 1977-09-03, the age must be reported as exactly '1'.
     def build_real_date(*args, **kwargs):
         # Calls to the mock itself still produce genuine dates.
         return date(*args, **kwargs)
     mock_date.side_effect = build_real_date
     mock_date.today.return_value = date(1977, 9, 3)
     person = PopItPerson.create_from_dict({'birth_date': '1976-09-01'})
     self.assertEqual(person.age, '1')
 def test_age_month_early_in_year(self, mock_date):
     # With a month-precision birth date of 1976-09 and "today" pinned
     # to 1977-08-15, the age must be reported as '0'.
     def build_real_date(*args, **kwargs):
         # Calls to the mock itself still produce genuine dates.
         return date(*args, **kwargs)
     mock_date.side_effect = build_real_date
     mock_date.today.return_value = date(1977, 8, 15)
     person = PopItPerson.create_from_dict({'birth_date': '1976-09'})
     self.assertEqual(person.age, '0')
 def test_age_year_ambiguous(self, mock_date):
     # With a year-only birth date of 1975 and "today" pinned to
     # 1977-09-10, the age must be reported as '1 or 2'.
     def build_real_date(*args, **kwargs):
         # Calls to the mock itself still produce genuine dates.
         return date(*args, **kwargs)
     mock_date.side_effect = build_real_date
     mock_date.today.return_value = date(1977, 9, 10)
     person = PopItPerson.create_from_dict({'birth_date': '1975'})
     self.assertEqual(person.age, '1 or 2')