def definitions(base='.'):
  # Validates every definition returned by registry(base) and prints one line
  # per problem found, prefixed with the definition's slug.
  #
  # Visible checks, per (slug, config) pair: LICENSE.txt against the license
  # templates; unrecognized, duplicate and empty config values; required keys;
  # keys that are expected to be absent; the encoding; and, for every slug
  # except the two census ones, the metadata keys, the division id and the
  # values expected by get_definition().
  #
  # NOTE(review): several lines below are syntactically incomplete (each one
  # is flagged where it occurs). They look truncated and must be restored
  # before this function can run.
  # NOTE(review): the next line reads like a docstring whose quotes were lost;
  # as written it is not valid Python.
  Check that all files are valid.
  # Prints `message` for `slug` unless `message` is already in `seen`.
  # NOTE(review): nothing visible ever adds to `seen`, so as written every
  # call prints - confirm whether a `seen.add(message)` is missing.
  def warn(message, slug):
    if message not in seen:
      print('%-50s %s' % (slug, message))

  # `seen` is read by warn(); `division_ids` is read by the duplicate check
  # near the end of the loop.
  seen = set()
  division_ids = set()
  for slug, config in registry(base).items():
    # LICENSE.txt is looked up next to the definition's file.
    directory = dirname(config['file'])

    # Validate LICENSE.txt.
    license_path = os.path.join(directory, 'LICENSE.txt')
    if os.path.exists(license_path):
      with open(license_path) as f:
        # NOTE(review): truncated assignment - the expression that reads the
        # file into license_text is missing its left part.
        license_text ='\n')
      if config.get('licence_url'):
        licence_url = config['licence_url']
        if licence_url in open_data_licenses or licence_url in some_rights_reserved_licenses:
          # A known license needs a template in either `terms` (compared for
          # equality) or `terms_re` (matched with .search()).
          if not terms.get(licence_url) and not terms_re.get(licence_url):
            warn('No LICENSE.txt template for License URL %s' % licence_url, slug)
          # `and` binds tighter than `or`: this is
          # (exact template differs) or (regex template does not match).
          elif terms.get(licence_url) and license_text != terms[licence_url] or terms_re.get(licence_url) and not terms_re[licence_url].search(license_text):
            print('%-50s Expected LICENSE.txt to match license-specific template' % slug)
        elif licence_url in all_rights_reserved_licenses:
          # NOTE(review): truncated condition - the test after `if not` is
          # missing.
          if not
            print('%-50s Expected LICENSE.txt to match "all rights reserved" template' % slug)
          # NOTE(review): as indented, this message is printed inside the
          # "all rights reserved" branch; it looks like an `else:` for
          # unknown license URLs was lost - confirm.
          print('%-50s Unrecognized License URL %s' % (slug, licence_url))
      # NOTE(review): truncated condition - the test after `elif not` is
      # missing.
      elif not
        print('%-50s Expected LICENSE.txt to match "all rights reserved" template' % slug)

    # Check for invalid keys, non-unique or empty values.
    invalid_keys = set(config.keys()) - valid_keys
    if invalid_keys:
      print('%-50s Unrecognized key: %s' % (slug, ', '.join(invalid_keys)))
    # 'metadata' is left out of the uniqueness check (presumably because it
    # is a dict and cannot be placed in a set - confirm).
    values = [value for key, value in config.items() if key != 'metadata']
    if len(values) > len(set(values)):
      print('%-50s Non-unique values' % slug)
    for key, value in config.items():
      if not value:
        print('%-50s Empty value for %s' % (slug, key))

    # Check for missing required keys.
    for key in ('domain', 'last_updated', 'name_func', 'authority', 'encoding'):
      if not config.get(key):
        print('%-50s Missing %s' % (slug, key))
    # A data_url without a source_url is reported.
    if not config.get('source_url') and config.get('data_url'):
      print('%-50s Missing source_url' % slug)
    # A source_url needs at least one of licence_url or data_url.
    if config.get('source_url') and not config.get('licence_url') and not config.get('data_url'):
      print('%-50s Missing licence_url or data_url' % slug)

    # Validate fields.
    # 'name' and 'singular' are expected to be absent or empty.
    for key in ('name', 'singular'):
      if config.get(key):
        print('%-50s Expected %s to be missing' % (slug, key))
    if config.get('encoding') and config['encoding'] != 'iso-8859-1':
      print('%-50s Expected encoding to be iso-8859-1 not %s' % (slug, config['encoding']))

    # The two census definitions skip the metadata and division checks.
    if slug not in ('Census divisions', 'Census subdivisions'):
      # Check for invalid keys or empty values.
      # config['metadata'] is indexed directly here, so a definition without
      # a 'metadata' key would fail at this line (assuming config is a dict).
      invalid_keys = set(config['metadata'].keys()) - valid_metadata_keys
      if invalid_keys:
        print('%-50s Unrecognized key: %s' % (slug, ', '.join(invalid_keys)))
      for key, value in config['metadata'].items():
        if not value:
          print('%-50s Empty value for %s' % (slug, key))

      division_id = get_division_id(slug, config)

      # Ensure division_id is unique.
      # NOTE(review): nothing visible adds division_id to division_ids, so
      # as written this check can never fire - confirm whether a
      # `division_ids.add(division_id)` is missing.
      if division_id in division_ids:
        print('%-50s Duplicate division_id %s' % (slug, division_id))

      expected_slug, expected_config = get_definition(division_id)

      # Check for unexpected values.
      # The slug and every key of expected_config are handed to assert_match
      # together with the actual value; config[key] is indexed directly.
      assert_match(slug, 'slug', slug, expected_slug)
      for key, value in expected_config.items():
        assert_match(slug, key, config[key], value)
def spreadsheet(base='.', private_base='../represent-canada-private-data'):
  # Builds one record per geographic code (provinces/territories first, then
  # census subdivisions), merges in information from the registries under
  # `base` and `private_base` and from a remotely fetched CSV, reports
  # disagreements on stderr, and writes the records to stdout.
  #
  # base:         directory whose registry entries are marked
  #               'Permission to distribute' = 'Y'.
  # private_base: directory whose registry entries are marked 'N'.
  #
  # NOTE(review): every path/URL literal passed to csv_reader() and
  # requests.get() below is an empty string, several expressions are
  # truncated, and two dict literals are never closed (each is flagged where
  # it occurs). These look like lost text and must be restored before this
  # function can run.
  # NOTE(review): the `.decode(...)` calls on CSV cells and file contents
  # suggest this was written for Python 2 byte strings - confirm.
  sgc_code_to_ocdid_map = sgc_code_to_ocdid()
  # Inverse lookup: OCD identifier -> SGC code.
  ocdid_to_sgc_code_map = {v: k for k, v in sgc_code_to_ocdid_map.items()}
  records = OrderedDict()

  # Each of the next three loops keys its mapping on the text after the last
  # ':' of a column.
  # NOTE(review): municipal_subdivisions is assigned into here but is not
  # initialised anywhere in this function - confirm whether
  # `municipal_subdivisions = {}` is missing or it is defined elsewhere.
  reader = csv_reader('')
  for row in reader:
    municipal_subdivisions[row[0].split(':')[-1]] = row[1]

  urls = {}
  reader = csv_reader('')
  for row in reader:
    urls[row[0].split(':')[-1]] = row[1]

  # Maps the third column to the upper-cased last ':' segment of the first.
  abbreviations = {}
  reader = csv_reader('')
  for row in reader:
    abbreviations[row[2]] = row[0].split(':')[-1].upper()

  # Get scraper URLs.
  scraper_urls = {}
  for representative_set in requests.get('').json()['objects']:
    boundary_set_url = representative_set['related']['boundary_set_url']
    if boundary_set_url:
      if boundary_set_url != '/boundary-sets/census-subdivisions/':
        # NOTE(review): the format string is empty, so `%` has no
        # placeholder to fill with boundary_set_url.
        boundary_set = requests.get('' % boundary_set_url).json()
        if boundary_set.get('extra') and boundary_set['extra'].get('geographic_code'):
          # Prefer data_about_url; fall back to data_url when it is falsy.
          scraper_urls[boundary_set['extra']['geographic_code']] = representative_set['data_about_url'] or representative_set['data_url']
          # NOTE(review): as indented, "No extra" is written right after a
          # successful assignment; it looks like an `else:` was lost.
          sys.stderr.write('%-60s No extra\n' % boundary_set_url)
      # NOTE(review): as indented, this is written whenever boundary_set_url
      # is truthy; it looks like an `else:` was lost.
      sys.stderr.write('%-60s No boundary_set_url\n' % representative_set['url'])

  # Create records for provinces and territories.
  reader = csv_reader('')
  for row in reader:
    # row[0] is used as the OCD identifier and translated to the SGC code
    # that keys `records`.
    # NOTE(review): this dict literal is never closed - a `}` is missing
    # after the last entry.
    records[ocdid_to_sgc_code_map[row[0]]] = {
      'OCD': row[0],
      'Geographic code': ocdid_to_sgc_code_map[row[0]],
      'Geographic name': row[1],
      'Geographic type': '',
      'Province or territory': row[0].split(':')[-1].upper(),
      'Population': '',
      'URL': '',
      'Scraper?': scraper_urls.get(ocdid_to_sgc_code_map[row[0]], ''),
      'Shapefile?': '',
      'Contact': '',
      'Request notes': '',
      'Received via': '',
      'Last boundary': '',
      'Next boundary': '',
      'Permission to distribute': '',
      'Highrise URL': '',
      'Type of license': '',
      'License URL': '',
      'Denial notes': '',

  # Create records for census subdivisions.
  reader = csv_reader('')  # title  # headers
  for row in reader:
    # Empty rows are skipped.
    if row:
      # NOTE(review): truncated - the call that applies the pattern
      # "name (province)" to row[1] is missing its left part.
      result ='\A(.+) \((.+)\)\Z', row[1].decode('iso-8859-1'))
      if result:
        # NOTE(review): both right-hand sides below are truncated (the
        # value for `name` and the key for `abbreviations[...]`).
        name =
        province_or_territory = abbreviations[]
      elif row[1] == 'Canada':
        name = 'Canada'
        province_or_territory = ''
        # NOTE(review): as indented, this raise runs in the 'Canada'
        # branch; it looks like an `else:` was lost.
        raise Exception('Unrecognized name "%s"' % row[1])

      # NOTE(review): this dict literal is never closed - a `}` is missing
      # after the last entry. Unlike the province records it has no
      # 'Shapefile?' entry; that key is set further down.
      record = {
        'OCD': sgc_code_to_ocdid_map[row[0]],
        'Geographic code': row[0],
        'Geographic name': name,
        'Province or territory': province_or_territory,
        'Geographic type': row[2].decode('iso-8859-1'),
        'Population': row[4],
        'URL': urls.get(row[0], ''),
        'Scraper?': scraper_urls.get(row[0], ''),
        'Contact': '',
        'Request notes': '',
        'Received via': '',
        'Last boundary': '',
        'Next boundary': '',
        'Permission to distribute': '',
        'Highrise URL': '',
        'Type of license': '',
        'License URL': '',
        'Denial notes': '',

      # The municipal_subdivisions flag drives the initial 'Shapefile?'
      # value: 'N' marks that column and every column in request_headers
      # and receipt_headers as 'N/A', 'Y' means 'Request', '?' leaves it
      # blank.
      if municipal_subdivisions.get(row[0]):
        if municipal_subdivisions[row[0]] == 'N':
          for header in ['Shapefile?'] + request_headers + receipt_headers:
            record[header] = 'N/A'
        elif municipal_subdivisions[row[0]] == 'Y':
          record['Shapefile?'] = 'Request'
        elif municipal_subdivisions[row[0]] == '?':
          record['Shapefile?'] = ''
        # NOTE(review): as indented, this overwrites whatever the chain
        # above set; it looks like an `else:` was lost (possibly with
        # further lines) - confirm.
        record['Shapefile?'] = ''

      records[row[0]] = record

  # Merge information from received data.
  for directory, permission_to_distribute in [(base, 'Y'), (private_base, 'N')]:
    # The registry is reset before each directory is loaded (presumably so
    # entries from the previous directory do not carry over - confirm).
    boundaries.registry = {}
    for slug, config in registry(directory).items():
      if config.get('metadata'):
        geographic_code = config['metadata'].get('geographic_code')
        if geographic_code:
          license_path = os.path.join(dirname(config['file']), 'LICENSE.txt')
          license_text = ''
          if os.path.exists(license_path):
            with open(license_path) as f:
              # NOTE(review): truncated assignment - the expression that
              # reads the file is missing its left part.
              license_text ='\n').decode('utf-8')

          # records is indexed directly, so a geographic_code with no
          # record fails here.
          record = records[geographic_code]
          record['Shapefile?'] = 'Y'
          record['Permission to distribute'] = permission_to_distribute
          record['License URL'] = config.get('licence_url', '')

          if config.get('last_updated'):
            # '%-m' and '%-d' ask for a non-padded month and day; the '-'
            # flag is a platform extension of strftime, not a portable
            # directive.
            record['Last boundary'] = config['last_updated'].strftime('%-m/%-d/%Y')

          if config.get('source_url'):
            record['Contact'] = 'N/A'
            record['Received via'] = 'online'
            # NOTE(review): both `match =` and the 'Contact' assignment
            # under `if match:` are truncated.
            match =
            if match:
              record['Contact'] =
            # NOTE(review): as indented, this overwrites 'online' set
            # above; it looks like an `else:` was lost.
            record['Received via'] = 'email'

          # Classify the license by which list the URL appears in.
          if config.get('licence_url'):
            licence_url = config['licence_url']
            if licence_url in open_data_licenses:
              record['Type of license'] = 'Open'
            elif licence_url in some_rights_reserved_licenses:
              record['Type of license'] = 'Most rights reserved'
            elif licence_url in all_rights_reserved_licenses:
              record['Type of license'] = 'All rights reserved'
              # NOTE(review): as indented, this raise runs in the "all
              # rights reserved" branch and the two assignments after it
              # run unconditionally, the last one winning; it looks like
              # `else:`/`elif` lines were lost - confirm.
              raise Exception(licence_url)
            record['Type of license'] = 'All rights reserved'
            record['Type of license'] = 'Custom'
        # Metadata with neither a geographic_code nor an ocd_division is
        # reported.
        elif not config['metadata'].get('ocd_division'):
          sys.stderr.write('%-60s No geographic_code or ocd_division\n' % slug)

  # Compare the generated records with the rows of a CSV fetched over HTTP.
  reader = csv.DictReader(StringIO(requests.get('').content))
  for row in reader:
    geographic_code = row['Geographic code']
    # records is indexed directly, so a row whose code has no record fails
    # here.
    record = records[geographic_code]

    for key in row:
      # a: the value generated above; b: the value from the fetched CSV.
      a = record[key]
      b = row[key].decode('utf-8')

      if a != b:
        # Columns that are always tracked manually.
        # Scrapers for municipalities without wards are tracked manually.
        # In-progress requests are tracked manually.
        # Contacts for in-progress requests and private data are tracked manually.
        # We may have a contact to confirm the nonexistence of municipal subdivisions.
        # MFIPPA requests are tracked manually.
        # Additional details about license agreements and written consent are tracked manually.
        # We may have information for a bad shapefile from an in-progress request.
        # When b is non-empty and one of the conditions below holds, the
        # fetched value replaces the generated one.
        if b and (
           (key in ('Highrise URL', 'Request notes', 'Next boundary', 'Denial notes')) or
           (key == 'Scraper?'         and not a         and record['Shapefile?'] == 'N/A') or
           (key == 'Shapefile?'       and not a         and b in ('Request', 'Requested')) or
           (key == 'Shapefile?'       and a == 'Request'and b == 'Requested') or
           (key == 'Contact'          and not a         and (row['Shapefile?'] == 'Requested' or record['Permission to distribute'] == 'N')) or
           (key == 'Contact'          and a == 'N/A'    and record['Shapefile?'] == 'N/A') or
           (key == 'Received via'     and a == 'email'  and b == 'MFIPPA') or
           (key == 'Type of license'  and '(' in b) or
           (key in ('Received via', 'Type of license', 'Permission to distribute') and not a and row['Shapefile?'] == 'Requested')):
          record[key] = b
        # The spreadsheet can add contacts and URLs.
        # 'Population' mismatches are never reported; 'Contact' and 'URL'
        # mismatches are reported only when the generated value is
        # non-empty. The record keeps the generated value in this branch.
        elif key != 'Population' and (key not in ('Contact', 'URL') or a):  # separators
          sys.stderr.write(u'%-25s %s: expected "%s" got "%s"\n' % (key, geographic_code, a, b))

  # Emit one row per record, with columns in `headers` order. No header row
  # is written here.
  writer = UnicodeWriter(sys.stdout)
  for _, record in records.items():
    writer.writerow([record[header] for header in headers])
