示例#1
0
  def test_join_exact_matching(self):
    # Exact-join two FBI tables that list the same two cities in opposite
    # row order, then compare the joined frame with a hand-built one.
    left_frame = pandas.DataFrame(
      data={
        'index': ['california_sunnyvale', 'alabama_montgomery'],
        'foo': [1, 2],
        'State': ['CA', 'AL'],
        'City': ['Sunnyvale', 'Montgomery'],
        'Population': [100, 200],
      },
      index=[0, 1],
    )
    right_frame = pandas.DataFrame(
      data={
        'index': ['alabama_montgomery', 'california_sunnyvale'],
        'bar': [3, 4],
        'State': ['AL', 'CA'],
        'City': ['Montgomery', 'Sunnyvale'],
        'Population': [200, 100],
      },
      index=[0, 1],
    )

    left_table = fbi_data_table(data=left_frame, suffix='_table1')
    right_table = fbi_data_table(data=right_frame, suffix='_table2')

    # Expected result: rows are paired by content, not by position (foo=1
    # goes with bar=4). Presumably the pairing is keyed on the 'index'
    # column, the only shared column that appears without a suffix.
    # Original author's note: the row indices of the frames are ignored.
    unsorted_expected = pandas.DataFrame({
      'foo': [1, 2],
      'bar': [4, 3],
      'State_table1': ['CA', 'AL'],
      'City_table1': ['Sunnyvale', 'Montgomery'],
      'Population_table1': [100, 200],
      'State_table2': ['CA', 'AL'],
      'City_table2': ['Sunnyvale', 'Montgomery'],
      'Population_table2': [100, 200],
      'index': ['california_sunnyvale', 'alabama_montgomery']
    })

    # Column order is not part of the contract under test, so both frames
    # are put into sorted column order before they are compared.
    expected_frame = unsorted_expected.sort_index(axis=1)
    joined_table = left_table.join_exact_matching(right_table)
    actual_frame = joined_table.data.sort_index(axis=1)

    frames_match = expected_frame.equals(actual_frame)
    self.assertTrue(frames_match)
示例#2
0
 def test_init_from_data(self):
   # An `Fbi` DataTable built from a pandas DataFrame should expose a frame
   # equal to the one it was given.
   single_row = {
     'foo': 1,
     'State': 'CA',
     'City': 'Sunnyvale',
     'Population': 100,
     'state': 'ignored',
     'city': 'ignored'
   }
   source_frame = pandas.DataFrame(single_row, index=[0])

   table = fbi_data_table(data=source_frame)

   stored_frame = table.data
   self.assertTrue(stored_frame.equals(source_frame))
示例#3
0
 def test_get_fuzzy_matching_key(self):
   # The key built for a row should come from the lowercase 'state', 'city'
   # and 'population' columns; the capitalised 'State'/'City' columns hold
   # the value 'ignored' and must not show up in the key.
   single_row = {
     'foo': 1,
     'state': 'CA',
     'city': 'Sunnyvale',
     'population': 100,
     'State': 'ignored',
     'City': 'ignored'
   }
   source_frame = pandas.DataFrame(single_row, index=[0])
   table = fbi_data_table(data=source_frame)

   first_row = source_frame.iloc[0]
   actual_key = table.get_fuzzy_matching_key(first_row)
   expected_key = FuzzyMatchingKey(
     state='CA', city='Sunnyvale', population=100)
   self.assertEqual(actual_key, expected_key)
示例#4
0
  def test_join_fuzzy_matching(self):
    # Fuzzy-join an FBI table (left) against a census table (right) and
    # compare the joined frame with a hand-built expected frame.
    fbi_data1 = pandas.DataFrame(
      {
        'foo': [1, 2, 3],
        'state': ['CA', 'AL', 'Hidden'],
        'city': ['Sunnyvale', 'Montgomery', 'Lost City'],
        'population': [100, 200, 300],
        'index':
        ['california_sunnyvale', 'alabama_montgomery', 'atlantas_lost_city'],
      },
      index=['california_sunnyvale', 'alabama_montgomery', 'atlantis'])

    # The census population column name is computed once and reused for the
    # input and the expected frame so that the two cannot drift apart.
    census_population_header = get_header(
      'Population Estimate (as of July 1) - 2017', 'census_2017')
    census_data2 = pandas.DataFrame(
      {
        'bar': [5, 3, 4],
        'state': ['Isle of Man', 'AL', 'CA'],
        'city': ['Avalon', 'Montgomery', 'Sunnyvale'],
        'Target Geo Id2': ['???', '1620000US0151000', '1620000US0677000'],
        census_population_header: [1, 200, 100],
      },
      index=[0, 1, 2])

    fbi_table1 = fbi_data_table(data=fbi_data1, suffix='_fbi')
    census_table2 = census_data_table(data=census_data2, suffix='_census')
    joined_table = fbi_table1.join_fuzzy_matching(census_table2)
    # The output table from matching should be the same class as the left
    # table. assertIsInstance reports the actual type when this fails.
    self.assertIsInstance(joined_table, fbi_data_table)
    actual_data = joined_table.data.sort_index(axis=1)
    # Only Montgomery and Sunnyvale appear on both sides; the unmatched rows
    # ('Lost City' on the left, 'Avalon' on the right) are expected to be
    # absent from the result.
    expected_data = pandas.DataFrame({
      'city_fbi': ['Montgomery', 'Sunnyvale'],
      'population': [200, 100],
      census_population_header: [200, 100],
      'state_census': ['AL', 'CA'],
      'Target Geo Id2': ['1620000US0151000', '1620000US0677000'],
      'bar': [3, 4],
      'foo': [2, 1],
      'city_census': ['Montgomery', 'Sunnyvale'],
      'index': ['alabama_montgomery', 'california_sunnyvale'],
      'state_fbi': ['AL', 'CA'],
    }).sort_index(axis=1)

    # Both frames have their columns sorted, so column order does not
    # affect the comparison.
    self.assertTrue(expected_data.equals(actual_data))
示例#5
0
 def test_init_from_file(self):
   # Loading the 2017 FBI "Table 8" spreadsheet should produce a table
   # whose data has 9589 rows. The path is relative, so the test has to be
   # run from a directory that contains `data/fbi/`.
   xls_path = (
     'data/fbi/Table_8_Offenses_Known_to_Law_Enforcement_by_State_by_City_2017.xls'
   )
   loaded_table = fbi_data_table(file_path=xls_path)
   row_count = len(loaded_table.data)
   self.assertEqual(row_count, 9589)