def create(name, filepath):
    """Create a dataset named ``name`` from the CSV file at ``filepath``.

    The file is uploaded into a new revision, the revision is applied with
    the resulting output schema, and the revision is opened in a browser.
    """
    client = Socrata(auth)
    with open(filepath, 'rb') as handle:
        pending = client.create(name=name)
        (revision, output) = pending.csv(handle)
        job = revision.apply(output_schema=output)
        revision.open_in_browser()
def main(args):
    """Export a dataset query to a file and optionally upload it to Socrata.

    Reads ``args.DataSet``, ``args.FileName`` and ``args.Upload``. Nothing
    happens when ``args.DataSet`` is an empty string.

    Raises:
        AssertionError: if the view lookup, the upload, or the job wait
            reports a failure (``ok`` is falsy).
    """
    # Get the dataset name from the command line
    if args.DataSet == "":
        return

    # Get the dataset info from the XML
    ods = getDatasetInfo(args.DataSet)

    # Create the connection to the database
    connection = ods.getConnection()
    try:
        # Create the dataframe
        df = pd.read_sql(ods.query, con=connection)
    finally:
        # The original wrote ``connection.close`` without parentheses, which
        # only looks the method up and never closes the connection. Closing
        # in ``finally`` also covers a failing query.
        connection.close()

    # Save the dataframe to a file
    writeFile(df, args.FileName)

    # Post the file to SCGC?
    # ``== True`` is kept on purpose so the accepted values do not change.
    if args.Upload == True:  # noqa: E712
        # Authenticate to the portal
        auth = Authorization(
            'austin-aph.data.socrata.com',
            'Your_Socrata_Username',
            'Your_Socrata_Password',
        )
        socrata = Socrata(auth)

        # Find the view for the dataset
        (ok, view) = socrata.views.lookup(ods.view)
        assert ok, view

        # Open the file
        with open(args.FileName, 'rb') as my_file:
            # Get the config file for the view
            (ok, job) = socrata.using_config(ods.config, view).csv(my_file)
            assert ok, job

            # Write out the progress of the job
            (ok, job) = job.wait_for_finish(
                progress=lambda job: print(
                    'Job progress:', job.attributes['status']))
def setUp(self):
    """Create a new 'test-view' revision and look up the view behind it."""
    client = Socrata(auth)
    self.pub = client

    (ok, revision) = client.new({'name': 'test-view'})
    assert ok, revision
    self.rev = revision

    fourfour = revision.attributes['fourfour']
    (ok, found_view) = client.views.lookup(fourfour)
    assert ok, found_view
    self.view = found_view
def test_upload_to_config(self):
    """Upload a CSV through a stored "replace" config and check the result."""
    creator = Socrata(auth)
    config_name = "some_config %s" % str(uuid.uuid4())
    (ok, config) = creator.configs.create(config_name, "replace")
    self.assertTrue(ok, config)

    # A second client is built for the upload, as in the original test.
    uploader = Socrata(auth)
    with open('test/fixtures/simple.csv', 'rb') as fixture:
        configured = uploader.using_config(config_name, self.view)
        (rev, job) = configured.csv(fixture)
        action_type = rev.attributes['action']['type']
        self.assertEqual(action_type, 'replace')
        self.assertTrue(job.attributes['created_at'])
def wrapper(slf):
    """Run ``method`` against a dataset built from the fixture, then delete it.

    ``method``, ``filename`` and ``kind`` come from the enclosing scope.
    """
    label = str(method.__qualname__)
    client = Socrata(auth)
    fixture_path = 'test/fixtures/%s' % filename
    with open(fixture_path, 'rb') as fixture:
        pending = client.create(
            name="test for %s" % label,
            description="a description",
        )
        # ``kind`` names the upload method to call on the pending dataset.
        upload = getattr(pending, kind)
        (revision, output) = upload(fixture)
        try:
            method(slf, output)
        finally:
            # Delete the view whether or not the wrapped test passed.
            (ok, view) = client.views.lookup(revision.view_id())
            view.delete()
def create(name, filepath):
    """Create a dataset from a CSV, wait for the apply job, then update it.

    After the job finishes, the view is looked up by the revision's
    ``fourfour`` attribute and handed to ``update``.
    """
    client = Socrata(auth)
    with open(filepath, 'rb') as handle:
        pending = client.create(name=name)
        (initial_rev, output) = pending.csv(handle)

        apply_job = initial_rev.apply(output_schema=output)
        apply_job = apply_job.wait_for_finish()

        fourfour = initial_rev.attributes['fourfour']
        view = client.views.lookup(fourfour)
        update(view)
def __init__(self, uuid=None, mq_fp=None, auth_param=auth_param):
    """Store the dataset identifiers and build the Socrata client.

    ``existing_dataset`` is true whenever a ``uuid`` is supplied. When
    ``mq_fp`` is given, a metadata questionnaire is loaded from it.
    """
    self.uuid = uuid
    self.existing_dataset = bool(uuid)
    self.mq_fp = mq_fp
    self.auth_param = auth_param

    credentials = Authorization(*auth_param)
    self.socrata = Socrata(credentials)

    self.mq = None
    self.metadata = None
    if mq_fp:
        self.mq = ITSMetadataQuestionnaire(mq_fp)
        # NOTE(review): assumed to run only when a questionnaire path is
        # given - confirm against the original indentation.
        self.retrieve_metadata()
def __init__(self, env: Literal["test", "impl", "prod"]) -> None:
    """Initialize this class instance with default values.

    Logs and stores ``env``, loads the files, schemas and tracker lookup
    tables, then builds the Socrata client from the module-level domain and
    credentials.
    """
    logger.info(f"Initializing Loader for env {env}")

    # Set env
    self.env = env

    # Load lookup tables
    self.load_files_table()
    self.load_schemas_table()
    self.load_tracker_table()

    # Initialize Socrata client. The value is an ``Authorization`` instance;
    # the previous ``Tuple[str]`` annotation was incorrect.
    auth: Authorization = Authorization(SOCRATA_DOMAIN, *SOCRATA_CREDENTIALS)
    self.client = Socrata(auth)
def test_source_to_config(self):
    """Upload CSV text through a stored "replace" config and check the result."""
    csv_text = """a,b,c
1,2,3
4,5,6
7,8,9
"""
    creator = Socrata(auth)
    config_name = "some_config %s" % str(uuid.uuid4())
    (ok, config) = creator.configs.create(config_name, "replace")
    self.assertTrue(ok, config)

    # A second client is built for the upload, as in the original test.
    uploader = Socrata(auth)
    configured = uploader.using_config(config_name, self.view)
    (rev, job) = configured.csv(csv_text, filename="abc.csv")

    self.assertEqual(rev.attributes['action']['type'], 'replace')
    self.assertTrue(job.attributes['created_at'])
def test_create_new_csv_from_str(self):
    """Create a dataset from CSV text and check the output schema completed."""
    csv_text = """a,b,c
1,2,3
4,5,6
7,8,9
"""
    pending = Socrata(auth).create(
        name="cool dataset",
        description="a description",
    )
    (revision, output) = pending.csv(csv_text, filename="foo.csv")
    try:
        completed_at = output.attributes['completed_at']
        self.assertIsNotNone(completed_at)
    finally:
        # Delete the created view even when the assertion fails.
        (ok, view) = Socrata(auth).views.lookup(revision.view_id())
        view.delete()
def test_put_source_in_revision(self):
    """Feed a dataframe to an upload source, then attach it to a revision."""
    client = Socrata(auth)
    upload = client.sources.create_upload('foo.csv')

    frame = pd.read_csv('test/fixtures/simple.csv')
    # The result of ``df`` is not inspected by this test.
    upload.df(frame)

    revision = self.create_rev()
    upload.add_to_revision(revision)
def test_source_change_on_existing_upload(self):
    """Change header parse options on an uploaded source and check the columns.

    After setting ``header_count`` and ``column_header`` to 2 the output
    schema is expected to expose the columns ``a``, ``b`` and ``c``.
    """
    pub = Socrata(auth)
    (ok, source) = pub.sources.create_upload('foo.csv')
    self.assertTrue(ok, source)

    with open('test/fixtures/skip-header.csv', 'rb') as f:
        (ok, source) = source.csv(f)
        self.assertTrue(ok, source)

    (ok, source) = source\
        .change_parse_option('header_count').to(2)\
        .change_parse_option('column_header').to(2)\
        .run()
    self.assertTrue(ok, source)

    po = source.attributes['parse_options']
    self.assertEqual(po['header_count'], 2)
    self.assertEqual(po['column_header'], 2)

    input_schema = source.get_latest_input_schema()
    # ``get_latest_input_schema`` returns no ``ok`` flag here. The original
    # re-asserted the stale ``ok`` from ``run()``, which proved nothing about
    # the schema; check the schema itself instead.
    self.assertIsNotNone(input_schema)

    (ok, output_schema) = input_schema.latest_output()
    self.assertTrue(ok, output_schema)

    [a, b, c] = output_schema.attributes['output_columns']
    self.assertEqual(a['field_name'], 'a')
    self.assertEqual(b['field_name'], 'b')
    self.assertEqual(c['field_name'], 'c')
def create_new_dataset(client: Socrata, dataframe: DataFrame, name: str, description: str):
    """Create and publish a dataframe as a new Socrata dataset.

    Returns the revision after it has been applied with the prepared output
    schema.
    """
    pending = client.create(
        name=name,
        description=description,
        attributionLink='https://api.census.gov',
    )
    revision: Revision
    output_schema: OutputSchema
    revision, output_schema = pending.df(dataframe)
    output_schema = prepare_output_schema(output_schema)

    # Handle geometry column type
    if 'geometry' in dataframe.columns:
        wkt = dataframe['geometry'].fillna('')

        def _any_row_matches(pattern: str) -> bool:
            # True when at least one geometry value matches ``pattern``.
            return len(dataframe.loc[wkt.str.match(pattern)]) > 0

        geometry: Optional[Literal['points', 'polygons']] = None
        if _any_row_matches('^POINT'):
            geometry = 'points'
        elif _any_row_matches('^MULTIPOLYGON'):
            geometry = 'polygons'
        output_schema = add_geometry_to_output_schema(output_schema, geometry)

    # Handle pre-1.x versions of Socrata-py
    if isinstance(output_schema, tuple):
        output_schema = output_schema[1]

    output_schema.wait_for_finish()
    revision.apply(output_schema=output_schema)
    return revision
def to_socrata(
    domain: Union[URL, str],
    dataframe: DataFrame,
    dataset_id: str = None,
    name: str = None,
    description: str = None,
    auth: Tuple[str, str] = None,
    open_in_browser: bool = True,
) -> URL:
    """Publish an autocensus dataframe to Socrata.

    A new dataset is created when ``dataset_id`` is ``None``; otherwise the
    existing dataset is updated. Returns the revision's UI URL.
    """
    # Serialize geometry to WKT.
    # NOTE(review): this assigns into the caller's dataframe in place.
    try:
        dataframe['geometry'] = dataframe['geometry'].map(serialize_to_wkt)
    except KeyError:
        pass

    # Initialize client
    host = str(domain)
    credentials = look_up_socrata_credentials(auth)
    client = Socrata(Authorization(host, *credentials))

    if dataset_id is not None:
        revision = update_existing_dataset(client, dataframe, dataset_id)
    else:
        # No 4x4 was supplied, so create a new dataset with default labels
        # where none were given.
        if name is None:
            name = 'American Community Survey Data'
        if description is None:
            description = ''
        revision = create_new_dataset(client, dataframe, name, description)

    # Return URL
    if open_in_browser is True:
        revision.open_in_browser()
    return URL(revision.ui_url())
def test_show_config(self):
    """Create a config and verify that ``show`` succeeds on it."""
    config_name = "some_config %s" % str(uuid.uuid4())
    client = Socrata(auth)

    (ok, created) = client.configs.create(config_name, "replace")
    self.assertTrue(ok, created)

    (ok, shown) = created.show()
    self.assertTrue(ok, shown)
def test_lookup_config(self):
    """Create a config, look it up by name, and compare the stored name."""
    client = Socrata(auth)
    config_name = "some_config %s" % str(uuid.uuid4())

    # Only the lookup result is inspected; the create result is discarded.
    client.configs.create(config_name, "replace")
    found = client.configs.lookup(config_name)

    self.assertEqual(found.attributes['name'], config_name)
def test_create_new_csv(self):
    """Create a dataset from the simple CSV fixture and check the output schema."""
    with open('test/fixtures/simple.csv', 'rb') as fixture:
        pending = Socrata(auth).create(
            name="cool dataset",
            description="a description",
        )
        (revision, output) = pending.csv(fixture)

        attributes = output.attributes
        self.assertEqual(attributes['error_count'], 0)
        self.assertIsNotNone(attributes['completed_at'])
def test_create_new_shapefile(self):
    """Create a dataset from the zipped shapefile fixture and check the output."""
    with open('test/fixtures/zillow.zip', 'rb') as fixture:
        pending = Socrata(auth).create(
            name="zillow",
            description="a description",
        )
        (revision, output) = pending.shapefile(fixture)

        attributes = output.attributes
        self.assertEqual(attributes['error_count'], 0)
        self.assertIsNotNone(attributes['completed_at'])
def test_create_source_outside_rev(self):
    """Create an upload source without a revision and inspect it.

    Checks the recorded filename and that the ``show`` and ``bytes``
    operations are listed for the source.
    """
    pub = Socrata(auth)
    source = pub.sources.create_upload('foo.csv')

    self.assertEqual(source.attributes['source_type']['filename'], 'foo.csv')

    # ``assertIn`` replaces bare ``assert``: it still runs under ``python -O``
    # and reports the list contents on failure.
    self.assertIn('show', source.list_operations())
    self.assertIn('bytes', source.list_operations())
def test_put_source_in_revision(self):
    """Upload a CSV to a standalone source, then attach it to a revision."""
    client = Socrata(auth)
    upload = client.sources.create_upload('foo.csv')

    with open('test/fixtures/simple.csv', 'rb') as fixture:
        upload = upload.csv(fixture)
        # The schema is fetched but not inspected by this test.
        upload.get_latest_input_schema()

    revision = self.create_rev()
    upload.add_to_revision(revision)
def test_delete_config(self):
    """Delete a config and expect ``show`` to raise afterwards."""
    client = Socrata(auth)
    config_name = "some_config %s" % str(uuid.uuid4())
    created = client.configs.create(config_name, "replace")

    created.delete()

    # TODO exception
    with self.assertRaises(UnexpectedResponseException):
        created.show()
def test_source_csv_outside_rev(self):
    """Feed a dataframe to a standalone source and check the input columns."""
    client = Socrata(auth)
    upload = client.sources.create_upload('foo.csv')

    frame = pd.read_csv('test/fixtures/simple.csv')
    upload = upload.df(frame)

    schema = upload.get_latest_input_schema()
    columns = schema.attributes['input_columns']
    field_names = sorted(column['field_name'] for column in columns)

    self.assertEqual(['a', 'b', 'c'], field_names)
def test_list_operations(self):
    """Create a config and verify it appears in the domain's config list."""
    client = Socrata(auth)
    config_name = "some_config %s" % str(uuid.uuid4())
    client.configs.create(config_name, "replace")

    listed = client.configs.list()

    # Assert there's some config on this domain where the
    # name is what we want
    name_matches = [
        entry.attributes['name'] == config_name for entry in listed
    ]
    self.assertTrue(any(name_matches))
def test_source_change_header_rows(self):
    """Set both header parse options to 2 on a new source and verify them."""
    client = Socrata(auth)
    upload = client.sources.create_upload('foo.csv')

    change = upload.change_parse_option('header_count').to(2)
    change = change.change_parse_option('column_header').to(2)
    upload = change.run()

    parse_options = upload.attributes['parse_options']
    self.assertEqual(parse_options['header_count'], 2)
    self.assertEqual(parse_options['column_header'], 2)
def test_create_new_csv_from_str(self):
    """Create a dataset from CSV text and check the output schema completed."""
    csv_text = """a,b,c
1,2,3
4,5,6
7,8,9
"""
    pending = Socrata(auth).create(
        name="cool dataset",
        description="a description",
    )
    (revision, output) = pending.csv(csv_text, filename="foo.csv")

    completed_at = output.attributes['completed_at']
    self.assertIsNotNone(completed_at)
def test_put_source_in_revision(self):
    """Feed a dataframe to a standalone source, then attach it to a revision.

    Every step's ``ok`` flag is asserted.
    """
    client = Socrata(auth)
    (ok, upload) = client.sources.create_upload('foo.csv')
    self.assertTrue(ok, upload)

    frame = pd.read_csv('test/fixtures/simple.csv')
    (ok, input_schema) = upload.df(frame)
    self.assertTrue(ok, input_schema)

    revision = self.create_rev()
    (ok, upload) = upload.add_to_revision(revision)
    self.assertTrue(ok, upload)
class TestCase(unittest.TestCase):
    """Base test case that sets up a 'test-view' view and revision per test.

    Also provides helpers to build an update revision, an input schema from
    a fixture file, and an output schema with one transformed column.
    """

    def setUp(self):
        """Create a new revision named 'test-view' and look up its view."""
        client = Socrata(auth)
        self.pub = client

        (ok, revision) = client.new({'name': 'test-view'})
        assert ok, revision
        self.rev = revision

        (ok, found_view) = client.views.lookup(revision.attributes['fourfour'])
        assert ok, found_view
        self.view = found_view

    def tearDown(self):
        """Discard the current revision, if any, and delete the view."""
        revision = getattr(self, 'rev', False)
        if revision:
            revision.discard()
        self.view.delete()

    def create_rev(self):
        """Create an update revision on the test view and remember it."""
        # NOTE(review): this client is never used; it is still constructed
        # so the helper behaves exactly as before.
        p = Socrata(auth)
        (ok, revision) = self.view.revisions.create_update_revision()
        assert ok
        self.rev = revision
        return revision

    def create_input_schema(self, rev=None, filename='simple.csv'):
        """Upload a fixture file to ``rev`` and return its latest input schema.

        A new revision is created when ``rev`` is falsy.
        """
        revision = rev if rev else self.create_rev()

        (ok, upload) = revision.create_upload('foo.csv')
        assert ok

        fixture_path = 'test/fixtures/%s' % filename
        with open(fixture_path, 'rb') as fixture:
            (ok, upload) = upload.csv(fixture)
            assert ok, upload
            return upload.get_latest_input_schema()

    def create_output_schema(self, input_schema=None):
        """Transform ``input_schema`` so column ``b`` is converted to a number.

        A new input schema is created when ``input_schema`` is falsy.
        """
        if not input_schema:
            input_schema = self.create_input_schema()

        numeric_b = {
            "field_name": "b",
            "display_name": "b, but as a number",
            "position": 0,
            "description": "b but with a bunch of errors",
            "transform": {
                "transform_expr": "to_number(b)"
            }
        }
        (ok, output_schema) = input_schema.transform({
            'output_columns': [numeric_b]
        })
        assert ok
        return output_schema
def test_upload_csv_outside_rev(self):
    """Upload a CSV to a standalone source and check the input column names."""
    pub = Socrata(auth)
    (ok, source) = pub.sources.create_upload('foo.csv')
    self.assertTrue(ok, source)

    with open('test/fixtures/simple.csv', 'rb') as f:
        (ok, source) = source.csv(f)
        # The original never checked this result directly. It re-used the
        # stale ``ok`` after fetching the schema, with the schema as the
        # failure message.
        self.assertTrue(ok, source)

    input_schema = source.get_latest_input_schema()
    # ``get_latest_input_schema`` returns no ``ok`` flag here, so check the
    # schema object itself.
    self.assertIsNotNone(input_schema)

    names = sorted([
        ic['field_name'] for ic in input_schema.attributes['input_columns']
    ])
    self.assertEqual(['a', 'b', 'c'], names)
def test_update_config(self):
    """Update a config's data action and columns, then verify both."""
    client = Socrata(auth)
    config_name = "some_config %s" % str(uuid.uuid4())
    created = client.configs.create(config_name, "replace")

    foo_column = {
        "field_name": "foo",
        "display_name": "Foo is the display name",
        "transform_expr": "to_number(`foo`)",
        "format": {},
        "description": "",
        "is_primary_key": None,
        "flags": []
    }
    columns = [foo_column]

    changes = {'data_action': 'update', 'columns': columns}
    updated = created.update(changes)

    self.assertEqual(updated.attributes["data_action"], "update")
    self.assertEqual(updated.attributes["columns"], columns)
def test_create_config_with_non_defaults(self):
    """Create a config with custom parse options and columns, then verify it.

    The stored config is expected to echo the supplied values together with
    the additional parse-option and column fields listed below.
    """
    config_name = "some_config %s" % str(uuid.uuid4())
    client = Socrata(auth)

    requested_parse_options = {
        "encoding": "utf8",
        "header_count": 2,
        "column_header": 2
    }
    requested_columns = [{
        "field_name": "foo",
        "display_name": "Foo is the display name",
        "transform_expr": "to_number(`foo`)"
    }]
    (ok, config) = client.configs.create(
        config_name,
        "replace",
        parse_options=requested_parse_options,
        columns=requested_columns,
    )
    self.assertTrue(ok, config)

    expected_parse_options = {
        "encoding": "utf8",
        "header_count": 2,
        "column_header": 2,
        "quote_char": '"',
        "parse_source": True,
        "column_separator": ",",
        "remove_empty_rows": True,
        "trim_whitespace": True
    }
    expected_columns = [{
        "field_name": "foo",
        "display_name": "Foo is the display name",
        "transform_expr": "to_number(`foo`)",
        "format": {},
        "description": "",
        "is_primary_key": None,
        "flags": []
    }]

    self.assertEqual(config.attributes['name'], config_name)
    self.assertEqual(config.attributes['parse_options'], expected_parse_options)
    self.assertEqual(config.attributes['columns'], expected_columns)