def test_get_datafile_path_with_zip_path(self):
    """Check get_datafile_path when the archive is given via ``zip_path``.

    A test zip is written into a temporary directory and passed as
    ``zip_path`` with no URL; the returned path is expected to be
    ``file1.txt`` inside that directory.
    """
    # The context manager removes the directory even when the helper or the
    # assertion raises, which a trailing cleanup() call would not.
    with tempfile.TemporaryDirectory() as destination:
        zip_path = os.path.join(destination, "foo.zip")
        self.make_test_zip(zip_path)
        path_to_first_file = get_datafile_path(None, destination, zip_path=zip_path)
        self.assertEqual(os.path.join(destination, "file1.txt"), path_to_first_file)
def run(url, report, destination=None, download=True):
    """Fetch and unpack the IL data, load the CSV, and analyze it.

    ``url`` and ``destination`` are handed to ``download_and_unzip_data``;
    ``report`` is passed through to ``analyze``. ``download`` is accepted
    but not consulted in this function.
    """
    logger.info('*** IL Data Analysis Started ***')
    data_dir = download_and_unzip_data(url, destination)
    stops = load_csv(get_datafile_path(url, data_dir))
    analyze(stops, report)
def run(url, report, destination=None, download=True):
    """Fetch and unpack the MD data, load the XLS, process it, and analyze it.

    ``url`` and ``destination`` are handed to ``download_and_unzip_data``;
    ``report`` is passed through to ``analyze``. ``download`` is accepted
    but not consulted in this function.
    """
    logger.info('*** MD Data Analysis Started ***')
    data_dir = download_and_unzip_data(url, destination)
    raw_stops = load_xls(get_datafile_path(url, data_dir))
    # An empty to_drop tuple is passed explicitly, as in the original call.
    analyze(process_raw_data(raw_stops, to_drop=()), report)
def test_get_datafile_path_with_url(self):
    """Check get_datafile_path when the archive is identified by a URL.

    A test zip named ``foo.zip`` is written into a temporary directory and
    the function is called with a URL ending in ``foo.zip``; the returned
    path is expected to be ``file1.txt`` inside that directory.
    """
    # The context manager removes the directory even when the helper or the
    # assertion raises, which a trailing cleanup() call would not.
    with tempfile.TemporaryDirectory() as destination:
        zip_path = os.path.join(destination, 'foo.zip')
        self.make_test_zip(zip_path)
        path_to_first_file = get_datafile_path('http://example.com/foo.zip', destination)
        self.assertEqual(os.path.join(destination, 'file1.txt'), path_to_first_file)
def test_get_datafile_path_with_zip_path(self):
    """Check get_datafile_path when the archive is given via ``zip_path``.

    A test zip is written into a temporary directory and passed as
    ``zip_path`` with no URL; the returned path is expected to be
    ``file1.txt`` inside that directory.
    """
    # The context manager removes the directory even when the helper or the
    # assertion raises, which a trailing cleanup() call would not.
    with tempfile.TemporaryDirectory() as destination:
        zip_path = os.path.join(destination, 'foo.zip')
        self.make_test_zip(zip_path)
        path_to_first_file = get_datafile_path(None, destination, zip_path=zip_path)
        self.assertEqual(
            os.path.join(destination, 'file1.txt'), path_to_first_file
        )
def run(url, destination=None, download=True):
    """Fetch and unpack the MD data, ensure a CSV copy exists, and scan it.

    ``url`` and ``destination`` are handed to ``download_and_unzip_data``.
    The XLS->CSV conversion is skipped when the CSV path already exists.
    ``download`` is accepted but not consulted in this function.
    """
    logger.info('*** MD Data Scan Started ***')
    data_dir = download_and_unzip_data(url, destination)

    # Resolve the source spreadsheet and the CSV it converts to.
    source_xls = get_datafile_path(url, data_dir)
    target_csv = get_csv_path(url, data_dir)
    if os.path.exists(target_csv):
        logger.info("{} exists, skipping XLS->CSV conversion".format(target_csv))
    else:
        xls_to_csv(source_xls, target_csv)

    row_total = line_count(target_csv)
    logger.debug('Rows: {}'.format(row_total))
    scan([target_csv])
def run(url, destination=None, download=True):
    """Fetch and unpack the IL data, clean it up, and load it into PostgreSQL.

    ``url`` and ``destination`` are handed to ``download_and_unzip_data``.
    The raw->processed cleanup is skipped when the processed CSV path
    already exists. ``download`` is accepted but not consulted in this
    function.
    """
    logger.info('*** IL Data Import Started ***')
    data_dir = download_and_unzip_data(url, destination)

    # Resolve the raw file and the processed CSV derived from it.
    raw_csv = get_datafile_path(url, data_dir)
    clean_csv = get_csv_path(url, data_dir)
    if os.path.exists(clean_csv):
        logger.info("{} exists, skipping cleanup".format(clean_csv))
    else:
        raw_to_processed(raw_csv, clean_csv)

    row_total = line_count(clean_csv)
    logger.debug('Rows: {}'.format(row_total))

    # Constraints and indexes are dropped ahead of the bulk load.
    il_cursor = connections['traffic_stops_il'].cursor()
    drop_constraints_and_indexes(il_cursor)

    # COPY is used so the CSV loads as quickly as possible.
    copy_from(clean_csv)

    # Clear the query cache now that the data has been reloaded.
    flush_memcached()
def run(url, destination=None, download=True):
    """Fetch and unpack the MD data, convert it to CSV, and load it into PostgreSQL.

    ``url`` and ``destination`` are handed to ``download_and_unzip_data``.
    The XLS->CSV conversion is skipped when the CSV path already exists.
    ``download`` is accepted but not consulted in this function.
    """
    logger.info('*** MD Data Import Started ***')
    data_dir = download_and_unzip_data(url, destination)

    # Resolve the source spreadsheet and the CSV it converts to.
    source_xls = get_datafile_path(url, data_dir)
    target_csv = get_csv_path(url, data_dir)
    if os.path.exists(target_csv):
        logger.info("{} exists, skipping XLS->CSV conversion".format(target_csv))
    else:
        xls_to_csv(source_xls, target_csv)

    row_total = line_count(target_csv)
    logger.debug('Rows: {}'.format(row_total))

    # Constraints and indexes are dropped ahead of the bulk load.
    md_cursor = connections['traffic_stops_md'].cursor()
    drop_constraints_and_indexes(md_cursor)

    # COPY is used so the CSV loads as quickly as possible.
    copy_from(target_csv)

    # Clear the query cache now that the data has been reloaded.
    flush_memcached()
def test_get_datafile_path_special(self):
    """get_datafile_path is expected to raise ValueError for these calls."""
    # Called with None as the URL and no zip_path.
    self.assertRaises(ValueError, get_datafile_path, None, 'anything')
    # Called with both a URL and a zip_path.
    self.assertRaises(
        ValueError, get_datafile_path, 'something', 'anything', zip_path='anything'
    )
def test_get_datafile_path_special(self):
    """get_datafile_path is expected to raise ValueError for these calls."""
    # Each entry is (positional args, keyword args) for one rejected call:
    # first None as the URL with no zip_path, then a URL together with a zip_path.
    rejected_calls = (
        ((None, "anything"), {}),
        (("something", "anything"), {"zip_path": "anything"}),
    )
    for args, kwargs in rejected_calls:
        with self.assertRaises(ValueError):
            get_datafile_path(*args, **kwargs)