def initialize_storage(self, d_args=None):
    """
    Load data from the CSV file into an object-storage attribute.

    Reads the file at ``self.file_path`` with ``csvfile.csv_to_df()`` and
    keeps the result on ``self.data``.  We assume that the CSV file has
    comma-separated column names in the first line.

    Args:
        d_args: Not referenced by this method.

    Returns (bool):
        True once the file has been loaded.  Failure is reported by the
        exception raised from ``csvfile.csv_to_df()``, which propagates
        to the caller unchanged, so False is never returned.
    """
    tag = "%s.initialize_storage" % (self.cname)
    file_path = self.file_path
    self.log('DEBUG', "%s: Reading data file into memory: %s" % (tag, file_path))

    # Sample DEBUG once so the start/stop halves of the timing can never
    # disagree (which would leave the start timestamp undefined).
    started = time.perf_counter() if DEBUG else None

    self.data = csvfile.csv_to_df(file_path)

    if started is not None:
        elapsed = time.perf_counter() - started
        self.log(
            'INFO',
            "%s: Data-file read elapsed time (secs): %s" % (tag, round(elapsed, 2)))
    return True
def csv_to_sql(file_path, **kwargs):
    """
    ETL function for CSV=>SQL transformation.

    Required arg (str): path to CSV file
    Optional kwargs: (username, password)

    Returns (int): Number of records inserted

    Raises:
        DatabaseError: if the table row count after the load differs from
            the number of rows read from the file.

    !!! WARNING !!!
    This will truncate and overwrite the existing CVE records in the database

    Typical usage:
    (venv) $ python -c 'import database; database.csv_to_sql(csv_file, username="******", password="******")'
    """
    tag = "%s.csv_to_sql" % myname
    db_configs = DevelopmentConfig.STORAGE_ARGS['database']
    db_args = process_db_args(db_configs, **kwargs)

    ## Load CSV file contents into Pandas df
    df = csvfile.csv_to_df(file_path)
    file_row_count = df.shape[0]

    engine = create_db_engine(db_args)
    session = create_session(engine)
    try:
        tablename = CveRecord.__tablename__
        sql_truncate = 'TRUNCATE TABLE ' + tablename
        print(">> TRUNCATING TABLE: %s" % tablename)
        # NOTE(review): if the session is a SQLAlchemy 2.x Session, a raw
        # string must be wrapped in sqlalchemy.text() -- confirm the
        # installed version before upgrading.
        session.execute(sql_truncate)

        for i in range(file_row_count):
            l_values = list(df.iloc[i, :])
            print(">> INSERTING: %s" % str(l_values))
            session.add(CveRecord(*l_values))
        session.commit()

        ## sanity check -- must run while the session is still open
        db_row_count = session.query(CveRecord.id).count()
    finally:
        # Release the session on success and on every failure path.
        session.close()

    if file_row_count != db_row_count:
        raise DatabaseError(
            "%s: Failed to load all (%d) records from file" % (tag, file_row_count))
    return db_row_count
def test_cve_csv_read():
    """
    Test that csvfile.csv_to_df() loads the csv file into a Pandas DataFrame
    """
    df = csvfile.csv_to_df(CSV_FILE)
    # isinstance() already yields a bool; no comparison against True needed.
    assert isinstance(df, pd.DataFrame)
def test_csv_invalid_file_format_exc(invalid_cve_file):
    """
    Verify that csvfile.csv_to_df() raises a CsvFileError() exception
    as expected for an invalid file format.

    The file under test arrives through the `invalid_cve_file` argument,
    which is supplied outside this function (the original note says it
    covers 3 invalid file formats -- TODO confirm against its definition).
    """
    with pytest.raises(CsvFileError):
        # The return value is irrelevant; only the raised exception matters.
        csvfile.csv_to_df(invalid_cve_file)
def test_csv_file_load_exc():
    """
    Verify that csvfile.csv_to_df() raises a CsvFileError() exception
    as expected when given a non-existent file path
    """
    with pytest.raises(CsvFileError):
        # The return value is irrelevant; only the raised exception matters.
        csvfile.csv_to_df(BAD_CSV_PATH)