Пример #1
0
    def initialize_storage(self, d_args=None):
        """
        Read the CSV file at ``self.file_path`` into ``self.data``.

        The file is loaded with ``csvfile.csv_to_df()``. When the module-level
        ``DEBUG`` flag is truthy, the elapsed load time is logged at INFO level.

        Args:
            d_args: Accepted for interface compatibility; not used here.

        Returns (bool): True once the data has been loaded. Failures are not
        reported through the return value: any exception raised by the loader
        propagates to the caller unchanged.
        """
        tag = "%s.initialize_storage" % (self.cname)
        source_path = self.file_path

        self.log('DEBUG',
                 "%s: Reading data file into memory: %s" % (tag, source_path))

        # Only pay for the timer when debugging is switched on.
        started = time.perf_counter() if DEBUG else None

        # Loader errors are deliberately left to bubble up to the caller.
        self.data = csvfile.csv_to_df(source_path)

        if DEBUG:
            elapsed = time.perf_counter() - started
            self.log(
                'INFO', "%s: Data-file read elapsed time (secs): %s" %
                (tag, round(elapsed, 2)))

        return True
Пример #2
0
def csv_to_sql(file_path, **kwargs):
  """
   ETL function for CSV=>SQL transformation.

   Loads the CSV file into a DataFrame, truncates the CveRecord table, inserts
   one CveRecord per file row, commits, and finally checks that the number of
   rows in the table equals the number of rows read from the file.

   Required arg (str): path to CSV file
   Optional kwargs: (username, password) -- forwarded to process_db_args()
   Returns (int): Number of records inserted
   Raises: DatabaseError if the table row count differs from the file row count.
           Exceptions from the CSV loader or the database layer propagate as-is.

   !!! WARNING !!! This will truncate and overwrite the existing CVE records in the database

   Typical usage:
    (venv) $ python -c 'import database; database.csv_to_sql(csv_file, username="******", password="******")'
  """

  tag = "%s.csv_to_sql" % myname

  db_configs = DevelopmentConfig.STORAGE_ARGS['database']
  db_args = process_db_args(db_configs, **kwargs)

  ## Load CSV file contents into Pandas df
  df = csvfile.csv_to_df(file_path)
  file_row_count = df.shape[0]

  engine = create_db_engine(db_args)
  session = create_session(engine)

  # The session is always closed, including when truncate/insert/commit fails.
  try:
    tablename = CveRecord.__tablename__

    sql_truncate = 'TRUNCATE TABLE ' + tablename
    print(">> TRUNCATING TABLE: %s" % tablename)
    session.execute(sql_truncate)

    for i in range(file_row_count):
      # Column values of row i are passed positionally to the CveRecord constructor.
      l_values = list(df.iloc[i, :])
      print(">> INSERTING: %s" % str(l_values))
      cve_rec = CveRecord(*l_values)
      session.add(cve_rec)

    session.commit()

    ## sanity check -- run while the session is still open
    db_row_count = session.query(CveRecord.id).count()
  finally:
    session.close()

  if file_row_count != db_row_count:
    # Both placeholders need a value; supplying only the count raised TypeError
    # here instead of the intended DatabaseError.
    raise DatabaseError("%s: Failed to load all (%d) records from file" % (tag, file_row_count))

  return db_row_count
Пример #3
0
def test_cve_csv_read():
    """Check that csvfile.csv_to_df() returns a pandas DataFrame for CSV_FILE."""
    loaded = csvfile.csv_to_df(CSV_FILE)
    assert isinstance(loaded, pd.DataFrame)
Пример #4
0
def test_csv_invalid_file_format_exc(invalid_cve_file):
    """
     Check that csvfile.csv_to_df() raises CsvFileError for the invalid file
     passed in as invalid_cve_file (the original note mentions 3 invalid file
     formats -- presumably supplied one at a time by pytest; verify against
     the fixture definition).
    """
    with pytest.raises(CsvFileError):
        # Only the raised exception matters; the return value is not needed.
        csvfile.csv_to_df(invalid_cve_file)
Пример #5
0
def test_csv_file_load_exc():
    """
     Check that csvfile.csv_to_df() raises CsvFileError when called with
     BAD_CSV_PATH (a non-existent file path, per the original note).
    """
    with pytest.raises(CsvFileError):
        # Only the raised exception matters; the return value is not needed.
        csvfile.csv_to_df(BAD_CSV_PATH)