def test_standard(self): """ Test standard use-case """ global TO with xlrd.open_workbook(TO["temp_file"]) as workbook: all_expected = [] sheet = workbook.sheet_by_name(TO["sheet"]) sheet_dict = nrcSpreadsheetScraper.sheet2dict(sheet) # Validate sheet column_names = [i.value for i in sheet.row(0)] # Get one row structured as a dictionary row_num = 0 for row_dict in sheet_dict: # Move one row down the sheet # Note that the very first row is skipped row_num += 1 # Assemble the expected row content expected = {} for column_index, column_name in enumerate(column_names): cell = sheet.cell(row_num, column_index) expected[column_name] = cell.value # Run test self.assertDictEqual(expected, row_dict) # Append for final test all_expected.append(expected) self.assertListEqual(all_expected, sheet_dict)
def test_material_name(self): """ This test only confirms the appropriate number of rows and reportnums are inserted into the target table. It's also pretty ugly ... """ global TO # # TO = { # 'sheet': 'CALLS', # 'temp_file': '_-Nonsense-TEst_FiLe.xlsx.ext', # 'db_host': 'localhost', # 'db_schema': 'public', # 'db_name': 'test_skytruth', # 'db_user': getpass.getuser(), # 'db_pass': '', # 'field_reportnum': 'reportnum', # 'db_table': '"NrcParsedReport"', # 'dl_url': 'http://cgmix.uscg.mil/NRC/FOIAFiles/Current.xlsx', # 'db_null': 'NULL', # 'test_reportnum': 10101019, # } # # # Parse arguments # map_def = kwargs['map_def'] # print_queries = kwargs['print_queries'] # execute_queries = kwargs['execute_queries'] # extras_field_maps = map_def['processing']['args']['extras_field_maps'] # db_write_mode = kwargs['db_write_mode'] # uid = kwargs['uid'] # sheet_seqnos_field = kwargs['sheet_seqnos_field'] # db_cursor = kwargs['db_cursor'] # raw_sheet_cache = kwargs['raw_sheet_cache'] # db_seqnos_field = kwargs['db_seqnos_field'] # db_null_value = kwargs['db_null_value'] # sheet_cache = kwargs['sheet_cache'] # # Don't need to call processing functions so create a pseudo function that doesn't actually do anything def processing_placeholder(*args, **kwargs): return None # Construct needed objects for test db_conn_str = "host='%s' dbname='%s' user='******' password='******'" % ( TO["db_host"], TO["db_name"], TO["db_user"], TO["db_pass"], ) db_conn = psycopg2.connect(db_conn_str) db_cursor = db_conn.cursor() if not isfile(TO["temp_file"]): response = urllib2.urlopen(TO["dl_url"]) with open(TO["temp_file"], "w") as f: f.write(response.read()) workbook = xlrd.open_workbook(TO["temp_file"]) print_queries = False execute_queries = True db_write_mode = "INSERT INTO" uid = 1090442.0 sheet_seqnos_field = "SEQNOS" db_seqnos_field = "reportnum" sheet_cache = {} raw_sheet_cache = {} db_null_value = "NULL" for sname in workbook.sheet_names(): if sname not in sheet_cache: try: sheet_dict = nrcSpreadsheetScraper.sheet2dict(workbook.sheet_by_name(sname)) raw_sheet_cache[sname] = sheet_dict sheet_cache[sname] = {row[sheet_seqnos_field]: row for row in sheet_dict} except IndexError: # Sheet was empty pass map_def = { "db_table": '"NrcScrapedReport"', "db_field": "material_name", "db_schema": "public", "sheet_name": "MATERIAL_INVOLVED", "column": "NAME_OF_MATERIAL", "processing": { "function": None, # Not needed for testing - at this point it has already been called "args": { "extras_table": '"NrcScrapedMaterial"', "extras_schema": "public", "extras_field_maps": { 'public."NrcScrapedReport"': [ { "db_table": "NrcScrapedMaterial", "db_field": "reportnum", "db_schema": "public", "sheet_name": "MATERIAL_INVOLVED", "column": "SEQNOS", "processing": None, }, { # NOTE: This is the same as the parent field map but it is required to write the # specified value to the extras table "db_table": '"NrcScrapedMaterial"', "db_field": "name", "db_field_width": 32, "db_schema": "public", "sheet_name": "MATERIAL_INVOLVED", "column": "NAME_OF_MATERIAL", "processing": None, }, { "db_table": "NrcScrapedMaterial", "db_field": "reached_water", "db_schema": "public", "sheet_name": "MATERIAL_INVOLVED", "column": "IF_REACHED_WATER", "processing": None, }, { "db_table": '"NrcScrapedMaterial"', "db_field": "amt_in_water", "db_schema": "public", "sheet_name": "MATERIAL_INVOLVED", "column": "AMOUNT_IN_WATER", "processing": None, }, { "db_table": '"NrcScrapedMaterial"', "db_field": "amt_in_water_unit", "db_schema": "public", "sheet_name": "MATERIAL_INVOLVED", "column": "UNIT_OF_MEASURE_REACH_WATER", "processing": None, }, { "db_table": '"NrcScrapedMaterial"', "db_field": "chris_code", "db_schema": "public", "sheet_name": "MATERIAL_INV0LVED_CR", "column": "CHRIS_CODE", "processing": None, }, { "db_table": '"NrcScrapedMaterial"', "db_field": "amount", "db_schema": "public", "sheet_name": "MATERIAL_INV0LVED_CR", "column": "UPPER_BOUNDS", "processing": None, }, { "db_table": '"NrcScrapedMaterial"', "db_field": "unit", "db_schema": "public", "sheet_name": "MATERIAL_INV0LVED_CR", "column": "UPPER_BOUNDS_UNIT", "processing": None, }, { "db_table": '"NrcScrapedMaterial"', "db_field": "ft_id", "db_schema": "public", "sheet_name": "CALLS", "column": None, "processing": {"function": processing_placeholder}, }, { "db_table": '"NrcScrapedMaterial"', "db_field": "st_id", "db_schema": "public", "sheet_name": "CALLS", "column": None, "processing": {"function": processing_placeholder}, }, ] }, }, }, } # Execute function return_val = nrcSpreadsheetScraper.NrcScrapedReportFields.material_name( db_cursor=db_cursor, uid=uid, workbook=workbook, row=None, db_null_value=db_null_value, map_def=map_def, sheet=sheet_cache[map_def["sheet_name"]], all_field_maps=None, sheet_seqnos_field=sheet_seqnos_field, db_write_mode=db_write_mode, print_queries=print_queries, execute_queries=execute_queries, raw_sheet_cache=raw_sheet_cache, db_seqnos_field=db_seqnos_field, sheet_cache=sheet_cache, ) # The processing function is supposed to return a material that is then included in the parent query # Make sure the proper value was returned self.assertEqual(return_val, "GASOLINE: AUTOMOTIVE (UNLEADED)") # Make sure the proper number of records were inserted into the NrcScrapedMaterial table count_query = """SELECT COUNT(1) FROM public."NrcScrapedMaterial";""" e_sheet = workbook.sheet_by_name("MATERIAL_INVOLVED") expected = len([c.value for c in e_sheet.col(0) if c.value == uid]) db_cursor.execute(count_query) result = db_cursor.fetchall() actual = int(result[0][0]) self.assertEqual(expected, actual) # Cleanup truncate_query = """TRUNCATE public."NrcScrapedMaterial";""" db_cursor.execute(truncate_query) expected = 0 db_cursor.execute(count_query) actual = int(db_cursor.fetchall()[0][0]) self.assertEqual(expected, actual) e_sheet = None db_cursor.close() db_conn.close()