def test_extract_dpae_two_files_diff(self):
    """Import two DPAE files in a row and check the resulting hiring counts.

    The second file contains duplicated records, one record from the
    future, and only 2 really new valid records.
    """
    first_month_path = self.get_data_file_path(FIRST_DPAE_FILE_NAME)
    second_month_path = self.get_data_file_path(SECOND_DPAE_FILE_NAME)

    # First import: 6 hirings are expected.
    extract_dpae.DpaeExtractJob(first_month_path).run()
    self.assertEqual(Hiring.query.count(), 6)

    # Second import: only the 2 new valid records are expected on top.
    extract_dpae.DpaeExtractJob(second_month_path).run()
    self.assertEqual(Hiring.query.count(), 6 + 2)
def test_extract_dpae_two_files_diff(self):
    """Import two DPAE files in a row and check the resulting hiring counts.

    The second file contains one record from the future.
    """
    path_month_one = self.get_data_file_path(FIRST_DPAE_FILE_NAME)
    path_month_two = self.get_data_file_path(SECOND_DPAE_FILE_NAME)

    count_after_first_file = 6
    # Expect 6 + 2 rather than 6 + 5: only 2 DPAE in SECOND_DPAE_FILE_NAME
    # are between 10/11/2016 and 10/12/2016.
    new_from_second_file = 2

    first_job = extract_dpae.DpaeExtractJob(path_month_one)
    first_job.run()
    self.assertEqual(Hiring.query.count(), count_after_first_file)

    second_job = extract_dpae.DpaeExtractJob(path_month_two)
    second_job.run()
    self.assertEqual(
        Hiring.query.count(),
        count_after_first_file + new_from_second_file,
    )
def test_extract_dpae_two_files_diff(self):
    """Import two DPAE files in a row and check the resulting Dpae counts.

    The updated (second) file contains duplicated records, one record from
    the future, and only 2 really new valid records.
    """
    filename_first_month = self.get_data_file_path(
        "LBB_XDPDPA_DPAE_20151010_20161110_20161110_174915.csv"
    )
    filename_second_month = self.get_data_file_path(
        "LBB_XDPDPA_DPAE_20151110_20161210_20161210_094110.csv"
    )

    # NOTE(review): this sets a class-level flag and never restores it, so
    # it stays False for anything that runs afterwards — presumably it
    # skips a backup step before the import; confirm that is intended.
    extract_dpae.DpaeExtractJob.backup_first = False

    task = extract_dpae.DpaeExtractJob(filename_first_month)
    task.run()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(Dpae.query.count(), 6)

    task = extract_dpae.DpaeExtractJob(filename_second_month)
    task.run()
    self.assertEqual(Dpae.query.count(), 6 + 2)
def test_verify_right_number_dpae(self):
    """Check the hiring count stays at 6 after registry entries are removed."""
    self.assertEqual(Hiring.query.count(), 0)

    data_path = self.get_data_file_path(FIRST_DPAE_FILE_NAME)
    extract_dpae.DpaeExtractJob(data_path).run()
    self.assertEqual(Hiring.query.count(), 6)

    # Delete the file from the registry to simulate the importer job
    # crashing before registering the file.
    registered_file = ImportTask.query.filter(
        ImportTask.filename == FIRST_DPAE_FILE_NAME
    )
    registered_file.delete()
    latest_statistics = DpaeStatistics.query.order_by(
        DpaeStatistics.most_recent_data_date.desc()
    ).first()
    latest_statistics.delete()

    # NOTE(review): the second job is only instantiated, never run, so the
    # assertion below does not exercise a re-import — confirm whether a
    # run() call is missing here.
    second_job = extract_dpae.DpaeExtractJob(data_path)
    self.assertEqual(Hiring.query.count(), 6)
def test_extract_dpae(self):
    """Import the first DPAE file and check count and insertion dates."""
    self.assertEqual(Hiring.query.count(), 0)

    source_path = self.get_data_file_path(FIRST_DPAE_FILE_NAME)
    extract_dpae.DpaeExtractJob(source_path).run()
    self.assertEqual(Hiring.query.count(), 6)

    # Check that date_insertion is filled on every hiring.
    # `== None` is kept on purpose: presumably the ORM overloads `==` to
    # build the NULL filter, so it must not become `is None` — confirm.
    hirings_without_date = Hiring.query.filter(
        Hiring.date_insertion == None  # noqa: E711
    )
    self.assertEqual(hirings_without_date.count(), 0)
def test_extract_bz2_format(self):
    """Import the `.bz2` variant of the first DPAE file and expect 6 hirings."""
    compressed_path = self.get_data_file_path(FIRST_DPAE_FILE_NAME + ".bz2")
    extract_dpae.DpaeExtractJob(compressed_path).run()
    self.assertEqual(Hiring.query.count(), 6)
def test_extract_bz2_format(self):
    """Import a `.csv.bz2` DPAE file and expect 6 Dpae rows."""
    filename = self.get_data_file_path(
        "LBB_XDPDPA_DPAE_20151010_20161110_20161110_174915.csv.bz2"
    )

    # NOTE(review): this sets a class-level flag and never restores it, so
    # it stays False for anything that runs afterwards — presumably it
    # skips a backup step before the import; confirm that is intended.
    extract_dpae.DpaeExtractJob.backup_first = False

    task = extract_dpae.DpaeExtractJob(filename)
    task.run()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed in Python 3.12.
    self.assertEqual(Dpae.query.count(), 6)