def extract_latest_file(self, list_blobs):
        """Select the entry of ``list_blobs`` whose name carries the newest date.

        Every name is passed to ``extract_date_from_file``; names for which it
        returns a falsy value are skipped. When a later-seen name has the same
        date as the currently selected one, it is treated as a collision
        unless that name is a Spark directory entry
        (``self.is_a_spark_directory``) located in the same directory as the
        selected name.

        Args:
            list_blobs: iterable of blob/file names (strings).

        Returns:
            dict with the keys ``latest_filename`` (selected name, or ``None``
            when no name contained a date), ``suffix`` (selected date as
            ``YYYY-MM-DD``; ``1900-01-01`` when nothing was selected) and
            ``spark`` (``self.is_a_spark_directory`` applied to the selected
            name).

        Raises:
            SystemExit: with status 1 when the newest date is shared by
                colliding names.
        """
        # Function-scope import: the file's import block is outside this view.
        # sys.exit is used instead of the ``exit`` helper injected by ``site``,
        # which is not available in every interpreter configuration.
        import sys

        last_recent_file = None
        possible_recent_date_collision = False
        # Placeholder only: it is reported as "suffix" when nothing is
        # selected, but it must never be compared against as if it were the
        # date of a real file (last_recent_file is still None at that point).
        recent_date = datetime.strptime('01-01-1900', '%d-%m-%Y')
        for filename in list_blobs:
            date_file = extract_date_from_file(filename)
            if not date_file:
                continue
            if last_recent_file is None or date_file > recent_date:
                # First dated name or a strictly newer one: it becomes the
                # candidate and any collision recorded for an older date is
                # no longer relevant.
                possible_recent_date_collision = False
                recent_date = date_file
                last_recent_file = filename
            elif date_file == recent_date:
                if not self.is_a_spark_directory(filename):
                    possible_recent_date_collision = True
                elif (os.path.dirname(filename)
                        != os.path.dirname(last_recent_file)):
                    # Spark entry, but from a different directory than the
                    # current candidate: still ambiguous.
                    logger.info("%s vs %s", filename, last_recent_file)
                    possible_recent_date_collision = True

        if possible_recent_date_collision:
            # No name is unique for the most recent date: abort.
            logger.error("Error TWO files with the same date: %s %s",
                         last_recent_file, recent_date.strftime('%d-%m-%Y'))
            sys.exit(1)

        logger.info("Latest file: %s %s", last_recent_file,
                    recent_date.strftime('%d-%m-%Y'))
        return {
            "latest_filename": last_recent_file,
            "suffix": recent_date.strftime('%Y-%m-%d'),
            "spark": self.is_a_spark_directory(last_recent_file)
        }
# Example #2
# 0
 def test_all_date_yyyy_mm_dd(self):
     """
     Check date extraction from file names embedding a yyyy-mm-dd date,
     for every entry of self.dates.
     """
     midnight = datetime.min.time()
     for day in self.dates:
         # The extractor is expected to return the day at 00:00:00.
         wanted = datetime.combine(day, midnight)
         # str(day) is assumed to render as yyyy-mm-dd (true for
         # datetime.date) -- TODO confirm the type held by self.dates.
         blob_name = ''.join(('temp_', str(day), '.json.gz'))
         self.assertEqual(extract_date_from_file(blob_name), wanted)
# Example #3
# 0
 def test_all_date_dd_mm_yyyy(self):
     """
     Check date extraction from file names embedding a zero-padded
     dd-mm-yyyy date, for every entry of self.dates.
     """
     midnight = datetime.min.time()
     for day in self.dates:
         # The extractor is expected to return the day at 00:00:00.
         wanted = datetime.combine(day, midnight)
         day_first = str(day.strftime('%d-%m-%Y'))
         blob_name = ''.join(('temp_', day_first, '.json.gz'))
         self.assertEqual(extract_date_from_file(blob_name), wanted)
# Example #4
# 0
 def test_all_date_yyyy_m_d(self):
     """
     Check year-first dates written without zero padding on the month
     or the day, e.g. 2021-2-10 / 2021-11-1 / 2021-1-1.
     """
     midnight = datetime.min.time()
     for day in self.dates:
         unpadded = str('{dt.year}-{dt.month}-{dt.day}'.format(dt=day))
         if str(day) == unpadded:
             # Nothing was stripped (e.g. "2021-10-10"): the padded
             # spelling is already exercised by another test.
             continue
         wanted = datetime.combine(day, midnight)
         blob_name = ''.join(('temp_', unpadded, '.json.gz'))
         self.assertEqual(extract_date_from_file(blob_name), wanted)
# Example #5
# 0
 def test_all_date_d_m_yyyy(self):
     """
     Check day-first dates written without zero padding on the day or
     the month, e.g. 2-10-2021 / 11-5-2021 / 1-1-2021.
     """
     midnight = datetime.min.time()
     for day in self.dates:
         padded = str(day.strftime('%d-%m-%Y'))
         unpadded = str('{dt.day}-{dt.month}-{dt.year}'.format(dt=day))
         if padded == unpadded:
             # Nothing was stripped (e.g. "10-10-2021"): the padded
             # spelling is already exercised by another test.
             continue
         wanted = datetime.combine(day, midnight)
         blob_name = ''.join(('temp_', unpadded, '.json.gz'))
         self.assertEqual(extract_date_from_file(blob_name), wanted)
# Example #6
# 0
 def test_wrong_format(self):
     """
     A date glued to preceding digits ("cosmic007" + "4-02-2021") is
     still expected to be read as 4 February 2021.
     """
     extracted = extract_date_from_file('temp_cosmic0074-02-2021.json.gz')
     self.assertEqual(extracted, datetime(2021, 2, 4))
# Example #7
# 0
 def test_no_date_found(self):
     """
     A file name without any recognisable date must give None.
     """
     extracted = extract_date_from_file('temp_cosmic007-0402-2021.json.gz')
     self.assertEqual(extracted, None)