Exemplo n.º 1
0
def lookup_file(object_key):
    """
    Resolve an S3 object key to the file type describing how to process it.

    A string key is compared (fnmatch style) against the incoming-file
    pattern of each known file type, in order; the first match is logged and
    returned. When nothing matches, or when the key is not a string, the key
    is handed to create_new_file_type and its result is returned.
    """
    # Non-string keys cannot be pattern matched; defer to the fallback at once.
    if not isinstance(object_key, str):
        return create_new_file_type(object_key)

    # Known files are hardcoded for now. A future version is meant to pull
    # this data from a dynamoDB table through an API.
    known_types = (
        FileType("Email CSV", "user/userEmailFile*.csv",
                 "mstr/userEmailFile.csv", "Email", ",", "\""),
        FileType("Identifier CSV", "user/randomDataFile*.csv",
                 "mstr/randomDataFile.csv", "Id", ",", "\""),
    )

    # Lazily scan the known types and keep the first whose pattern matches.
    matching = (
        candidate for candidate in known_types
        if fnmatch.fnmatch(object_key, candidate.incoming_file_pattern)
    )
    found = next(matching, None)
    if found is None:
        # No known pattern matched: create a new file type for this key.
        return create_new_file_type(object_key)

    logging.info('File Type Found: {}'.format(found.file_process_name))
    return found
Exemplo n.º 2
0
class TestLookupFile(unittest.TestCase):
    """Checks s3_functions.lookup_file against known, unknown and invalid keys."""

    # File type expected for keys matching the email CSV pattern.
    expected = FileType("Email CSV", "user/userEmailFile*.csv",
                        "mstr/userEmailFile.csv", "Email", ",", "\"")
    # File type expected for the unrecognised key 'user/unit_test.csv'.
    expected_unknown = FileType("unit_test.csv", "user/unit_test.csv",
                                "mstr/unit_test.csv", "Id", ",", "\"")

    def test_file_type_equals(self):
        # A FileType built from the same arguments must compare equal.
        twin = FileType("Email CSV", "user/userEmailFile*.csv",
                        "mstr/userEmailFile.csv", "Email", ",", "\"")
        self.assertTrue(twin == self.expected)

    def test_lookup_file_known_file(self):
        # Key without any extra characters where the pattern has its wildcard.
        self.assertEqual(
            s3_functions.lookup_file('user/userEmailFile.csv'),
            self.expected)

    def test_lookup_file_known_file_pattern(self):
        # Key with extra characters covered by the pattern's wildcard.
        self.assertEqual(
            s3_functions.lookup_file('user/userEmailFile_PatternMatching.csv'),
            self.expected)

    def test_lookup_file_bad_file_pattern(self):
        # Key outside the 'user/' prefix yields no file type.
        self.assertIsNone(s3_functions.lookup_file('somejunkfile.csv'))

    def test_lookup_file_unknown_file_pattern_2(self):
        # Unrecognised key under 'user/' yields a newly created file type.
        self.assertEqual(
            self.expected_unknown,
            s3_functions.lookup_file('user/unit_test.csv'))

    def test_lookup_file_none(self):
        self.assertIsNone(s3_functions.lookup_file(None))

    def test_lookup_file_numeric(self):
        self.assertIsNone(s3_functions.lookup_file(1234))
Exemplo n.º 3
0
def create_new_file_type(key):
    """
    Build a placeholder file type from the object key passed in.

    Nothing is stored: the file type is derived purely from the key. Future
    versions that use a data store for the file types will allow this
    function to write to that store.

    Args:
        key: Object key; expected to be a string starting with 'user/'.

    Returns:
        A FileType whose process name is the key without its leading
        'user/', whose incoming pattern is the key itself, and whose master
        key is the key with the leading 'user/' swapped for 'mstr/'; primary
        key 'Id', delimiter ',' and text qualifier '"'. None (after logging
        the key) when key is not a string or lacks the 'user/' prefix.
    """
    incoming_prefix = 'user/'

    if not isinstance(key, str) or not key.startswith(incoming_prefix):
        logging.info('Invalid object key: {}'.format(key))
        return None

    # Strip only the leading prefix. str.replace would also rewrite any later
    # 'user/' inside the key (e.g. 'user/superuser/a.csv'), corrupting both
    # the process name and the master key.
    relative_key = key[len(incoming_prefix):]

    file_type = FileType(file_process_name=relative_key,
                         incoming_file_pattern=key,
                         master_file_s3_key='mstr/' + relative_key,
                         primary_key='Id',
                         field_delimiter=',',
                         text_qualifier='\"')
    return file_type
Exemplo n.º 4
0
    def test_get_data_frame_bad_data3(self):
        # get_dataframe is given an S3Object built from an empty string and a
        # non-string value; the expected result is an empty frame with the
        # single column 'unittest'.
        sample_df = pandas.DataFrame(columns=['unittest'])
        sample_object = S3Object('', 1234)

        file_type = FileType(file_process_name='unittest',
                             incoming_file_pattern='unittest',
                             master_file_s3_key='unittest',
                             primary_key='unittest',
                             field_delimiter=',',
                             text_qualifier='\"')
        result = s3_functions.get_dataframe(self.s3, sample_object, file_type)
        # Let assert_frame_equal raise on a mismatch. Catching AssertionError
        # and returning False made the test pass unconditionally, because
        # unittest ignores a test method's return value.
        assert_frame_equal(sample_df, result)
Exemplo n.º 5
0
 def test_get_data_frame(self):
     # get_dataframe is given an S3Object naming 'unit_test_sample.csv'; the
     # expected result is a one-row frame with columns 'Test' and 'Test2'.
     d = {'Test': ['Test data'], 'Test2': ['More test data']}
     sample_df = pandas.DataFrame(data=d)
     sample_object = S3Object('dschultz-python-skills-demo-unittests',
                              'unit_test_sample.csv')
     file_type = FileType(file_process_name='unittest',
                          incoming_file_pattern='unittest',
                          master_file_s3_key='unittest',
                          primary_key='unittest',
                          field_delimiter=',',
                          text_qualifier='\"')
     result = s3_functions.get_dataframe(self.s3, sample_object, file_type)
     # Let assert_frame_equal raise on a mismatch. Catching AssertionError
     # and returning False made the test pass unconditionally, because
     # unittest ignores a test method's return value.
     assert_frame_equal(sample_df, result)
Exemplo n.º 6
0
class TestCreateNewFileType(unittest.TestCase):
    """Checks s3_functions.create_new_file_type for valid and invalid keys."""

    # File type expected for the key 'user/unit_test.csv'.
    expected = FileType("unit_test.csv", "user/unit_test.csv",
                        "mstr/unit_test.csv", "Id", ",", "\"")

    def test_create_new_file_tyep(self):
        # A key under 'user/' produces the expected file type.
        self.assertEqual(
            self.expected,
            s3_functions.create_new_file_type('user/unit_test.csv'))

    def test_create_new_file_type_bad_key(self):
        # A key without the 'user/' prefix is rejected.
        self.assertIsNone(s3_functions.create_new_file_type('junk.csv'))

    def test_create_new_file_type_empty_key(self):
        self.assertIsNone(s3_functions.create_new_file_type(''))

    def test_create_new_file_type_numeric(self):
        self.assertIsNone(s3_functions.create_new_file_type(1234))

    def test_create_new_file_type_none(self):
        self.assertIsNone(s3_functions.create_new_file_type(None))
Exemplo n.º 7
0
 def test_file_type_equals(self):
     # A FileType built from the same arguments as the class-level
     # `expected` fixture must compare equal to it.
     candidate = FileType("Email CSV", "user/userEmailFile*.csv",
                          "mstr/userEmailFile.csv", "Email", ",", "\"")
     self.assertTrue(candidate == self.expected)