示例#1
0
class SFTPSensor(BaseSensorOperator):
    @apply_defaults
    def __init__(self,
                 filepath,
                 filepattern,
                 sftp_conn_id='sftp_default',
                 *args,
                 **kwargs):
        super(SFTPSensor, self).__init__(*args, **kwargs)
        self.filepath = filepath
        self.filepattern = filepattern
        self.sftp_conn_id = sftp_conn_id
        self.hook = SFTPHook(ftp_conn_id=sftp_conn_id, keepalive_interval=10)

    def poke(self, context):
        full_path = self.filepath
        file_pattern = re.compile(self.filepattern)
        fileDict = {}
        fileList = []

        try:
            isFound = False
            directory = self.hook.describe_directory(full_path)
            logging.info('Polling Interval 1')
            for file in directory.keys():
                if not re.match(file_pattern, file):
                    self.log.info(file)
                    self.log.info(file_pattern)
                    del directory[file]

            if not directory:
                # If directory has no files that match the mask, exit
                return isFound

            # wait before we compare file sizes and timestamps again to
            # verify that the file is done transferring to remote loc
            time.sleep(30)

            logging.info('Post-Wait Polling')
            newDirectoryResults = self.hook.describe_directory(full_path)

            for file in newDirectoryResults.keys():
                if file in directory.keys():
                    if newDirectoryResults[file]['size'] == directory[file]['size'] and \
                        newDirectoryResults[file]['modify'] == directory[file]['modify']:

                        fileList.append(file)
                        print(
                            'filename: {} with size {} and modified time of {} met all criteria to be moved.'
                            .format(file, newDirectoryResults[file]['size'],
                                    newDirectoryResults[file]['modify']))
                        isFound = True

            context["task_instance"].xcom_push("file_name", fileList)

            return isFound
        except IOError as e:
            if e.errno != SFTP_NO_SUCH_FILE:
                raise e
            return False
def pollForFiles(**kwargs):
    # Create some local scope variables for use later in proc
    sftpConnName = kwargs['SFTP_Connection_Name']
    feedGroups = kwargs['Feed_Groups']
    
    # Connect to SFTP site using provided credentials - should be saved in Connections
    sourceHook = SFTPHook(ftp_conn_id = sftpConnName)

    # Create empty dictionary for storing files that match file masks
    fileMatches = {}

    # Loop through feed locations and their regex for this SFTP site.
    for i in feedGroups:
        fullPath = i['Feed_Group_Location']
        filePattern = i['Feed_Group_Regex']
        feedGroupName = i['Feed_Group_Name']

        logging.info('Evaluating Feed Group {}'.format(feedGroupName))

        try:
            directory = sourceHook.describe_directory(path = fullPath)
            for file in directory.keys():
                if re.match(filePattern, file):
                    fileMatches[os.path.join(fullPath, file)] = directory[file]
        except Exception as e:
            logging.error('Error attempting to poll feed group {} in directory {}'.format(feedGroupName, fullPath))
            raise e

    # If we do not find a file that matches a file mask in any of the directories, exit.
    if not fileMatches:
        return 0

    # If no trigger files or renaming is utilized by the client when placing files on SFTP, we
    #   have to resort to polling for files, waiting for a time period and then comparing the size/modified time
    #   to see if they are ready to pull down.
    time.sleep(SLEEP_TIME)

    for j in feedGroups:
        fullPath = j['Feed_Group_Location']
        filePattern = j['Feed_Group_Regex']
        feedGroupName = j['Feed_Group_Name']

        logging.info('Evaluating Feed Group {} after sleeping'.format(feedGroupName))

        try:
            newDirResults = sourceHook.describe_directory(fullPath)

            for file in newDirResults:
                fullFilePath = os.path.join(fullPath, file)

                if fullFilePath in fileMatches.keys():
                    if newDirResults[file]['size'] == fileMatches[fullFilePath]['size'] and \
                            newDirResults[file]['modify'] == fileMatches[fullFilePath]['modify']:
                        
                        # If file hasn't changed size or modified time since first look, set to ready for another process to pick up and transfer.
                        sourceHook.conn.rename(fullFilePath, fullFilePath + '.ready')
                        logging.info('Tagged the {} file as ready.'.format(fullFilePath))
        except Exception as e:
            logging.error('Error attempting to rename files in feed group {} in directory {}'.format(feedGroupName, fullPath))
            raise e
示例#3
0
def pollForFiles(**kwargs):
    """
    Poll the flatfiles directory for files to process.
    """
    fileRegex = r'^BP_STORE.*\.txt$'
    autoPath = '/airflow_test/postgresdb_etl_poc_flatfiles/auto'
    processPath = '/airflow_test/postgresdb_etl_poc_flatfiles/processing'

    sourceHook = SFTPHook(ftp_conn_id='kub2VM')

    fileMatches = {}

    try:
        directory = sourceHook.describe_directory(path=autoPath)
        for file in directory.keys():
            if re.match(fileRegex, file):
                fileMatches[file] = directory[file]
    except Exception as e:
        logging.error('Error attempting to poll directory {}'.format(autoPath))
        raise e

    print('FileMatches: {}'.format(fileMatches))

    for file in fileMatches.keys():
        sourceHook.conn.rename(os.path.join(autoPath, file),
                               os.path.join(processPath, file))

        dag_params = {}
        dag_params['fileName'] = file
        dag_params['processPath'] = processPath

        triggerConfig = {
            'fileName': file,
            'processPath': processPath,
        }

        trigger_dag(dag_id='ProcessStoreFeed',
                    run_id='trig_{}'.format(timezone.utcnow().isoformat()),
                    conf=json.dumps(triggerConfig),
                    execution_date=None,
                    replace_microseconds=False)

        logging.info('Triggered DAG Job for File: {}'.format(file))

        # Introduce a delay between scheduling dags so there is an order to execution.
        # I'm worried that if we submit sub-second for multiples that it'll try to run them all at once.
        time.sleep(10)

    if sourceHook.conn:
        sourceHook.close_conn()
示例#4
0
def sftp_to_pg(**kwargs):
    today = datetime.date.today().strftime('%y%m%d')
    conn = SFTPHook('sftp_cityftp')
    files = conn.describe_directory('/Home/IET/PNC')
    file_name = [
        fn for fn in files.keys()
        if fn.startswith(f"tls.cityofdetroit.out.{today}")
    ][0]
    conn.retrieve_file(f"/Home/IET/PNC/{file_name}", f"/tmp/{file_name}")

    pg_conn = PostgresHook('etl_postgres')
    pg_conn.run("truncate table escrow.escrow")
    pg_conn.run(
        f"copy escrow.escrow from '/tmp/{file_name}' (FORMAT CSV, HEADER FALSE) "
    )
示例#5
0
class SFTPHookTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name in output)
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name not in output)

    def test_store_retrieve_and_delete_file(self):
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))
        self.hook.delete_file(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": "foo"}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    def tearDown(self):
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
def lookAtDir(**context):
    sourceHook = SFTPHook(ftp_conn_id=SOURCE_SFTP_CONN_ID)

    resultDir = sourceHook.describe_directory(SOURCE_FILEPATH)

    logging.info('Directory Contents: {}'.format(resultDir))
示例#7
0
class TestSFTPHook(unittest.TestCase):
    @provide_session
    def update_connection(self, login, session=None):
        connection = (session.query(Connection).filter(
            Connection.conn_id == "sftp_default").first())
        old_login = connection.login
        connection.login = login
        session.commit()
        return old_login

    def setUp(self):
        self.old_login = self.update_connection(SFTP_CONNECTION_USER)
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR))

        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as file:
            file.write('Test file')
        with open(
                os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                             TMP_FILE_FOR_TESTS), 'a') as file:
            file.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [SUB_DIR])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name in output)
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name not in output)

    def test_create_and_delete_directories(self):
        base_dir = "base_dir"
        sub_dir = "sub_dir"
        new_dir_path = os.path.join(base_dir, sub_dir)
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_path))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(base_dir in output)
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, base_dir))
        self.assertTrue(sub_dir in output)
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_path))
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, base_dir))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_path not in output)
        self.assertTrue(base_dir not in output)

    def test_store_retrieve_and_delete_file(self):
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [SUB_DIR, TMP_FILE_FOR_TESTS])
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))
        self.hook.delete_file(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [SUB_DIR])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"no_host_key_check": "foo"}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        connection = Connection(login='******',
                                host='host',
                                extra='{"ignore_hostkey_verification": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @parameterized.expand([
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), True),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS + "abc"), False),
        (os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, "abc"), False),
    ])
    def test_path_exists(self, path, exists):
        result = self.hook.path_exists(path)
        self.assertEqual(result, exists)

    @parameterized.expand([
        ("test/path/file.bin", None, None, True),
        ("test/path/file.bin", "test", None, True),
        ("test/path/file.bin", "test/", None, True),
        ("test/path/file.bin", None, "bin", True),
        ("test/path/file.bin", "test", "bin", True),
        ("test/path/file.bin", "test/", "file.bin", True),
        ("test/path/file.bin", None, "file.bin", True),
        ("test/path/file.bin", "diff", None, False),
        ("test/path/file.bin", "test//", None, False),
        ("test/path/file.bin", None, ".txt", False),
        ("test/path/file.bin", "diff", ".txt", False),
    ])
    def test_path_match(self, path, prefix, delimiter, match):
        result = self.hook._is_path_match(path=path,
                                          prefix=prefix,
                                          delimiter=delimiter)
        self.assertEqual(result, match)

    def test_get_tree_map(self):
        tree_map = self.hook.get_tree_map(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        files, dirs, unknowns = tree_map

        self.assertEqual(files, [
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR,
                         TMP_FILE_FOR_TESTS)
        ])
        self.assertEqual(dirs,
                         [os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, SUB_DIR)])
        self.assertEqual(unknowns, [])

    def tearDown(self):
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        self.update_connection(self.old_login)
示例#8
0
def pollForFiles(**kwargs):
    # Create some local scope variables for use later in proc
    sftpName = kwargs['SFTP_Name']
    sftpConnName = kwargs['SFTP_Connection_Name']
    feedGroups = kwargs['Feed_Groups']
    
    # Connect to SFTP site using provided credentials - should be saved in Connections
    sourceHook = SFTPHook(ftp_conn_id = sftpConnName)

    # Create empty dictionary for storing files that match file masks
    fileMatches = {}

    # Loop through feed locations and their regex for this SFTP site.
    for i in feedGroups:
        fullPath = i['Feed_Group_Location']
        filePattern = i['Feed_Group_Regex']
        feedGroupName = i['Feed_Group_Name']

        try:
            directory = sourceHook.describe_directory(path = fullPath)
            for file in directory.keys():
                if re.match(filePattern, file):
                    fileMatches[os.path.join(fullPath, file)] = directory[file]
        except Exception as e:
            logging.error('Error attempting to poll feed group {} in directory {}'.format(feedGroupName, fullPath))
            raise e

    # If we do not find a file that matches a file mask in any of the directories, exit.
    if not fileMatches:
        return 0

    # If no trigger files or renaming is utilized by the client when placing files on SFTP, we
    #   have to resort to polling for files, waiting for a time period and then comparing the size/modified time
    #   to see if they are ready to pull down.
    time.sleep(SLEEP_TIME)

    for j in feedGroups:
        fullPath = j['Feed_Group_Location']
        filePattern = j['Feed_Group_Regex']
        feedGroupName = j['Feed_Group_Name']
        newFileMatches = {}

        try:
            newDirResults = sourceHook.describe_directory(fullPath)
            # Add only the files that match regular expression for this feed group
            for file in newDirResults.keys():
                if re.match(filePattern, file):
                    newFileMatches[os.path.join(fullPath, file)] = newDirResults[file]

            for file in newFileMatches.keys():
                # fullFilePath = os.path.join(fullPath, file)

                if file in fileMatches.keys():
                    if newFileMatches[file]['size'] == fileMatches[file]['size'] and \
                            newFileMatches[file]['modify'] == fileMatches[file]['modify']:
                        
                        readyFile = file + '.ready'
                        
                        # If file hasn't changed size or modified time since first look, set to ready for another process to pick up and transfer.
                        sourceHook.conn.rename(file, readyFile)
                        logging.info('SFTP: {} FeedGroup: {} File: {} is ready.'.format(sftpName, feedGroupName, os.path.basename(file)))
                        
                        triggerConfig = {
                            'SFTP_Name': sftpName, 
                            'SFTP_Connection_Name': sftpConnName,
                            'File_Name': readyFile,
                        }

                        triggerConfig.update(j)
                        
                        trigger_dag(
                            dag_id = 'SingleFileTransferJob',
                            run_id = 'trig_{}'.format(timezone.utcnow().isoformat()),
                            conf = json.dumps(triggerConfig),
                            execution_date = None,
                            replace_microseconds = False
                        )
        except Exception as e:
            logging.error('Error attempting to rename files in feed group {} in directory {}'.format(feedGroupName, fullPath))
            raise e
示例#9
0
class SFTPHookTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name in output)
        self.hook.delete_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name not in output)

    def test_store_retrieve_and_delete_file(self):
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name))
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))
        self.hook.delete_file(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS,
                                          TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
        output = self.hook.get_mod_time(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        self.assertEqual(len(output), 14)

    def tearDown(self):
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
class SFTPHookTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name in output)
        self.hook.delete_directory(os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name not in output)

    def test_store_retrieve_and_delete_file(self):
        self.hook.store_file(
            remote_full_path=os.path.join(
                TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=os.path.join(
                TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name)
        )
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))
        self.hook.delete_file(path=os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=os.path.join(
                TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.get_mod_time(path=os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        self.assertEqual(len(output), 14)

    def tearDown(self):
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
示例#11
0
class SFTPHookTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        self.hook = SFTPHook()
        os.makedirs(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        with open(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS), 'a') as f:
            f.write('Test file')

    def test_get_conn(self):
        output = self.hook.get_conn()
        self.assertEqual(type(output), pysftp.Connection)

    def test_close_conn(self):
        self.hook.conn = self.hook.get_conn()
        self.assertTrue(self.hook.conn is not None)
        self.hook.close_conn()
        self.assertTrue(self.hook.conn is None)

    def test_describe_directory(self):
        output = self.hook.describe_directory(TMP_PATH)
        self.assertTrue(TMP_DIR_FOR_TESTS in output)

    def test_list_directory(self):
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_create_and_delete_directory(self):
        new_dir_name = 'new_dir'
        self.hook.create_directory(os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name in output)
        self.hook.delete_directory(os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, new_dir_name))
        output = self.hook.describe_directory(
            os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertTrue(new_dir_name not in output)

    def test_store_retrieve_and_delete_file(self):
        self.hook.store_file(
            remote_full_path=os.path.join(
                TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [TMP_FILE_FOR_TESTS])
        retrieved_file_name = 'retrieved.txt'
        self.hook.retrieve_file(
            remote_full_path=os.path.join(
                TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, retrieved_file_name)
        )
        self.assertTrue(retrieved_file_name in os.listdir(TMP_PATH))
        os.remove(os.path.join(TMP_PATH, retrieved_file_name))
        self.hook.delete_file(path=os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        output = self.hook.list_directory(
            path=os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        self.assertEqual(output, [])

    def test_get_mod_time(self):
        self.hook.store_file(
            remote_full_path=os.path.join(
                TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS),
            local_full_path=os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS)
        )
        output = self.hook.get_mod_time(path=os.path.join(
            TMP_PATH, TMP_DIR_FOR_TESTS, TMP_FILE_FOR_TESTS))
        self.assertEqual(len(output), 14)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_default(self, get_connection):
        connection = Connection(login='******', host='host')
        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_enabled(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"no_host_key_check": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"no_host_key_check": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_disabled_for_all_but_true(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"no_host_key_check": "foo"}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_ignore(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"ignore_hostkey_verification": true}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, True)

    @mock.patch('airflow.contrib.hooks.sftp_hook.SFTPHook.get_connection')
    def test_no_host_key_check_no_ignore(self, get_connection):
        connection = Connection(
            login='******', host='host',
            extra='{"ignore_hostkey_verification": false}')

        get_connection.return_value = connection
        hook = SFTPHook()
        self.assertEqual(hook.no_host_key_check, False)

    def tearDown(self):
        shutil.rmtree(os.path.join(TMP_PATH, TMP_DIR_FOR_TESTS))
        os.remove(os.path.join(TMP_PATH, TMP_FILE_FOR_TESTS))
示例#12
0
def get_file(**kwargs):
    conn = SFTPHook('sftp_novatus')
    d = conn.describe_directory('/outgoing/')
    conn.retrieve_file(path, f"/tmp/{file_name}")