Пример #1
0
class FTPFSHook(FileSystemHook):
    """File-system hook that lists, writes and reads files over FTP.

    Every path handed to the public methods is joined onto ``base_path``,
    which is taken from the ``base_path`` entry of the connection extras and
    falls back to ``/`` when that entry is absent.
    """

    conn_type = 'ftp_filesystem'
    conn_type_long = 'FTP FileSystem'

    def __init__(self, conn_params: Connection):
        """Build the underlying Airflow ``FTPHook`` from ``conn_params``.

        :param conn_params: connection object; its ``conn_id`` is passed to
            ``FTPHook`` and its extras may provide ``base_path``.
        """
        # Local import: the Airflow FTP hook module is only imported when a
        # hook instance is actually created.
        from airflow.contrib.hooks.ftp_hook import FTPHook
        self.conn_id = conn_params.conn_id
        self.af_ftp_hook = FTPHook(ftp_conn_id=self.conn_id)

        # On an Airflow ``Connection`` ``extra_dejson`` is a dict-valued
        # property, so calling it raises ``TypeError``. Read it as a mapping,
        # but keep supporting a callable accessor in case ``Connection`` is a
        # project type that exposes it as a method.
        extras = conn_params.extra_dejson
        if callable(extras):
            base_path = extras('base_path', '/')
        else:
            base_path = extras.get('base_path', '/')
        self.base_path = Path(base_path)

    def list_path(self, path: str, recursive: bool = False) -> List[str]:
        """Return the directory listing of ``path`` (relative to ``base_path``).

        :param path: directory to list, joined onto ``base_path``.
        :param recursive: not supported; must be falsy.
        :raises NotImplementedError: if ``recursive`` is truthy.
        """
        if recursive:
            raise NotImplementedError('Recursive list not implemented for FTP')
        return self.af_ftp_hook.list_directory(str(self.base_path / path))

    def write_data(self, path: str, data: Union[str, bytes, BytesIO]):
        """Upload ``data`` to ``path`` (relative to ``base_path``).

        :param path: destination file, joined onto ``base_path``.
        :param data: payload; ``str`` is encoded with ``str.encode()``
            defaults and ``bytes`` is wrapped in a ``BytesIO`` before being
            handed to ``FTPHook.store_file``.
        """
        if isinstance(data, str):
            data = data.encode()
        if isinstance(data, bytes):
            data = BytesIO(data)
        self.af_ftp_hook.store_file(str(self.base_path / path), data)

    def read_data(self, path: str) -> BytesIO:
        """Download ``path`` (relative to ``base_path``) into memory.

        :param path: source file, joined onto ``base_path``.
        :return: a ``BytesIO`` holding the file content, positioned at the
            start so that ``read()`` returns the whole payload.
        """
        result = BytesIO()
        self.af_ftp_hook.retrieve_file(str(self.base_path / path), result)
        # The download leaves the stream positioned after the written bytes;
        # rewind so callers do not read an empty result.
        result.seek(0)
        return result
Пример #2
0
    def bulk_load_ib(self, table, database, tmp_file, rel_path='/opt', sep='\t', header=0, ftp_conn_id='ftp_default'):
        """
        Loads a delimited file into a database table using FTPHook for transferring
        the CSV file, since ICE doesn't support LOAD DATA LOCAL INFILE.
        Doesn't support IGNORE X LINES, NO HEADER.

        The file is uploaded into a freshly created remote directory, loaded
        with ``LOAD DATA INFILE`` and the remote file and directory are removed
        again whether or not the load succeeds.

        :param table: name of the target table
        :param database: name of the database that owns ``table``
        :param tmp_file: local file handed to ``FTPHook.store_file``
        :param rel_path: prefix prepended to the FTP path to build the path
            used in ``LOAD DATA INFILE`` (presumably the FTP root as seen from
            the database host -- TODO confirm)
        :param sep: field terminator used in the ``LOAD DATA`` statement
        :param header: currently unused
        :param ftp_conn_id: connection id passed to ``FTPHook``
        :raises Exception: any error from the upload or the SQL statements is
            re-raised after the remote cleanup was attempted

        # TODO : DELETE HEADER OPTION
        """
        import uuid
        from airflow.contrib.hooks.ftp_hook import FTPHook

        logging.info('Load file to table : %s', table)
        logging.info('Using Database: %s', database)
        conn_FTP = FTPHook(ftp_conn_id=ftp_conn_id)

        # Remote temp file name and dir; a fresh UUID makes the directory
        # name unique for every call.
        tmp_file_remote = database + '_' + table + '.csv'
        rnd = str(uuid.uuid4())
        tmp_dir_remote = '/ibl' + rnd + '/'
        conn_FTP.create_directory(tmp_dir_remote)
        logging.info('Temp folder created : %s', tmp_dir_remote)
        temp_filepath = tmp_dir_remote + tmp_file_remote
        logging.info('Transfering file : %s', temp_filepath)
        remote_filepath = rel_path + temp_filepath
        logging.info('Remote file : %s', remote_filepath)

        try:
            conn_FTP.store_file(temp_filepath, tmp_file)
        except Exception as e:
            # Report through logging instead of print() so the cause ends up
            # in the same place as the other messages.
            logging.warning("Failed to store file: %s", e)
            conn_FTP.delete_directory(tmp_dir_remote)
            raise

        # Load Remote temp file uploaded to Infobright Server
        try:
            conn = self.get_conn()
            logging.info('Loading data to Infobright...')
            cur = conn.cursor()

            cur.execute("""
                set @bh_dataformat = 'txt_variable'
            """)

            # NOTE(review): the reject file always goes to /opt, independent
            # of rel_path -- confirm this is intended.
            cur.execute("""
                set @BH_REJECT_FILE_PATH = '/opt/{rnd}_{database}_{table}_reject.txt'
            """.format(rnd=rnd, database=database, table=table))

            # NOTE: database/table/sep are interpolated into the statement
            # text (identifiers cannot be bound as parameters); callers must
            # only pass trusted values.
            cur.execute("""
                LOAD DATA INFILE '{remote_filepath}'
                INTO TABLE {database}.{table}
                CHARACTER SET UTF8
                FIELDS TERMINATED BY '{sep}'
                """.format(remote_filepath=remote_filepath,
                           database=database,
                           table=table,
                           sep=sep))
            conn.commit()
            logging.info('Finished loading data')
        except Exception:
            logging.warning("Failed to execute SQL Statement")
            raise
        finally:
            # Remove temp file and dir on success and on failure alike.
            conn_FTP.delete_file(temp_filepath)
            conn_FTP.delete_directory(tmp_dir_remote)
        logging.info('Removed temp folder')
Пример #3
0
    def bulk_load_ib(self,
                     table,
                     database,
                     tmp_file,
                     rel_path='/opt',
                     sep='\t',
                     header=0,
                     ftp_conn_id='ftp_default'):
        """
        Loads a delimited file into a database table using FTPHook for transferring
        the CSV file, since ICE doesn't support LOAD DATA LOCAL INFILE.
        Doesn't support IGNORE X LINES, NO HEADER.

        The file is uploaded into a freshly created remote directory, loaded
        with ``LOAD DATA INFILE`` and the remote file and directory are removed
        again whether or not the load succeeds.

        :param table: name of the target table
        :param database: name of the database that owns ``table``
        :param tmp_file: local file handed to ``FTPHook.store_file``
        :param rel_path: prefix prepended to the FTP path to build the path
            used in ``LOAD DATA INFILE`` (presumably the FTP root as seen from
            the database host -- TODO confirm)
        :param sep: field terminator used in the ``LOAD DATA`` statement
        :param header: currently unused
        :param ftp_conn_id: connection id passed to ``FTPHook``
        :raises Exception: any error from the upload or the SQL statements is
            re-raised after the remote cleanup was attempted

        # TODO : DELETE HEADER OPTION
        """
        import uuid
        from airflow.contrib.hooks.ftp_hook import FTPHook

        logging.info('Load file to table : %s', table)
        logging.info('Using Database: %s', database)
        conn_FTP = FTPHook(ftp_conn_id=ftp_conn_id)

        # Remote temp file name and dir; a fresh UUID makes the directory
        # name unique for every call.
        tmp_file_remote = database + '_' + table + '.csv'
        rnd = str(uuid.uuid4())
        tmp_dir_remote = '/ibl' + rnd + '/'
        conn_FTP.create_directory(tmp_dir_remote)
        logging.info('Temp folder created : %s', tmp_dir_remote)
        temp_filepath = tmp_dir_remote + tmp_file_remote
        logging.info('Transfering file : %s', temp_filepath)
        remote_filepath = rel_path + temp_filepath
        logging.info('Remote file : %s', remote_filepath)

        try:
            conn_FTP.store_file(temp_filepath, tmp_file)
        except Exception as e:
            # Report through logging instead of print() so the cause ends up
            # in the same place as the other messages.
            logging.warning("Failed to store file: %s", e)
            conn_FTP.delete_directory(tmp_dir_remote)
            raise

        # Load Remote temp file uploaded to Infobright Server
        try:
            conn = self.get_conn()
            logging.info('Loading data to Infobright...')
            cur = conn.cursor()

            cur.execute("""
                set @bh_dataformat = 'txt_variable'
            """)

            # NOTE(review): the reject file always goes to /opt, independent
            # of rel_path -- confirm this is intended.
            cur.execute("""
                set @BH_REJECT_FILE_PATH = '/opt/{rnd}_{database}_{table}_reject.txt'
            """.format(rnd=rnd, database=database, table=table))

            # NOTE: database/table/sep are interpolated into the statement
            # text (identifiers cannot be bound as parameters); callers must
            # only pass trusted values.
            cur.execute("""
                LOAD DATA INFILE '{remote_filepath}'
                INTO TABLE {database}.{table}
                CHARACTER SET UTF8
                FIELDS TERMINATED BY '{sep}'
                """.format(remote_filepath=remote_filepath,
                           database=database,
                           table=table,
                           sep=sep))
            conn.commit()
            logging.info('Finished loading data')
        except Exception:
            logging.warning("Failed to execute SQL Statement")
            raise
        finally:
            # Remove temp file and dir on success and on failure alike.
            conn_FTP.delete_file(temp_filepath)
            conn_FTP.delete_directory(tmp_dir_remote)
        logging.info('Removed temp folder')