Пример #1
0
    def tile(cls, op):
        """
        Tile ``op`` with the strategy that matches the current runtime.

        ``cls._tile_cupid`` is used when the cupid ``RuntimeContext`` reports
        that its context is ready; otherwise ``cls._tile_tunnel`` is used.

        :param op: operand to tile, forwarded unchanged to the chosen helper.
        :return: whatever the chosen helper returns.
        """
        # cupid is imported lazily, only when tiling actually happens
        from cupid.runtime import RuntimeContext

        in_cupid = RuntimeContext.is_context_ready()
        tile_impl = cls._tile_cupid if in_cupid else cls._tile_tunnel
        return tile_impl(op)
Пример #2
0
    def execute(cls, ctx, op):
        """
        Execute ``op`` through the backend that matches the current runtime.

        ``cls._execute_in_cupid`` is used when the cupid ``RuntimeContext``
        reports a ready context, ``cls._execute_arrow_tunnel`` otherwise.
        The helper's return value is discarded.

        :param ctx: execution context, forwarded unchanged.
        :param op: operand to execute, forwarded unchanged.
        :return: None
        """
        # cupid is imported lazily, only when execution actually happens
        from cupid.runtime import RuntimeContext

        if RuntimeContext.is_context_ready():
            runner = cls._execute_in_cupid
        else:
            runner = cls._execute_arrow_tunnel
        runner(ctx, op)
Пример #3
0
    def get_bearer_token():
        """
        Return the bearer token exposed by the current cupid context.

        :return: the result of ``context().get_bearer_token()``, or None when
            the cupid runtime context is not ready.
        """
        from cupid.runtime import context, RuntimeContext

        if RuntimeContext.is_context_ready():
            return context().get_bearer_token()
        return None
Пример #4
0
def _handle_terminate_instance(sock):
    """
    Handle a "terminate instance" request read from ``sock``.

    The request is a little-endian unsigned 32-bit length followed by that
    many bytes of a pickled dict holding the key ``instance_id``.

    When the cupid runtime context is ready, an ``ODPS`` entry is built from
    the context's bearer token plus the ``ODPS_PROJECT_NAME`` and
    ``ODPS_RUNTIME_ENDPOINT`` environment variables, and ``stop_instance`` is
    called with the requested instance id. When the context is not ready only
    a warning is logged.

    Nothing is written back to ``sock`` on success. On any failure the
    exception is logged and a failure result carrying ``sys.exc_info()`` is
    written through ``_write_request_result``.

    :param sock: connected socket-like object providing ``recv``.
    :return: None
    """
    from cupid.runtime import context, RuntimeContext
    from odps import ODPS
    from odps.accounts import BearerTokenAccount

    try:
        # NOTE(review): recv() may return fewer bytes than requested; this
        # assumes the whole request arrives in one read -- TODO confirm.
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with key `instance_id`
        # NOTE(review): pickle.loads is only safe if the peer is trusted --
        # confirm that this socket is never exposed to untrusted clients.
        cmd_body = pickle.loads(sock.recv(cmd_len))

        instance_id = cmd_body['instance_id']

        if not RuntimeContext.is_context_ready():
            logger.warning('Cupid context not ready')
        else:
            bearer_token = context().get_bearer_token()
            account = BearerTokenAccount(bearer_token)
            # both variables are mandatory here: a missing one raises KeyError,
            # which is reported to the peer by the handler below
            project = os.environ['ODPS_PROJECT_NAME']
            endpoint = os.environ['ODPS_RUNTIME_ENDPOINT']
            o = ODPS(None,
                     None,
                     account=account,
                     project=project,
                     endpoint=endpoint)

            o.stop_instance(instance_id)
    except:
        # catch everything so that the failure is always reported to the peer;
        # the message previously read 'Failed to put kv value' (copy-paste slip)
        logger.exception('Failed to terminate instance')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Пример #5
0
def _handle_commit_table_upload_session(sock):
    """
    Handle a "commit table upload session" request read from ``sock``.

    The request is a little-endian unsigned 32-bit length followed by a
    pickled dict with the keys ``odps_params``, ``table_name``,
    ``cupid_handle``, ``blocks`` and ``overwrite``. ``table_name`` is split on
    ``'.'`` into exactly a project name and a table name.

    An ``ODPS`` entry is built from the cupid bearer token; project and
    endpoint come from the ``ODPS_PROJECT_NAME`` / ``ODPS_RUNTIME_ENDPOINT``
    environment variables, falling back to ``odps_params``. The upload
    session identified by ``cupid_handle`` is then committed and a success
    result is written. Any failure (including a cupid context that is not
    ready) is logged and reported through ``_write_request_result``.

    :param sock: connected socket-like object providing ``recv``.
    :return: None
    """
    try:
        (payload_size,) = struct.unpack('<I', sock.recv(4))
        # payload keys: odps_params, table_name, cupid_handle, blocks, overwrite
        # NOTE(review): pickle.loads assumes a trusted peer -- confirm.
        config = pickle.loads(sock.recv(payload_size))

        # imported inside the try block so that import failures are reported
        # to the peer like any other error
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext
        from cupid.io.table import CupidTableUploadSession

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )
        cupid_context = context()

        params = config['odps_params']
        account = BearerTokenAccount(cupid_context.get_bearer_token())
        # environment settings win over the values sent by the client
        project = os.environ.get('ODPS_PROJECT_NAME') or params['project']
        endpoint = os.environ.get('ODPS_RUNTIME_ENDPOINT') or params['endpoint']
        odps_entry = ODPS(None, None,
                          account=account, project=project, endpoint=endpoint)
        session = CupidSession(odps_entry)

        target_project, target_table = config['table_name'].split('.')
        session_kwargs = dict(
            session=session,
            table_name=target_table,
            project_name=target_project,
            handle=config['cupid_handle'],
            blocks=config['blocks'],
        )
        CupidTableUploadSession(**session_kwargs).commit(
            overwrite=config['overwrite'])

        _write_request_result(sock)
    except:
        logger.exception('Failed to commit upload session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Пример #6
0
def _handle_put_kv(sock):
    """
    Handle a "put kv" request read from ``sock``.

    The request is a little-endian unsigned 32-bit length followed by a
    pickled dict with the keys ``key`` and ``value``. When the cupid runtime
    context is ready the pair is stored in the context's kv store; otherwise
    a warning is logged and nothing is stored. A success result is written in
    both cases; any exception is logged and reported as a failure.

    :param sock: connected socket-like object providing ``recv``.
    :return: None
    """
    try:
        (payload_size,) = struct.unpack('<I', sock.recv(4))
        # payload keys: key, value
        # NOTE(review): pickle.loads assumes a trusted peer -- confirm.
        request = pickle.loads(sock.recv(payload_size))

        from cupid.runtime import RuntimeContext

        if RuntimeContext.is_context_ready():
            from cupid import context
            store = context().kv_store()
            store[request['key']] = request['value']
        else:
            logger.warning('Cupid context not ready')

        _write_request_result(sock)
    except:
        logger.exception('Failed to put kv value')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Пример #7
0
def _handle_create_table_upload_session(sock):
    """
    Handle a "create table upload session" request read from ``sock``.

    The request is a little-endian unsigned 32-bit length followed by a
    pickled dict with the keys ``odps_params`` and ``table_name``.

    An ``ODPS`` entry is built from the cupid bearer token; project and
    endpoint come from the ``ODPS_PROJECT_NAME`` / ``ODPS_RUNTIME_ENDPOINT``
    environment variables, falling back to ``odps_params``. A cupid upload
    session is created for the table and its handle is sent back as
    ``{'handle': ...}``. Any failure (including a cupid context that is not
    ready) is logged and reported through ``_write_request_result``.

    :param sock: connected socket-like object providing ``recv``.
    :return: None
    """
    try:
        (payload_size,) = struct.unpack('<I', sock.recv(4))
        # payload keys: odps_params, table_name
        # NOTE(review): pickle.loads assumes a trusted peer -- confirm.
        config = pickle.loads(sock.recv(payload_size))

        # imported inside the try block so that import failures are reported
        # to the peer like any other error
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )
        cupid_context = context()

        params = config['odps_params']
        account = BearerTokenAccount(cupid_context.get_bearer_token())
        # environment settings win over the values sent by the client
        project = os.environ.get('ODPS_PROJECT_NAME') or params['project']
        endpoint = os.environ.get('ODPS_RUNTIME_ENDPOINT') or params['endpoint']
        odps_entry = ODPS(None, None,
                          account=account, project=project, endpoint=endpoint)
        session = CupidSession(odps_entry)

        table = odps_entry.get_table(config['table_name'])

        logger.debug('Start creating upload session from cupid.')
        upload_session = session.create_upload_session(table)

        _write_request_result(sock, result={'handle': upload_session.handle})
    except:
        logger.exception('Failed to create upload session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Пример #8
0
    def _extract_pod_name_ep(self, pod_data):
        """
        Extract a pod's name and its ``ip:port`` endpoint from ``pod_data``.

        The port is taken from ``self._pod_to_port`` when cached there;
        otherwise it is parsed from the JSON stored under the pod name in
        ``self.cupid_kv`` (the part after the last ``':'`` of its
        ``endpoint`` field) and cached.

        :param pod_data: mapping providing ``['metadata']['name']`` and
            ``['status']['pod_ip']``.
        :return: tuple ``(pod_name, endpoint)``; ``endpoint`` is None when the
            cupid context is not ready or no port is known for the pod.
        """
        from cupid.runtime import RuntimeContext

        name = pod_data['metadata']['name']
        if not RuntimeContext.is_context_ready():
            logger.debug('Cupid context not ready, pod name: {}'.format(name))
            return name, None

        if name not in self._pod_to_port:
            kv_entry = self.cupid_kv.get(name)
            if not kv_entry:
                port = None
                logger.debug('Cannot get port from kvstore, name: {}'.format(name))
            else:
                port = json.loads(kv_entry)['endpoint'].rsplit(':', 1)[-1]
                self._pod_to_port[name] = port
                logger.debug('Get port from kvstore, name: {}, port: {}'.format(name, port))
        else:
            port = self._pod_to_port[name]

        # the pod ip is read even when no port is known, so pod data lacking
        # it still raises here
        endpoint = '%s:%s' % (pod_data['status']['pod_ip'], port)
        if port:
            return name, endpoint
        return name, None
Пример #9
0
def _handle_get_kv(sock):
    """
    Handle a "get kv" request read from ``sock``.

    The request is a little-endian unsigned 32-bit length followed by a
    pickled dict with the key ``key``. When the cupid runtime context is
    ready the value is looked up in the context's kv store; otherwise a
    warning is logged and the value is None. The result is sent back as
    ``{'value': ...}``; any exception is logged and reported as a failure.

    :param sock: connected socket-like object providing ``recv``.
    :return: None
    """
    try:
        (payload_size,) = struct.unpack('<I', sock.recv(4))
        # payload keys: key
        # NOTE(review): pickle.loads assumes a trusted peer -- confirm.
        request = pickle.loads(sock.recv(payload_size))

        from cupid.runtime import RuntimeContext

        if RuntimeContext.is_context_ready():
            from cupid import context
            value = context().kv_store().get(request['key'])
        else:
            logger.warning('Cupid context not ready')
            value = None

        _write_request_result(sock, result={'value': value})
    except:
        logger.exception('Failed to get kv value')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Пример #10
0
    def _tile_cupid(cls, op):
        """
        Tile a table-write operand for execution inside cupid.

        Steps, in order:

        1. Build an ``ODPS`` entry from the cupid bearer token. The project
           comes from ``ODPS_PROJECT_NAME`` (falling back to
           ``op.odps_params``) and the endpoint from ``ODPS_RUNTIME_ENDPOINT``
           (same fallback).
        2. Create a cupid upload session for ``op.table_name``.
        3. Emit one ``DataFrameWriteTableSplit`` chunk per chunk of the
           row-concatenated input, each with a fresh block id.
        4. While at least 8 chunks remain, merge them in groups of up to 8
           with non-terminal ``DataFrameWriteTableCommit`` chunks.
        5. Put a terminal ``DataFrameWriteTableCommit`` chunk on top of what
           is left and return the tiled dataframes built from it.

        :param op: write-table operand being tiled.
        :return: result of ``new_dataframes`` on a copy of ``op``.
        :raises SystemError: when the cupid runtime context is not ready.
        """
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )

        account = BearerTokenAccount(context().get_bearer_token())
        env_project = os.environ.get('ODPS_PROJECT_NAME')
        # work on a copy so that the operand's own parameters stay untouched
        params = op.odps_params.copy()
        if env_project:
            params['project'] = env_project
        endpoint = os.environ.get('ODPS_RUNTIME_ENDPOINT') or params['endpoint']
        odps_entry = ODPS(None, None,
                          account=account,
                          project=params['project'],
                          endpoint=endpoint)
        session = CupidSession(odps_entry)

        table = odps_entry.get_table(op.table_name)

        logger.debug('Start creating upload session from cupid.')
        upload_session = session.create_upload_session(table)

        input_df = build_concatenated_rows_frame(op.inputs[0])
        out_df = op.outputs[0]

        # every output chunk is given an all-zero shape
        empty_shape = (0, ) * len(input_df.shape)
        write_chunks = []
        blocks = {}
        for in_chunk in input_df.chunks:
            # block id: current unix time in seconds + '_' + dash-less uuid4
            block_id = str(int(time.time())) + '_' + str(uuid.uuid4()).replace('-', '')
            split_op = DataFrameWriteTableSplit(
                dtypes=op.dtypes,
                table_name=op.table_name,
                unknown_as_string=op.unknown_as_string,
                partition_spec=op.partition_spec,
                cupid_handle=to_str(upload_session.handle),
                block_id=block_id,
                write_batch_size=op.write_batch_size)
            write_chunks.append(
                split_op.new_chunk([in_chunk],
                                   shape=empty_shape,
                                   index=in_chunk.index,
                                   index_value=out_df.index_value,
                                   dtypes=in_chunk.dtypes))
            blocks[block_id] = op.partition_spec

        # build commit tree
        combine_size = 8
        level = write_chunks
        while len(level) >= combine_size:
            merged = []
            for start in range(0, len(level), combine_size):
                group = level[start:start + combine_size]
                if len(group) == 1:
                    # a lone trailing chunk moves up a level as-is
                    merged.append(group[0])
                    continue
                combine_op = DataFrameWriteTableCommit(dtypes=op.dtypes,
                                                       is_terminal=False)
                merged.append(
                    combine_op.new_chunk(group,
                                         shape=empty_shape,
                                         index_value=out_df.index_value,
                                         dtypes=op.dtypes))
            level = merged

        assert len(level) < combine_size

        # note: the terminal commit receives the operand's original
        # odps_params, not the locally adjusted copy
        commit_op = DataFrameWriteTableCommit(
            dtypes=op.dtypes,
            table_name=op.table_name,
            blocks=blocks,
            cupid_handle=to_str(upload_session.handle),
            overwrite=op.overwrite,
            odps_params=op.odps_params,
            is_terminal=True)
        commit_chunk = commit_op.new_chunk(level,
                                           shape=empty_shape,
                                           dtypes=op.dtypes,
                                           index_value=out_df.index_value)

        return op.copy().new_dataframes(op.inputs,
                                        shape=out_df.shape,
                                        index_value=out_df.index_value,
                                        dtypes=out_df.dtypes,
                                        columns_value=out_df.columns_value,
                                        chunks=[commit_chunk],
                                        nsplits=((0, ), ) * len(empty_shape))
Пример #11
0
def to_mars_dataframe(odps, table_name, shape=None, partition=None, chunk_bytes=None,
                      sparse=False, columns=None, add_offset=False, calc_nrows=True,
                      use_arrow_dtype=False, string_as_binary=None,
                      cupid_internal_endpoint=None):
    """
    Read a table into a Mars DataFrame.

    A virtual view is first persisted into a temporary table (lifecycle 1)
    and that table is read instead.

    :param odps: ODPS entry used to look up tables and run the row count.
    :param table_name: table name
    :param shape: table shape. A tuple like (1000, 3) means the table has 1000 rows and
        3 columns. When None it is derived from the table (see ``calc_nrows``).
    :param partition: partition spec. Required when the table is partitioned.
    :param chunk_bytes: bytes to read for each chunk (originally documented default: '16M').
    :param sparse: whether to read as a sparse DataFrame.
    :param columns: selected columns; each one must exist in the table schema.
    :param add_offset: whether to standardize the DataFrame's index to RangeIndex. False as default.
    :param calc_nrows: whether to count rows when ``shape`` is not specified. The count is
        skipped (rows become NaN) when this is false or the cupid context is ready.
    :param use_arrow_dtype: read to arrow dtype. Reduces memory in some cases.
    :param string_as_binary: read string columns as binary type.
    :param cupid_internal_endpoint: endpoint placed into the ODPS parameters handed to the
        reader; falls back to ``cupid_options.cupid.runtime.endpoint`` when falsy.
    :return: Mars DataFrame.
    :raises TypeError: when a partitioned table is read without ``partition`` or a selected
        column does not exist.
    """
    from cupid.runtime import RuntimeContext
    from .dataframe import read_odps_table
    from ..utils import init_progress_ui

    odps_params = {
        'project': odps.project,
        'endpoint': cupid_internal_endpoint or cupid_options.cupid.runtime.endpoint,
    }

    table = odps.get_table(table_name)

    table_schema = table.schema
    if len(table_schema.partitions) != 0 and partition is None:
        raise TypeError('Partition should be specified.')

    if columns:
        for column in columns:
            if column not in table_schema.names:
                raise TypeError("Specific column {} doesn't exist in table".format(column))

    # persist view table to a temp table
    if table.is_virtual_view:
        suffix = str(uuid.uuid4()).replace('-', '_')
        temp_name = table_name + '_temp_mars_table_' + suffix
        odps.create_table(temp_name, schema=table.schema, stored_as='aliorc', lifecycle=1)
        table.to_df().persist(temp_name)
        table_name = temp_name
        table = odps.get_table(table_name)

    # get dataframe's shape
    if shape is None:
        if not calc_nrows or RuntimeContext.is_context_ready():
            nrows = np.nan
        else:
            # obtain count
            source = table if partition is None else table.get_partition(partition)
            nrows = source.to_df().count().execute(
                use_tunnel=False, ui=init_progress_ui(mock=True))

        shape = (nrows, len(table.schema.simple_columns))

    return read_odps_table(odps.get_table(table_name), shape, partition=partition,
                           chunk_bytes=chunk_bytes, sparse=sparse, columns=columns,
                           odps_params=odps_params, add_offset=add_offset,
                           use_arrow_dtype=use_arrow_dtype, string_as_binary=string_as_binary)
Пример #12
0
    def tile(cls, op):
        """
        Tile a table-read operand into one chunk per cupid table split.

        Steps, in order:

        1. Build an ``ODPS`` entry from the cupid bearer token and
           ``op.odps_params`` (``ODPS_PROJECT_NAME`` overrides the project).
        2. Choose a split size: the output's ``chunk_bytes`` or
           ``READ_CHUNK_LIMIT``. When the source is smaller than that and a
           Mars context exists, the size is divided by the worker count, with
           a floor of 1 MiB.
        3. Create a cupid download session, doubling the split size on every
           ``CupidError`` until it reaches ``MAX_CHUNK_SIZE``.
        4. Estimate rows per split proportionally to split file sizes when
           the row count is known.
        5. Emit one ``DataFrameReadTableSplit`` chunk per split, or a single
           empty chunk when there are no splits.

        :param op: read-table operand being tiled.
        :return: result of ``new_dataframes`` on a copy of ``op``.
        :raises SystemError: when the cupid runtime context is not ready.
        """
        import numpy as np
        import pandas as pd
        from odps import ODPS
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.runtime import RuntimeContext
        from mars.context import get_context

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )

        account = BearerTokenAccount(context().get_bearer_token())
        env_project = os.environ.get('ODPS_PROJECT_NAME')
        # work on a copy so that the operand's own parameters stay untouched
        params = op.odps_params.copy()
        if env_project:
            params['project'] = env_project
        odps_entry = ODPS(None, None, account=account, **params)
        session = CupidSession(odps_entry)

        mars_ctx = get_context()

        out_df = op.outputs[0]
        split_size = out_df.extra_params.chunk_bytes or READ_CHUNK_LIMIT

        source = odps_entry.get_table(op.table_name)
        if op.partition is not None:
            source = source.get_partition(op.partition)

        try:
            stored_bytes = source.size
        except ODPSError:
            # fail to get data size, just ignore
            pass
        else:
            if stored_bytes < split_size and mars_ctx is not None:
                # data is too small: spread it over the workers (at least one),
                # but never go below 1M per split
                n_workers = max(len(mars_ctx.get_worker_addresses()), 1)
                split_size = max(stored_bytes // n_workers, 1 * 1024**2)
                logger.debug(
                    'Input data size is too small, split_size is {}'.format(
                        split_size))

        logger.debug(
            'Start creating download session of table {} from cupid.'.format(
                op.table_name))
        while True:
            try:
                download_session = session.create_download_session(
                    source, split_size=split_size, columns=op.columns)
            except CupidError:
                logger.debug(
                    'The number of splits exceeds 100000, split_size is {}'.
                    format(split_size))
                if split_size >= MAX_CHUNK_SIZE:
                    raise
                # retry with larger splits
                split_size *= 2
            else:
                break

        splits = download_session.splits
        logger.debug('%s table splits have been created.', str(len(splits)))

        if np.isnan(out_df.shape[0]):
            est_chunk_rows = [None] * len(splits)
        else:
            # distribute the known row count proportionally to split file sizes
            sizes = np.array(
                [sp.split_file_end - sp.split_file_start for sp in splits])
            est_chunk_rows = sizes * out_df.shape[0] // sizes.sum()

        logger.warning('Estimated chunk rows: %r', est_chunk_rows)

        def _make_chunk(chunk_op, row_idx):
            # the chunk shape is unknown
            return chunk_op.new_chunk(
                None,
                shape=(np.nan, out_df.shape[1]),
                dtypes=op.dtypes,
                index_value=parse_index(pd.RangeIndex(0)),
                columns_value=parse_index(out_df.dtypes.index, store_data=True),
                index=(row_idx, 0))

        if len(splits) == 0:
            logger.debug('Table {} has no data'.format(op.table_name))
            out_chunks = [_make_chunk(DataFrameReadTableSplit(), 0)]
        else:
            out_chunks = []
            for idx, split in enumerate(splits):
                split_op = DataFrameReadTableSplit(
                    cupid_handle=to_str(split.handle),
                    split_index=split.split_index,
                    split_file_start=split.split_file_start,
                    split_file_end=split.split_file_end,
                    schema_file_start=split.schema_file_start,
                    schema_file_end=split.schema_file_end,
                    add_offset=op.add_offset,
                    dtypes=op.dtypes,
                    sparse=op.sparse,
                    split_size=split_size,
                    string_as_binary=op.string_as_binary,
                    use_arrow_dtype=op.use_arrow_dtype,
                    estimate_rows=est_chunk_rows[idx])
                out_chunks.append(_make_chunk(split_op, idx))

        if op.add_offset:
            out_chunks = standardize_range_index(out_chunks)

        tiled_op = op.copy()
        # row counts per chunk are unknown, hence NaN along the first axis
        nsplits = ((np.nan, ) * len(out_chunks), (out_df.shape[1], ))
        return tiled_op.new_dataframes(None,
                                       shape=out_df.shape,
                                       dtypes=op.dtypes,
                                       index_value=out_df.index_value,
                                       columns_value=out_df.columns_value,
                                       chunks=out_chunks,
                                       nsplits=nsplits)
Пример #13
0
def _handle_create_table_download_session(sock):
    """
    Handle a "create table download session" request read from ``sock``.

    The request is a little-endian unsigned 32-bit length followed by a
    pickled dict with the keys ``odps_params``, ``table_name``,
    ``partition``, ``columns``, ``worker_count``, ``split_size`` and
    ``max_chunk_num`` (``with_split_meta_on_tile`` is optional).

    An ``ODPS`` entry is built from the cupid bearer token; project and
    endpoint come from the ``ODPS_PROJECT_NAME`` / ``ODPS_RUNTIME_ENDPOINT``
    environment variables, falling back to ``odps_params``. When the source
    is smaller than the requested split size and a worker count was given,
    the split size is reduced to the data size divided by the worker count,
    with a floor of 1 MiB. The download session is then created, doubling the
    split size on every ``CupidError`` until it reaches ``max_chunk_num``.

    On success ``{'splits': ..., 'split_size': ...}`` is written back. Any
    failure (including a cupid context that is not ready) is logged and
    reported through ``_write_request_result``.

    :param sock: connected socket-like object providing ``recv``.
    :return: None
    """
    try:
        # NOTE(review): recv() may return fewer bytes than requested; this
        # assumes the whole request arrives in one read -- TODO confirm.
        cmd_len, = struct.unpack('<I', sock.recv(4))
        # dict with odps_params, table_name, partition, columns, worker_count, split_size, max_chunk_num
        # NOTE(review): pickle.loads is only safe if the peer is trusted -- confirm.
        session_config = pickle.loads(sock.recv(cmd_len))

        # imported inside the try block so that import failures are reported
        # to the peer like any other error
        from odps import ODPS
        from odps.errors import ODPSError
        from odps.accounts import BearerTokenAccount
        from cupid import CupidSession, context
        from cupid.errors import CupidError
        from cupid.runtime import RuntimeContext

        if not RuntimeContext.is_context_ready():
            raise SystemError(
                'No Mars cluster found, please create via `o.create_mars_cluster`.'
            )

        cupid_ctx = context()

        odps_params = session_config['odps_params']
        bearer_token = cupid_ctx.get_bearer_token()
        account = BearerTokenAccount(bearer_token)
        # environment settings win over the values sent by the client
        project = os.environ.get('ODPS_PROJECT_NAME',
                                 None) or odps_params['project']
        endpoint = os.environ.get(
            'ODPS_RUNTIME_ENDPOINT') or odps_params['endpoint']
        o = ODPS(None,
                 None,
                 account=account,
                 project=project,
                 endpoint=endpoint)
        cupid_session = CupidSession(o)

        split_size = session_config['split_size']
        table_name = session_config['table_name']
        data_src = o.get_table(table_name)
        if session_config.get('partition') is not None:
            data_src = data_src.get_partition(session_config['partition'])

        try:
            data_store_size = data_src.size
        except ODPSError:
            # fail to get data size, just ignore
            pass
        else:
            worker_count = session_config['worker_count']
            if data_store_size < split_size and worker_count is not None:
                # data is too small, split as many as number of cores;
                # clamp to 1 so that a reported worker count of 0 cannot
                # raise ZeroDivisionError
                split_size = data_store_size // max(worker_count, 1)
                # at least 1M
                split_size = max(split_size, 1 * 1024**2)
                logger.debug(
                    'Input data size is too small, split_size is {}'.format(
                        split_size))

        # NOTE(review): despite its name, max_chunk_num is compared against
        # split_size below as the upper bound for retries -- confirm intent.
        max_chunk_num = session_config['max_chunk_num']
        columns = session_config['columns']
        with_split_meta = session_config.get('with_split_meta_on_tile')

        logger.debug(
            'Start creating download session of table %s from cupid, columns %r',
            table_name, columns)
        while True:
            try:
                download_session = cupid_session.create_download_session(
                    data_src,
                    split_size=split_size,
                    columns=columns,
                    with_split_meta=with_split_meta)
                break
            except CupidError:
                logger.debug(
                    'The number of splits exceeds 100000, split_size is {}'.
                    format(split_size))
                if split_size >= max_chunk_num:
                    raise
                else:
                    # retry with larger splits
                    split_size *= 2

        ret_data = {
            'splits': download_session.splits,
            'split_size': split_size,
        }
        _write_request_result(sock, result=ret_data)
    except:
        # catch everything so that the failure is always reported to the peer
        logger.exception('Failed to create download session')
        _write_request_result(sock, False, exc_info=sys.exc_info())
Пример #14
0
def persist_mars_dataframe(odps,
                           df,
                           table_name,
                           overwrite=False,
                           partition=None,
                           write_batch_size=None,
                           unknown_as_string=None,
                           as_type=None,
                           drop_table=False,
                           create_table=True,
                           drop_partition=False,
                           create_partition=None,
                           lifecycle=None,
                           runtime_endpoint=None,
                           **kw):
    """
    Write a Mars DataFrame (or a pandas DataFrame) to a table.

    The target schema is derived from ``df.dtypes``. The table and partition
    are dropped or created as requested, and the resulting table's columns
    must match the derived schema in count, name (case-insensitive) and type.
    A pandas DataFrame is written directly: inside cupid when the cupid
    context is ready, otherwise through a table tunnel arrow writer. Any
    other ``df`` is written by executing ``write_odps_table``.

    :param odps: ODPS entry used for all table operations.
    :param df: Mars DataFrame, or a pandas DataFrame.
    :param table_name: table to write.
    :param overwrite: whether to overwrite the data. False as default. Not used by the
        tunnel branch of the pandas path.
    :param partition: partition spec.
    :param write_batch_size: batch size of records to write (originally documented
        default: 1024). Only forwarded on the Mars DataFrame path.
    :param unknown_as_string: set a column to string type if its type is Object.
    :param as_type: specify column dtypes. {'a': 'string'} will set column `a` as string type.
    :param drop_table: drop table if exists, False as default
    :param create_table: create table first if not exists, True as default
    :param drop_partition: drop partition if exists, False as default
    :param create_partition: create partition if not exists, None as default
    :param lifecycle: table lifecycle, passed to ``create_table`` (the original
        documentation states that `options.lifecycle` is used if absent).
    :param runtime_endpoint: endpoint placed into the ODPS parameters of the write; falls
        back to the ``cupid_internal_endpoint`` keyword, then to
        ``cupid_options.cupid.runtime.endpoint``.
    :param kw: extra keywords; only ``cupid_internal_endpoint`` is read.

    :return: None
    :raises ValueError: when partition dropping/creation is requested without ``partition``.
    :raises TypeError: when the table's columns do not match the DataFrame's.
    """
    from .dataframe import write_odps_table
    from odps.tunnel import TableTunnel

    # derive column names and ODPS types from the DataFrame's dtypes
    dtypes = df.dtypes
    column_dtypes = list(zip(dtypes.keys(), list(dtypes.values)))
    names = [col_name for col_name, _ in column_dtypes]
    odps_types = [
        as_type[col_name] if as_type and col_name in as_type
        else pd_type_to_odps_type(col_dtype,
                                  col_name,
                                  unknown_as_string=unknown_as_string)
        for col_name, col_dtype in column_dtypes
    ]
    if partition:
        spec = PartitionSpec(partition)
        # partition columns are always declared as strings
        schema = Schema.from_lists(names, odps_types, spec.keys,
                                   ['string'] * len(spec))
    else:
        schema = Schema.from_lists(names, odps_types)

    if drop_table:
        odps.delete_table(table_name, if_exists=True)

    if partition is not None:
        if not odps.exist_table(table_name) and create_table:
            odps.create_table(table_name,
                              schema,
                              stored_as='aliorc',
                              lifecycle=lifecycle)
        else:
            existing = odps.get_table(table_name)
            table_partition = existing.get_partition(partition)
            if drop_partition:
                existing.delete_partition(table_partition, if_exists=True)
            if create_partition:
                existing.create_partition(table_partition, if_not_exists=True)
    else:
        # the non-partitioned table
        if drop_partition:
            raise ValueError('Cannot drop partition for non-partition table')
        if create_partition:
            raise ValueError('Cannot create partition for non-partition table')

        if create_table or (not odps.exist_table(table_name)):
            odps.create_table(table_name,
                              schema,
                              if_not_exists=True,
                              stored_as='aliorc',
                              lifecycle=lifecycle)

    table = odps.get_table(table_name)

    # the (possibly pre-existing) table must agree with the derived schema
    table_columns = table.schema.simple_columns
    frame_columns = schema.simple_columns
    if len(table_columns) != len(frame_columns):
        raise TypeError(
            'Table column number is %s while input DataFrame has %s columns' %
            (len(table.schema.simple_columns), len(schema.simple_columns)))

    for table_col, frame_col in zip(table_columns, frame_columns):
        if (table_col.name.lower() != frame_col.name.lower()
                or table_col.type != frame_col.type):
            raise TypeError(
                'Column type between provided DataFrame and target table'
                ' does not agree with each other. DataFrame column %s type is %s,'
                'target table column %s type is %s' %
                (frame_col.name, frame_col.type, table_col.name, table_col.type))

    if partition:
        table.create_partition(partition, if_not_exists=True)

    runtime_endpoint = (runtime_endpoint
                        or kw.pop('cupid_internal_endpoint', None)
                        or cupid_options.cupid.runtime.endpoint)
    odps_params = {'project': odps.project, 'endpoint': runtime_endpoint}
    if isinstance(odps.account, AliyunAccount):
        odps_params['access_id'] = odps.account.access_id
        odps_params['secret_access_key'] = odps.account.secret_access_key

    if not isinstance(df, pd.DataFrame):
        # Mars DataFrame: build the write operand and run it
        write_odps_table(df,
                         table,
                         partition=partition,
                         overwrite=overwrite,
                         odps_params=odps_params,
                         unknown_as_string=unknown_as_string,
                         write_batch_size=write_batch_size).execute()
        return

    # pandas DataFrame: write the data directly
    from cupid.runtime import RuntimeContext
    import pyarrow as pa

    if RuntimeContext.is_context_ready():
        _write_table_in_cupid(odps,
                              df,
                              table,
                              partition=partition,
                              overwrite=overwrite,
                              unknown_as_string=unknown_as_string)
        return

    target = odps.get_table(table_name)
    tunnel = TableTunnel(odps, project=target.project)

    session_kwargs = {}
    if partition is not None:
        session_kwargs['partition_spec'] = partition
    upload_session = tunnel.create_upload_session(target.name, **session_kwargs)

    # everything is written as a single record batch into block 0
    writer = upload_session.open_arrow_writer(0)
    writer.write(pa.RecordBatch.from_pandas(df))
    writer.close()
    upload_session.commit([0])