Example #1
def _import_helper(repo: Dolt, table_name: str,
                   write_import_file: Callable[[str], None],
                   primary_keys: List[str], import_mode: str) -> None:
    import_modes = IMPORT_MODES_TO_FLAGS.keys()
    if import_mode is not None:
        assert import_mode in import_modes, 'import_mode must be one of: {}'.format(
            import_modes)
    else:
        if table_name in [table.name for table in repo.ls()]:
            logger.info(
                'No import mode specified, table exists, using "{}"'.format(
                    UPDATE))
            import_mode = UPDATE
        else:
            logger.info(
                'No import mode specified, table does not exist, using "{}"'.format(
                    CREATE))
            import_mode = CREATE

    import_flags = IMPORT_MODES_TO_FLAGS[import_mode]
    logger.info(
        'Importing to table {} in dolt directory located in {}, import mode {}'
        .format(table_name, repo.repo_dir(), import_mode))
    fp = tempfile.NamedTemporaryFile(suffix='.csv')
    write_import_file(fp.name)
    args = [
        'table', 'import', table_name, '--pk={}'.format(','.join(primary_keys))
    ] + import_flags
    repo.execute(args + [fp.name])
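
The helper above leans on module-level names (logger, CREATE, UPDATE, IMPORT_MODES_TO_FLAGS) that sit outside the excerpt. A minimal sketch of what that surrounding context could look like, assuming the flags map onto the dolt table import CLI options (-c to create a table, -u to update one); the exact mapping and import path in the real module may differ:

import logging
import tempfile
from typing import Callable, List

from doltpy.core import Dolt  # assumed import path; it varies across doltpy versions

logger = logging.getLogger(__name__)

CREATE = 'create'
UPDATE = 'update'
# Assumed mapping from import mode to `dolt table import` flags.
IMPORT_MODES_TO_FLAGS = {
    CREATE: ['-c'],  # create a new table
    UPDATE: ['-u'],  # update rows in an existing table
}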
Example #2
def inner(repo: Dolt):
    # Default to 'create' when the table does not yet exist, otherwise 'update'.
    _import_mode = import_mode or (
        'create' if table not in [t.name for t in repo.ls()] else 'update')
    data_to_load = _apply_file_transformers(get_data(), transformers)
    bulk_import(repo, table, data_to_load, pk_cols, import_mode=_import_mode)
    return table
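
Example #2 is a closure: table, get_data, pk_cols, transformers, and import_mode are bound by an enclosing factory that the excerpt does not show. A hypothetical reconstruction of such a factory, with an assumed name and signature that are not taken from the source:

import io
from typing import Callable, List, Optional

from doltpy.core import Dolt  # assumed import path


def make_table_writer(table: str,
                      get_data: Callable[[], io.StringIO],
                      pk_cols: List[str],
                      import_mode: Optional[str] = None,
                      transformers: Optional[List[Callable]] = None
                      ) -> Callable[[Dolt], str]:
    # Bind the loader configuration and return the inner(repo) closure.
    def inner(repo: Dolt) -> str:
        # Body as in Example #2: choose create/update, apply the transformers,
        # then bulk_import the data and return the table name.
        ...

    return inner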
Example #3
    def inner(repo: Dolt):
        _transformers = (transformers or []) + [insert_unique_key]
        data = _apply_df_transformers(get_data(), _transformers)
        if table not in [t.name for t in repo.ls()]:
            raise ValueError('Missing table')

        # Get existing PKs
        existing = read_table(repo, table)
        existing_pks = existing[INSERTED_ROW_HASH_COL].to_list()

        # Get proposed PKs
        proposed_pks = data[INSERTED_ROW_HASH_COL].to_list()
        to_drop = [pk for pk in existing_pks if pk not in proposed_pks]

        if to_drop:
            iterator = iter(to_drop)
            while iterator:
                # Drop stale keys in batches of 30,000 so no single DELETE
                # statement grows unboundedly.
                batch = list(itertools.islice(iterator, 30000))
                if len(batch) == 0:
                    break

                logger.info('Dropping batch of {} IDs from table {}'.format(
                    len(batch), table))
                drop_statement = '''
                DELETE FROM {table} WHERE {pk} in ("{pks_to_drop}")
                '''.format(table=table,
                           pk=INSERTED_ROW_HASH_COL,
                           pks_to_drop='","'.join(batch))
                repo.sql(query=drop_statement)

        new_data = data[~(data[INSERTED_ROW_HASH_COL].isin(existing_pks))]
        if not new_data.empty:
            logger.info('Importing {} records'.format(len(new_data)))
            import_df(repo, table, new_data, [INSERTED_ROW_HASH_COL], 'update')

        return table
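
Example #3 assumes an insert_unique_key transformer and an INSERTED_ROW_HASH_COL constant defined elsewhere in the module. A hypothetical illustration of the idea, not the library's actual implementation: derive a deterministic hash from each row's values and use it as the primary-key column, so re-running the loader yields stable keys to de-duplicate against.

import hashlib

import pandas as pd

INSERTED_ROW_HASH_COL = 'hash_id'  # assumed column name


def insert_unique_key(df: pd.DataFrame) -> pd.DataFrame:
    # Hash the stringified row values; identical rows always get the same key,
    # which is what the existing/proposed PK comparison above relies on.
    hashes = df.astype(str).apply(
        lambda row: hashlib.md5('_'.join(row).encode('utf-8')).hexdigest(),
        axis=1)
    result = df.copy()
    result[INSERTED_ROW_HASH_COL] = hashes
    return result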
Example #4
def _import_helper(repo: Dolt, table_name: str,
                   write_import_file: Callable[[str], None],
                   primary_keys: List[str], import_mode: str) -> None:
    import_modes = IMPORT_MODES_TO_FLAGS.keys()
    if import_mode is not None:
        assert import_mode in import_modes, 'import_mode must be one of: {}'.format(
            import_modes)
    else:
        if table_name in [table.name for table in repo.ls()]:
            logger.info(
                'No import mode specified, table exists, using "{}"'.format(
                    UPDATE))
            import_mode = UPDATE
        else:
            logger.info(
                'No import mode specified, table does not exist, using "{}"'.format(
                    CREATE))
            import_mode = CREATE

    if import_mode == CREATE and primary_keys is None:
        raise ValueError(
            'Import mode CREATE requires a primary key to be specified')

    import_flags = IMPORT_MODES_TO_FLAGS[import_mode]
    logger.info(
        'Importing to table {} in dolt directory located in {}, import mode {}'
        .format(table_name, repo.repo_dir(), import_mode))

    fname = tempfile.mktemp(suffix='.csv')
    try:
        write_import_file(fname)
        args = ['table', 'import', table_name] + import_flags
        if import_mode == CREATE:
            args += ['--pk={}'.format(','.join(primary_keys))]

        repo.execute(args + [fname])
    finally:
        if os.path.exists(fname):
            os.remove(fname)
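
Example #3 calls an import_df wrapper that funnels a pandas DataFrame through a helper like the one above. A minimal sketch of how such a wrapper could bridge the two, assuming _import_helper from Example #4 is in scope; the real doltpy function may differ:

from typing import List, Optional

import pandas as pd

from doltpy.core import Dolt  # assumed import path


def import_df(repo: Dolt, table_name: str, data: pd.DataFrame,
              primary_keys: List[str],
              import_mode: Optional[str] = None) -> None:
    # Serialize the frame to the temporary CSV path the helper hands over.
    def writer(filepath: str) -> None:
        data.to_csv(filepath, index=False)

    _import_helper(repo, table_name, writer, primary_keys, import_mode)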