Пример #1
0
def create_test_table(init_empty_test_repo,
                      create_test_data) -> Tuple[Dolt, str]:
    """Set up a ``test_players`` table, hand it to the caller, then remove it.

    This is a generator: everything before the ``yield`` is setup and
    everything after it is cleanup (presumably it is registered as a pytest
    fixture; the decorator is not visible here).

    NOTE(review): the return annotation describes the yielded pair, not the
    generator object itself.

    Args:
        init_empty_test_repo: The repo object to create the table in.
        create_test_data: Path to a CSV file used to seed the table.

    Yields:
        The repo and the name of the created table, ``'test_players'``.
    """
    table_name = 'test_players'
    repo = init_empty_test_repo
    test_data_path = create_test_data

    # Create the table explicitly before loading rows into it.
    repo.sql(query='''
        CREATE TABLE `test_players` (
            `name` LONGTEXT NOT NULL COMMENT 'tag:0',
            `id` BIGINT NOT NULL COMMENT 'tag:1',
            PRIMARY KEY (`id`)
        );
    ''')

    # Load the CSV rows with the UPDATE import mode, passing commit=False.
    seed_frame = pd.read_csv(test_data_path)
    write_pandas(repo, table_name, seed_frame, UPDATE, commit=False)

    yield repo, table_name

    # Cleanup: drop the table only if it is still listed in the repo.
    if any(table.name == table_name for table in repo.ls()):
        _execute(['table', 'rm', table_name], repo.repo_dir())
Пример #2
0
 def _insert_row_helper(repo, table, row):
     """Write ``row`` to ``table`` in ``repo`` using UPDATE mode, passing commit=False."""
     write_pandas(
         repo,
         table,
         row,
         UPDATE,
         commit=False,
     )
Пример #3
0
def test_get_dirty_tables(create_test_table):
    """Check that ``repo.status()`` reports new and changed tables correctly.

    Four tables are put into distinct states (modified/created crossed with
    staged/unstaged). The test then asserts that ``status.added_tables`` and
    ``status.modified_tables`` map each table name to ``True`` when it was
    added via ``repo.add`` after its last write and ``False`` otherwise.
    """
    repo, test_table = create_test_table
    message = 'Committing test data'

    # Some test data
    base_rows = pd.DataFrame({
        'id': [1],
        'name': ['Bianca'],
        'role': ['Champion']
    })
    extra_row = pd.DataFrame({
        'name': ['Serena'],
        'id': [2],
        'role': ['Runner-up']
    })

    def _append_extra_row(table_name):
        # Write the extra row in UPDATE mode, passing commit=False.
        write_pandas(repo, table_name, extra_row, UPDATE, commit=False)

    def _create_table(table_name):
        # Create a new table from the base rows, keyed on 'id'.
        write_pandas(repo,
                     table_name,
                     base_rows,
                     import_mode=CREATE,
                     primary_key=['id'],
                     commit=False)

    # existing, not modified
    repo.add(test_table)
    repo.commit(message)

    # existing tables: one will later be modified and staged, the other
    # modified and left unstaged; both are written and added before the commit
    modified_staged = 'modified_staged'
    modified_unstaged = 'modified_unstaged'
    for table_name in (modified_staged, modified_unstaged):
        write_pandas(repo, table_name, base_rows, commit=False)
        repo.add(table_name)

    # Commit and modify data
    repo.commit(message)
    # NOTE(review): the same row is written to modified_staged twice in a
    # row; the second write looks redundant -- confirm before removing.
    _append_extra_row(modified_staged)
    _append_extra_row(modified_staged)
    repo.add(modified_staged)
    _append_extra_row(modified_unstaged)

    # created, staged
    created_staged = 'created_staged'
    _create_table(created_staged)
    repo.add(created_staged)

    # created, unstaged
    created_unstaged = 'created_unstaged'
    _create_table(created_unstaged)

    status = repo.status()

    assert status.added_tables == {
        created_staged: True,
        created_unstaged: False,
    }
    assert status.modified_tables == {
        modified_staged: True,
        modified_unstaged: False,
    }
Пример #4
0
def test_write_pandas(init_empty_test_repo):
    """Write TEST_ROWS to a 'characters' table and compare what is read back."""
    repo = init_empty_test_repo
    expected_rows = pd.DataFrame(TEST_ROWS).to_dict('records')

    # CREATE import mode with ['id'] passed as the fifth positional argument.
    source_frame = pd.DataFrame(TEST_ROWS)
    write_pandas(repo, 'characters', source_frame, CREATE, ['id'])

    stored_rows = read_rows(repo, 'characters')
    compare_rows_helper(expected_rows, stored_rows)
    'SeasonRankingsRegularSeason': 'rankings_regular_season',
    'SeasonTotalsAllStarSeason': 'season_totals_allstar',
    'SeasonTotalsPostSeason': 'season_totals_post_season',
    'SeasonTotalsRegularSeason': 'season_totals_regular_season'
}

# Dolt handle for the current working directory ('.').
repo = Dolt('.')

# Import players
# Build a DataFrame from whatever players.get_players() returns
# (presumably a list of player records -- verify against the players module).
players_df = pandas.DataFrame(players.get_players())

print(players_df)

# Write the frame to the 'players' table in 'replace' mode with 'id' as the
# primary key; commit=False is passed, so no commit is requested by this call.
write_pandas(repo,
             'players',
             players_df,
             import_mode='replace',
             primary_key=['id'],
             commit=False)

# Import previously downloaded stats
# Each entry of the 'player-data' directory is treated as a player id, and
# each entry inside it as a file belonging to that player.
# NOTE(review): `count` is used as the progress counter in the print below but
# is not incremented in the visible part of the loop -- confirm it is updated
# further down.
count = 1
base = 'player-data'
player_ids = os.listdir(base)
total = len(player_ids)
for player_id in player_ids:
    print(f'{count}/{total}: {player_id}')

    for csvfile in os.listdir(f'{base}/{player_id}'):
        # The file name up to the first '.' is the lookup key into table_map;
        # .get() yields None when the key is not present.
        table_lookup = csvfile.split('.')[0]
        table_name = table_map.get(table_lookup)
Пример #6
0
# Root logger (getLogger() is called without a name); threshold set to WARNING.
logger = logging.getLogger()
logger.setLevel(level=logging.WARNING)

from doltpy.cli import Dolt
from doltpy.cli.write import write_pandas
import pandas as pd

if __name__ == "__main__":
    # Demo: initialise a Dolt repo named "foo", write two successive versions
    # of a table "bar", and commit after each write.
    dolt = Dolt.init("foo")

    # Version 1 has three rows of ones; version 2 adds three rows of twos.
    df_v1 = pd.DataFrame({"A": [1] * 3, "B": [1] * 3})
    df_v2 = pd.DataFrame({"A": [1] * 3 + [2] * 3, "B": [1] * 3 + [2] * 3})

    # reset_index() turns the default index into an "index" column, which is
    # then passed as the primary key.
    write_pandas(
        dolt=dolt,
        table="bar",
        df=df_v1.reset_index(),
        import_mode="create",
        primary_key=["index"],
    )
    dolt.add("bar")
    dolt.commit("Initialize bar")

    # First key of the one-entry log requested right after the first commit.
    v1 = list(dolt.log(number="1").keys())[0]

    # Second write uses "update" mode on the same table and key column.
    write_pandas(
        dolt=dolt,
        table="bar",
        df=df_v2.reset_index(),
        import_mode="update",
        primary_key=["index"],
    )
    dolt.add("bar")
    dolt.commit("Add rows to bar")
Пример #7
0
def test_write_pandas(init_empty_test_repo):
    """Write TEST_ROWS to a "characters" table and compare what is read back."""
    repo = init_empty_test_repo
    expected_rows = pd.DataFrame(TEST_ROWS).to_dict("records")

    # CREATE import mode with ["id"] passed as the fifth positional argument.
    source_frame = pd.DataFrame(TEST_ROWS)
    write_pandas(repo, "characters", source_frame, CREATE, ["id"])

    stored_rows = read_rows(repo, "characters")
    # Rows are compared with "id" passed as the third argument to compare_rows.
    compare_rows(expected_rows, stored_rows, "id")