def create_test_table(init_empty_test_repo, create_test_data) -> Tuple[Dolt, str]:
    """Yield a repo containing a loaded ``test_players`` table, then drop it.

    The table is created with a SQL statement, filled from the CSV file at the
    path given by ``create_test_data`` (written in UPDATE mode, not committed),
    and handed over as a ``(repo, table_name)`` pair.  Once control returns
    here, the table is removed if ``repo.ls()`` still lists it.

    NOTE(review): written as a generator, so presumably registered as a
    pytest fixture by a decorator that is not visible here -- confirm.
    """
    table_name = 'test_players'
    repo = init_empty_test_repo
    test_data_path = create_test_data

    repo.sql(query=''' CREATE TABLE `test_players` ( `name` LONGTEXT NOT NULL COMMENT 'tag:0', `id` BIGINT NOT NULL COMMENT 'tag:1', PRIMARY KEY (`id`) ); ''')

    players = pd.read_csv(test_data_path)
    write_pandas(repo, table_name, players, UPDATE, commit=False)

    yield repo, table_name

    # Teardown: only remove the table when it still exists, since the
    # consumer may already have dropped it.
    remaining = {table.name for table in repo.ls()}
    if table_name in remaining:
        _execute(['table', 'rm', table_name], repo.repo_dir())
def _insert_row_helper(repo, table, row):
    """Write ``row`` into ``table`` of ``repo`` in UPDATE mode without committing."""
    write_pandas(
        repo,
        table,
        row,
        import_mode=UPDATE,
        commit=False,
    )
def test_get_dirty_tables(create_test_table):
    """Check that ``repo.status()`` reports new and modified tables with their staged flag.

    Four tables are put into distinct states on top of the committed fixture
    table:

    * ``modified_staged``   -- committed, then changed and staged
    * ``modified_unstaged`` -- committed, then changed but not staged
    * ``created_staged``    -- newly created and staged
    * ``created_unstaged``  -- newly created and not staged

    The test then asserts that ``status.added_tables`` and
    ``status.modified_tables`` equal dicts mapping each table name to
    ``True`` (staged) or ``False`` (unstaged).
    """
    repo, test_table = create_test_table
    message = 'Committing test data'

    # Some test data
    initial = pd.DataFrame({
        'id': [1],
        'name': ['Bianca'],
        'role': ['Champion'],
    })
    appended_row = pd.DataFrame({
        'name': ['Serena'],
        'id': [2],
        'role': ['Runner-up'],
    })

    def _insert_row_helper(repo, table, row):
        # Add a row to an existing table without committing.
        write_pandas(repo, table, row, UPDATE, commit=False)

    # existing, not modified
    repo.add(test_table)
    repo.commit(message)

    # existing, modified, staged
    modified_staged = 'modified_staged'
    write_pandas(repo, modified_staged, initial, commit=False)
    repo.add(modified_staged)

    # existing, modified, unstaged
    modified_unstaged = 'modified_unstaged'
    write_pandas(repo, modified_unstaged, initial, commit=False)
    repo.add(modified_unstaged)

    # Commit and modify data.  Each table receives the appended row exactly
    # once, through the same helper; only the first one is staged afterwards.
    repo.commit(message)
    _insert_row_helper(repo, modified_staged, appended_row)
    repo.add(modified_staged)
    _insert_row_helper(repo, modified_unstaged, appended_row)

    # created, staged
    created_staged = 'created_staged'
    write_pandas(repo, created_staged, initial, import_mode=CREATE, primary_key=['id'], commit=False)
    repo.add(created_staged)

    # created, unstaged
    created_unstaged = 'created_unstaged'
    write_pandas(repo, created_unstaged, initial, import_mode=CREATE, primary_key=['id'], commit=False)

    status = repo.status()

    # table name -> staged?
    expected_new_tables = {created_staged: True, created_unstaged: False}
    expected_changes = {modified_staged: True, modified_unstaged: False}

    assert status.added_tables == expected_new_tables
    assert status.modified_tables == expected_changes
def test_write_pandas(init_empty_test_repo):
    """Write ``TEST_ROWS`` into a new ``characters`` table and compare what is read back."""
    table = 'characters'

    # The expected records come from their own DataFrame, independent of the
    # frame that is handed to write_pandas.
    expected = pd.DataFrame(TEST_ROWS).to_dict('records')

    dolt = init_empty_test_repo
    write_pandas(dolt, table, pd.DataFrame(TEST_ROWS), CREATE, ['id'])

    actual = read_rows(dolt, table)
    compare_rows_helper(expected, actual)
'SeasonRankingsRegularSeason': 'rankings_regular_season', 'SeasonTotalsAllStarSeason': 'season_totals_allstar', 'SeasonTotalsPostSeason': 'season_totals_post_season', 'SeasonTotalsRegularSeason': 'season_totals_regular_season' } repo = Dolt('.') # Import players players_df = pandas.DataFrame(players.get_players()) print(players_df) write_pandas(repo, 'players', players_df, import_mode='replace', primary_key=['id'], commit=False) # Import previously downloaded stats count = 1 base = 'player-data' player_ids = os.listdir(base) total = len(player_ids) for player_id in player_ids: print(f'{count}/{total}: {player_id}') for csvfile in os.listdir(f'{base}/{player_id}'): table_lookup = csvfile.split('.')[0] table_name = table_map.get(table_lookup)
# Limit the root logger to warnings and above before the doltpy imports below.
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

from doltpy.cli import Dolt
from doltpy.cli.write import write_pandas
import pandas as pd

if __name__ == "__main__":
    # Initialise a repository named "foo".
    dolt = Dolt.init("foo")

    # Two versions of the same data set; the second has three extra rows.
    df_v1 = pd.DataFrame({"A": [1, 1, 1], "B": [1, 1, 1]})
    df_v2 = pd.DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": [1, 1, 1, 2, 2, 2]})

    # First version: create table "bar", using the frame's index as primary key.
    write_pandas(
        dolt=dolt,
        table="bar",
        df=df_v1.reset_index(),
        primary_key=["index"],
        import_mode="create",
    )
    dolt.add("bar")
    dolt.commit("Initialize bar")

    # Keep the first key of the one-entry log.
    # NOTE(review): presumably this key is the commit hash -- confirm.
    latest_entries = dolt.log(number="1")
    v1 = list(latest_entries.keys())[0]

    # Second version: write into the existing table in update mode.
    write_pandas(
        dolt=dolt,
        table="bar",
        df=df_v2.reset_index(),
        primary_key=["index"],
        import_mode="update",
    )
    dolt.add("bar")
    dolt.commit("Add rows to bar")
def test_write_pandas(init_empty_test_repo):
    """Create ``characters`` from ``TEST_ROWS`` and compare the rows read back, passing ``"id"`` to ``compare_rows``."""
    table_name = "characters"
    dolt = init_empty_test_repo

    source_frame = pd.DataFrame(TEST_ROWS)
    write_pandas(dolt, table_name, source_frame, CREATE, ["id"])

    actual = read_rows(dolt, table_name)

    # The expected records come from a fresh frame, not the one handed to
    # write_pandas.
    expected = pd.DataFrame(TEST_ROWS).to_dict("records")
    compare_rows(expected, actual, "id")