def init(branch=dolthub_branch):
    # check if repo already exists in our cwd
    cwd = os.getcwd()
    path = os.path.join(cwd, dolthub_repo)
    if os.path.isdir(path):
        print(' [*] DoltHub Repo found in ./{}, re-initializing'.format(dolthub_repo))
        dolt = Dolt(path)
        # make sure the data isn't stale and pull new data
        print(' [*] Performing `dolt pull` to ensure repo is up to date')
        # check what branch we have
        b = Dolt.branch(dolt)[0].name
        print(' [*] Current Branch: {}'.format(b))
        # if we are not on the branch passed, then switch
        if b != branch:
            try:
                print(' [*] Checking out branch: {}'.format(branch))
                Dolt.checkout(dolt, branch=branch)
                # recheck the branch
                b = Dolt.branch(dolt)[0].name
                print(' [*] Current Branch: {}'.format(b))
            except Exception as e:
                # surface checkout failures instead of silently swallowing them
                print(' [!] WARNING: checkout failed: {}'.format(e))
        p = Dolt.pull(dolt)
        s = Dolt.status(dolt)
        print(' [*] Current Status: {}'.format(s))
    else:
        # clone the database from DoltHub, save it into a var to be referenced for read/write purposes
        print(' [*] Cloning DoltHub Repo: {} into ./{}'.format(dolthub_fullrepo, dolthub_repo))
        dolt = Dolt.clone(dolthub_fullrepo, branch=branch)
        b = Dolt.branch(dolt)[0].name
        print(' [*] Current Branch: {}'.format(b))
    return dolt
def new_branch(dolt, intake):
    # generates a modifier at the end, e.g. city-protect-import-py-5d365f9b
    new_branch = 'city-protect-import-py-{}'.format(str(uuid.uuid4()).split('-')[0])
    try:
        print(' [*] Creating new Branch: {}'.format(new_branch))
        b = Dolt.branch(dolt, branch_name=dolthub_branch, new_branch=new_branch, copy=True)
        b2 = Dolt.branch(intake, branch_name=intake_branch, new_branch=new_branch, copy=True)
        Dolt.checkout(dolt, branch=new_branch)
        Dolt.checkout(intake, branch=new_branch)
        print(' [*] Checking out new branch...')
        # recheck the branch
        b = Dolt.branch(dolt)[0].name
        b2 = Dolt.branch(intake)[0].name
        print(' [*] Current Branch: {} / {}'.format(b, b2))
    except Exception:
        print(' [!] FATAL ERROR: CANNOT CHECKOUT NEW BRANCH. ABORTING')
        sys.exit(1)
def commit(dolt):
    branch = Dolt.branch(dolt)[0].name
    print(' [*] Committing changes to {}'.format(branch))
    # make sure we are in our custom branch and not a main one
    if 'city-protect' in branch:
        dolt.remote(name='origin', url=dolthub_fullrepo)
        dolt.add(['data_incident_reports', 'datasets'])
        dolt.commit('Data Added from cityprotect_load.py ETL Script')
        dolt.push('origin', branch, set_upstream=True)
        print(' [*] Done!')
    else:
        print(' [!] ERROR: Cannot push to main branch. Aborting. Use the dolt CLI to migrate and finalize the commit')
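Taken together, these three helpers are the script's whole write path: `init` gets a working repo, `new_branch` isolates the import, and `commit` pushes it. A minimal driver sketch, assuming the module-level `dolthub_*`/`intake_branch` constants and an `intake` repo that the real script sets up elsewhere:

dolt = init()                 # clone on the first run, pull on later runs
intake = Dolt('intake')       # assumed second repo; the real script builds this elsewhere
new_branch(dolt, intake)      # move both repos onto a fresh city-protect-import-py-* branch
# ... ETL writes into data_incident_reports / datasets would happen here ...
commit(dolt)                  # add, commit, and push the working branch to origin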
def _import_helper(
    dolt: Dolt,
    table: str,
    write_import_file: Callable[[str], None],
    import_mode: Optional[str] = None,
    primary_key: Optional[List[str]] = None,
    commit: Optional[bool] = False,
    commit_message: Optional[str] = None,
    commit_date: Optional[datetime.datetime] = None,
) -> None:
    import_mode = _get_import_mode_and_flags(dolt, table, import_mode)
    logger.info(
        f"Importing to table {table} in dolt directory located in {dolt.repo_dir()}, import mode {import_mode}"
    )

    fname = tempfile.mktemp(suffix=".csv")
    import_flags = IMPORT_MODES_TO_FLAGS[import_mode]
    try:
        write_import_file(fname)
        args = ["table", "import", table] + import_flags
        if primary_key:
            args += ["--pk={}".format(",".join(primary_key))]

        dolt.execute(args + [fname])

        if commit:
            msg = commit_message or f"Committing write to table {table} in {import_mode} mode"
            dolt.add(table)
            dolt.commit(msg, date=commit_date)
    finally:
        if os.path.exists(fname):
            os.remove(fname)
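`_import_helper` delegates file creation to the `write_import_file` callback, so a caller only decides how rows become a CSV on disk. A sketch of such a caller, assuming a list-of-dicts payload; `my_dolt`, the table name, and the helper name are all illustrative, not part of the library:

import csv

def rows_to_csv_writer(rows):
    # returns a callback matching the Callable[[str], None] contract
    def writer(fname: str):
        with open(fname, "w", newline="") as f:
            w = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
            w.writeheader()
            w.writerows(rows)
    return writer

# Illustrative call; `my_dolt` is an assumed Dolt instance.
_import_helper(
    my_dolt,
    table="people",
    write_import_file=rows_to_csv_writer([{"id": 1, "name": "Ada"}]),
    primary_key=["id"],
    commit=True,
)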
def _get_import_mode_and_flags(dolt: Dolt, table: str, import_mode: Optional[str] = None) -> str:
    import_modes = IMPORT_MODES_TO_FLAGS.keys()
    if import_mode and import_mode not in import_modes:
        raise ValueError(f"import_mode must be one of: {import_modes}")
    if import_mode is None:
        # only infer the mode when the caller did not specify one
        if table in [table.name for table in dolt.ls()]:
            logger.info(f'No import mode specified, table exists, using "{UPDATE}"')
            import_mode = UPDATE
        else:
            logger.info(f'No import mode specified, table does not exist, using "{CREATE}"')
            import_mode = CREATE

    return import_mode
def _add_test_table(dolt: Dolt, table_name: str):
    dolt.sql(query=f'''
        CREATE TABLE `{table_name}` (
            `name` VARCHAR(32),
            `adjective` VARCHAR(32),
            `id` INT NOT NULL,
            `date_of_death` DATETIME,
            PRIMARY KEY (`id`)
        );
    ''')
    dolt.add(table_name)
    dolt.commit('Created test table')
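A quick sketch of exercising the helper; the repo path is an assumption:

# Illustrative usage; 'test-repo' is an assumed existing Dolt repo.
dolt = Dolt('test-repo')
_add_test_table(dolt, 'characters')
print([t.name for t in dolt.ls()])  # expect 'characters' in the listing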
def sql_server():
    p = None
    d = tempfile.TemporaryDirectory()
    try:
        db_path = os.path.join(d.name, "tracks")
        db = Dolt.init(db_path)
        db.sql("create table tracks (TrackId bigint, Name text)")
        db.sql(
            "insert into tracks values (0, 'Sue'), (1, 'L'), (2, 'M'), (3, 'Ji'), (4, 'Po')"
        )
        db.sql("select dolt_commit('-am', 'Init tracks')")
        p = Popen(args=["dolt", "sql-server", "-l", "trace", "--port", "3307"], cwd=db_path)
        time.sleep(.5)
        yield db
    finally:
        if p is not None:
            p.kill()
        if os.path.exists(d.name):
            shutil.rmtree(d.name)
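This generator is shaped like a pytest yield fixture; a sketch of registering and consuming it that way (the `pytest.fixture` wrapping and the test body are assumptions, not part of the snippet):

import pytest

sql_server = pytest.fixture(sql_server)  # register the generator as a yield fixture

def test_tracks_rowcount(sql_server):
    # the fixture yields a Dolt repo with the tracks table already committed
    rows = sql_server.sql("select count(*) as n from tracks", result_format="csv")
    assert rows[0]["n"] == "5"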
from doltpy.cli import Dolt
from doltpy.sql import DoltSQLServerContext, ServerConfig
from doltpy.sql.sync import sync_schema_to_dolt, MYSQL_TO_DOLT_TYPE_MAPPINGS
import sqlalchemy as sa
import os

cur_dir = os.path.dirname(os.path.realpath(__file__))
with open(cur_dir + "/../../private/.mysql_password", "r") as f:
    mysql_password = f.read().strip("\n")

# Set up objects to represent the source and target databases, then start the Dolt SQL server
dolt = Dolt.clone('durst/csknow')
dssc = DoltSQLServerContext(dolt, ServerConfig())
dssc.start_server()

mysql_engine = sa.create_engine(
    '{dialect}://{user}:{password}@{host}:{port}/{database}'.format(
        dialect='mysql+mysqlconnector',
        user="******",
        password=mysql_password,
        host="localhost",
        port="3124",
        database="csknow"
    )
)

sync_schema_to_dolt(
    mysql_engine,
    dssc.engine,
    {
        "players": "players",
        "rounds": "rounds",
        "ticks": "ticks",
        "player_at_tick": "player_at_tick",
        "spotted": "spotted",
        "weapon_fire": "weapon_fire",
        "kills": "kills",
        "hurt": "hurt",
        "grenades": "grenades",
        "flashed": "flashed",
        "grenade_trajectories": "grenade_trajectories",
        "plants": "plants",
        "defusals": "defusals",
        "explosions": "explosions",
    },
    MYSQL_TO_DOLT_TYPE_MAPPINGS,
)
from doltpy.cli import Dolt
from doltpy.cli.read import read_pandas
import sqlalchemy
import pandas as pd
import sys
from sqlalchemy import create_engine
from tqdm import tqdm

engine = create_engine("mysql://*****:*****@ip:3306/data?charset=utf8")
repo = Dolt('hospital-price-transparency')

query = "SELECT code, short_description, long_description from cpt_hcpcs WHERE (short_description like '%%blood%%' and " \
        "short_description like '%%transfusion%%') or (long_description like '%%blood%%' and " \
        "long_description like '%%transfusion%%');"


def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


def insert_with_progress(df, table_name):
    # chunker covers the remainder rows that a fixed range(10) loop would drop
    chunksize = max(int(len(df) / 10), 1)
    with tqdm(total=len(df)) as pbar:
        for cdf in chunker(df, chunksize):
            # the original snippet was truncated mid-call; completing it is an assumption
            cdf.to_sql(name=table_name, con=engine, if_exists="append", index=False)
            pbar.update(len(cdf))
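# Presumed final step of this script (an assumption, not in the original):
# run the transfusion query against MySQL and load the result in chunks.
df = pd.read_sql(query, engine)
insert_with_progress(df, 'blood_transfusion_codes')  # hypothetical target table name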
def _read_table_sql(dolt: Dolt, sql: str) -> List[dict]:
    return dolt.sql(sql, result_format="csv")
def _write_helper(dolt: Dolt, data: List[dict], update_type: str):
    write_rows(dolt, TEST_TABLE, data, update_type, ['id'], commit=True)
    commit_hash, _ = dolt.log().popitem(last=False)
    return dolt, commit_hash
import pandas
from doltpy.cli import Dolt
from doltpy.cli.write import write_pandas
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats

table_map = {
    'CareerTotalsAllStarSeason': 'career_totals_allstar',
    'CareerTotalsPostSeason': 'career_totals_post_season',
    'CareerTotalsRegularSeason': 'career_totals_regular_season',
    'SeasonRankingsPostSeason': 'rankings_post_season',
    'SeasonRankingsRegularSeason': 'rankings_regular_season',
    'SeasonTotalsAllStarSeason': 'season_totals_allstar',
    'SeasonTotalsPostSeason': 'season_totals_post_season',
    'SeasonTotalsRegularSeason': 'season_totals_regular_season'
}

repo = Dolt('.')

# Import players
players_df = pandas.DataFrame(players.get_players())
print(players_df)
write_pandas(repo, 'players', players_df, import_mode='replace', primary_key=['id'], commit=False)

# Import previously downloaded stats
count = 1
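# Hypothetical continuation of the truncated loop: one downloaded CSV per
# playercareerstats result set; the stats/<endpoint>.csv layout and the
# PLAYER_ID primary key are assumptions for illustration only.
for endpoint, table in table_map.items():
    stats_df = pandas.read_csv('stats/{}.csv'.format(endpoint))
    write_pandas(repo, table, stats_df, import_mode='replace', primary_key=['PLAYER_ID'], commit=False)
    print('[{}/{}] wrote {}'.format(count, len(table_map), table))
    count += 1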
import logging

logger = logging.getLogger()
logger.setLevel(logging.WARNING)

from doltpy.cli import Dolt
from doltpy.cli.write import write_pandas
import pandas as pd

if __name__ == "__main__":
    dolt = Dolt.init("foo")
    df_v1 = pd.DataFrame({"A": [1, 1, 1], "B": [1, 1, 1]})
    df_v2 = pd.DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": [1, 1, 1, 2, 2, 2]})
    write_pandas(dolt=dolt, table="bar", df=df_v1.reset_index(), primary_key=["index"], import_mode="create")
    dolt.add("bar")
    dolt.commit("Initialize bar")
    v1 = list(dolt.log(number="1").keys())[0]
    write_pandas(dolt=dolt, table="bar", df=df_v2.reset_index(), primary_key=["index"], import_mode="update")
    dolt.add("bar")
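    # Plausible continuation (an assumption; the snippet cuts off here): commit
    # the second write and inspect row-level changes via Dolt's diff system table.
    dolt.commit("Update bar")
    v2 = list(dolt.log(number="1").keys())[0]
    diff = dolt.sql(
        f"select * from dolt_diff_bar where from_commit = '{v1}' and to_commit = '{v2}'",
        result_format="csv",
    )
    print(diff)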