def _import_and_commit(dolt: Dolt,
                       table: str,
                       data: pd.DataFrame,
                       primary_keys: Optional[List[str]],
                       import_mode: str):
    """
    Import `data` into `table`, commit the change and describe the new commit.

    The data is wrapped in a fresh ``pd.DataFrame`` and handed to
    ``dolt_write.import_df``; the table is then staged and committed with a
    message naming the table and the import mode.

    :param dolt: repository handle the import and commit are executed against.
    :param table: name of the table to import into and stage.
    :param data: rows to import.
    :param primary_keys: primary key columns forwarded to ``import_df``.
    :param import_mode: import mode forwarded to ``import_df`` and recorded in
        the commit message.
    :return: dict with the ``commit_hash``, ``timestamp``, ``author`` and
        ``message`` of the first entry of ``dolt.log()`` after the commit.
    """
    dolt_write.import_df(dolt, table, pd.DataFrame(data), primary_keys, import_mode)
    dolt.add(table)
    dolt.commit('Executed import on table {} in import mode "{}"'.format(
        table, import_mode))

    log = dolt.log()
    if isinstance(log, dict):
        # Other code in this file treats log() as an ordered mapping keyed by
        # commit hash; indexing such a mapping with 0 raises KeyError, so take
        # the first value instead.
        commit = next(iter(log.values()))
    else:
        # Sequence-style log: keep the original positional access.
        commit = log[0]

    return {
        'commit_hash': commit.hash,
        'timestamp': commit.ts,
        'author': commit.author,
        'message': commit.message
    }
def _prompt_int(prompt):
    """Ask the user for an integer, re-prompting until the input parses."""
    while True:
        raw = input(prompt)
        try:
            return int(raw)
        except ValueError:
            print(f"'{raw}' is not a valid integer, please try again.")


def _print_active_branch(act_branch):
    """Print the active-branch banner shown before most menu actions."""
    print()
    print("Active Branch:")
    print(act_branch)
    print()


def _load_commits(repo):
    """Fetch the repository log and return ``(commits, elapsed_time)``."""
    start_time = datetime.datetime.now()
    commits = list(repo.log().values())
    end_time = datetime.datetime.now()
    return commits, end_time - start_time


def _split_commit(commit):
    """
    Split a commit's string form into ``(reference, description)``.

    The text before the first ":" is the reference; the description is
    whatever follows the first " @ " in the remainder.
    NOTE(review): this relies on the string format of the commit objects
    returned by ``repo.log()``, which is not visible here — confirm.
    """
    com_data = str(commit).split(":", 1)
    msg = com_data[1].split(" @ ", 1)
    return com_data[0], msg[1]


def main():
    """
    Run an interactive console menu for exercising a Dolt repository in ".".

    The menu lists/switches/creates/merges/deletes branches, lists revisions,
    creates a branch starting at a chosen revision and adds revisions one at a
    time or in batches, printing how long each repository operation took.
    The loop ends when the user enters ``q``.
    """
    repo_path = "."
    repo = Dolt(repo_path)
    act_branch, branches = repo.branch()
    # None means "log not fetched yet". The cache is filled by options 2 and 3
    # and, on demand, by options 7 and 8; it is not refreshed when options
    # 10/11 add revisions.
    commits = None
    print()
    answer = ''
    printFlag = False
    while answer != 'q':
        print("0 - List Branches")
        print("1 - List Active Branch")
        print("2 - Get Branch Commits/Revisions")
        print("3 - Change Branch")
        print("4 - Create Branch")
        print("5 - Merge Branch")
        print("6 - Delete Branch")
        print("7 - List Revision Range")
        print("8 - Checkout a Revision")
        print("9 - Enable List Commits Printing")
        print("10- Add Revisions")
        print("11- Batch Revisions")
        print("q - Quit")
        answer = input("Select item : ")

        # List Branches
        if answer == '0':
            act_branch, branches = repo.branch()
            for branch in branches:
                # The branch name is parsed out of the branch's string form:
                # the text between the first ":" and the following ",".
                branchData = str(branch).split(":", 2)
                branchName = str(branchData[1]).split(",", 1)
                print(branchName[0])

        # List Active Branch
        elif answer == '1':
            _print_active_branch(act_branch)

        # Get Branch Commits/Revisions
        elif answer == '2':
            commits, elapsed = _load_commits(repo)
            print("Total Revisions = ", len(commits))
            print("Total Commit Revision Pull Time = ", elapsed)
            if printFlag:
                for com_id, entry in enumerate(commits):
                    _, description = _split_commit(entry)
                    print("commit_id: ", com_id, description)

        # Change Branch
        elif answer == '3':
            myBranch = input("Branch Name : ")
            switch_branch(repo, myBranch)
            act_branch, branches = repo.branch()
            commits, elapsed = _load_commits(repo)
            print("Total Commit Revision Pull Time = ", elapsed)

        # Create Branch
        elif answer == '4':
            _print_active_branch(act_branch)
            newBranch = input("Enter New Branch Name : ")
            start_time = datetime.datetime.now()
            create_branch(repo, newBranch)
            end_time = datetime.datetime.now()
            print("Total Create Branch Time = ", end_time - start_time)

        # Merge Branch
        elif answer == '5':
            _print_active_branch(act_branch)
            mergeBranch = input("Enter Merge Branch Name : ")
            start_time = datetime.datetime.now()
            merge_branch(repo, mergeBranch, "Merging Branch")
            end_time = datetime.datetime.now()
            print("Total Merge Branch Time = ", end_time - start_time)

        # Delete Branch
        elif answer == '6':
            _print_active_branch(act_branch)
            deleteBranch = input("Enter Branch Name To Delete : ")
            start_time = datetime.datetime.now()
            delete_branch(repo, deleteBranch)
            end_time = datetime.datetime.now()
            # Previously mislabelled as "Total Create Branch Time".
            print("Total Delete Branch Time = ", end_time - start_time)

        # List Revision Range
        elif answer == '7':
            _print_active_branch(act_branch)
            revision_id = _prompt_int("Enter Start Revision ID : ")
            revision_max = _prompt_int("Enter Revision Range : ")
            if commits is None:
                # The user came here without running option 2 or 3 first.
                commits, _ = _load_commits(repo)
            # Clamp the requested window to the revisions that exist.
            first = max(revision_id, 0)
            last = min(revision_id + revision_max, len(commits))
            if first >= last:
                print("No revisions in the requested range.")
            for com_id in range(first, last):
                _, description = _split_commit(commits[com_id])
                print("commit_id: ", com_id, description)

        # Checkout a Revision
        elif answer == '8':
            _print_active_branch(act_branch)
            revision_id = _prompt_int("Enter Revision ID : ")
            if commits is None:
                commits, _ = _load_commits(repo)
            if not 0 <= revision_id < len(commits):
                print(f"Revision ID {revision_id} is out of range "
                      f"({len(commits)} revisions available).")
                continue
            commit_ref, _ = _split_commit(commits[revision_id])
            startBranch = "revision" + str(revision_id)
            start_time = datetime.datetime.now()
            starting_point_branch(repo, startBranch, commit_ref)
            end_time = datetime.datetime.now()
            # Previously mislabelled as "Total Merge Branch Time".
            print("Total Checkout Revision Time = ", end_time - start_time)

        # Enable List Commits Printing
        elif answer == '9':
            printFlag = True

        # Add Revisions
        elif answer == '10':
            _print_active_branch(act_branch)
            person = str(input("Enter First Name : "))
            revision_id = _prompt_int("Enter Start Revision ID : ")
            revision_max = _prompt_int("Enter End Revision ID : ")
            start_time = datetime.datetime.now()
            # One change and one commit per revision id, end id inclusive.
            for revId in range(revision_id, revision_max + 1):
                change_person_revid(repo, person, revId)
                comment = f"{person} RevID=" + str(revId).zfill(2)
                commit(repo, comment)
            end_time = datetime.datetime.now()
            print("Total Revision Addition Time = ", end_time - start_time)
            print(f"Revisions {revision_id} -> {revision_max}")

        # Batch Revisions
        elif answer == '11':
            _print_active_branch(act_branch)
            person = str(input("Enter First Name : "))
            revision_id = _prompt_int("Enter Start Revision ID : ")
            revision_max = _prompt_int("Enter Revision Range : ")
            step = 50
            start_time = datetime.datetime.now()
            batch_revisions(repo, revision_id, revision_max, step, person)
            end_time = datetime.datetime.now()
            print("Total Revision Addition Time = ", end_time - start_time)
class DoltDT(object):
    """
    Context for reading and writing Dolt tables from a Metaflow flow.

    Every table read and write is recorded (flow/run/step/task, commit, table,
    database) and the records are stored in a "metadata" table of a second
    Dolt database located in the current working directory.
    """

    def __init__(self, run=None, database: str = ".", branch: str = 'master'):
        """
        Initialize a new context for Dolt operations with Metaflow.

        run: this is either
            - a FlowSpec when initialized with a running Flow
            - a Flow when looking across for data read/written across runs of a Flow
            - a Run when looking for data read/written by a specific run
        database: this is a path to a location on the filesystem with a Dolt database
        branch: branch of `database` to work on; it is checked out when it is
            not already the current branch
        """
        self.run = run
        self.database = database
        self.branch = branch
        self.meta_database = "."
        self.doltdb = Dolt(self.database)

        # The metadata database lives in the working directory; create it if
        # opening fails. `Exception` (not a bare except) so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        try:
            self.meta_doltdb = Dolt(os.getcwd())
        except Exception:
            self.meta_doltdb = Dolt.init(os.getcwd())

        current_branch, _ = self.doltdb.branch()
        # Name of the branch that was active before switching, or None when no
        # switch was needed. (Previously assigned to a local variable, so the
        # attribute was always None.)
        self.entry_branch = None
        if current_branch.name != branch:
            self.entry_branch = current_branch.name
            self.doltdb.checkout(branch, checkout_branch=False)

        self.table_reads = []
        self.table_writes = []

    def __enter__(self):
        """Enter the context; requires a running flow and a clean working set."""
        assert isinstance(
            self.run, FlowSpec
        ) and current.is_running_flow, 'Context manager use requires running flow'
        assert self.doltdb.status(
        ).is_clean, 'DoltDT as context manager requires clean working set for transaction semantics'
        return self

    def __exit__(self, *args, allow_empty: bool = True):
        """
        Commit pending table changes, then commit the read/write metadata.

        Data is committed only when the working set is dirty; metadata is
        committed only when at least one read or write was recorded.
        """
        if not self.doltdb.status().is_clean:
            self.commit_writes(allow_empty=allow_empty)
        if self.table_reads or self.table_writes:
            self.commit_metadata(allow_empty=allow_empty)

    def _get_table_read(self, table: str) -> DoltRead:
        """Build a 'read' record for `table` at the latest commit."""
        return self._get_dolt_action('read', DoltRead, table)

    def _get_table_write(self, table: str) -> DoltWrite:
        """Build a 'write' record for `table` at the latest commit."""
        return self._get_dolt_action('write', DoltWrite, table)

    def _get_dolt_action(self, action_str: str, action: type, table: str):
        """Instantiate `action` with the current flow/run/step/task identifiers."""
        return action(
            flow_name=current.flow_name,
            run_id=current.run_id,
            step_name=current.step_name,
            task_id=current.task_id,
            commit=self._get_latest_commit_hash(),
            table_name=table,
            database=self.database,
            kind=action_str,
        )

    def _get_latest_commit_hash(self) -> str:
        """
        Return the key of the first entry of the data database's log.

        NOTE(review): presumably log() is ordered newest-first and keyed by
        commit hash — confirm against the Dolt wrapper in use.
        """
        lg = self.doltdb.log()
        return lg.popitem(last=False)[0]

    def write_metadata(self, data: List[DoltMeta]):
        """Important that write metadata commit is recorded immediately after the data commit"""
        # Import exactly the records that were passed in (the parameter used
        # to be ignored in favour of self.table_reads + self.table_writes,
        # which is what the in-class caller passes anyway). Every column is
        # used as a primary key.
        meta_df = pd.DataFrame.from_records([x.dict() for x in data])
        import_df(repo=self.meta_doltdb,
                  table_name="metadata",
                  data=meta_df,
                  primary_keys=meta_df.columns.tolist())

    def write_table(self, table_name: str, df: pd.DataFrame, pks: List[str]):
        """
        Writes the contents of the given DataFrame to the specified table. If the table exists it is updated, if it
        does not it is created.

        The write is recorded in self.table_writes. Only allowed in a running Flow.
        """
        assert current.is_running_flow, 'Writes and commits are only supported in a running Flow'
        import_df(repo=self.doltdb,
                  table_name=table_name,
                  data=df,
                  primary_keys=pks)
        self.table_writes.append(self._get_table_write(table_name))

    def read_table(self,
                   table_name: str,
                   commit: str = None,
                   flow_name: str = None,
                   run_id: str = None) -> pd.DataFrame:
        """
        Returns the specified tables as a DataFrame.

        - With `commit`: the table is read AS OF that commit.
        - With `flow_name` and `run_id`: the database and commit are taken
          from the first row returned by the 'read' actions query against the
          metadata database, and the table is read AS OF that commit there.
        - Otherwise: the table is read from the data database as it is now.

        The read is recorded in self.table_reads.
        Raises ValueError when not called from a running Flow.
        """
        if not current.is_running_flow:
            raise ValueError("read_table is only supported in a running Flow")

        read_meta = self._get_table_read(table_name)

        if commit:
            table = self._get_dolt_table_asof(self.doltdb, table_name, commit)
            read_meta.commit = commit
        elif flow_name and run_id:
            df = read_table_sql(self.meta_doltdb,
                                _get_actions_query(flow_name, run_id, 'read'))
            database = df.database.values[0]
            commit = df.commit.values[0]
            # checkout database and get table ASOF commit
            db = Dolt(database)
            table = self._get_dolt_table_asof(db, table_name, commit)
            read_meta.commit = commit
        else:
            # `read_table` here is the module-level function, not this method.
            table = read_table(self.doltdb, table_name)
            read_meta.commit = self._get_latest_commit_hash()

        self.table_reads.append(read_meta)
        return table

    def commit_writes(self, allow_empty=True):
        """
        Stages every table named in self.table_writes and self.table_reads and creates a new commit in the data
        database. The commit message identifies the current flow/run/step/task.

        Raises ValueError when not called from a running Flow.
        """
        if not current.is_running_flow:
            raise ValueError(
                'Writes and commits are only supported in a running Flow')

        to_commit = [
            table_write.table_name
            for table_write in self.table_writes + self.table_reads
        ]
        self.doltdb.add(to_commit)
        self.doltdb.commit(message=self._get_commit_message(),
                           allow_empty=allow_empty)

    def commit_metadata(self, allow_empty=True):
        """
        Stamp recorded writes with the latest data commit, store all recorded
        reads and writes in the "metadata" table and commit it.

        Returns whatever the metadata database's commit() returns.
        """
        commit_hash = self._get_latest_commit_hash()  # might be different db
        for w in self.table_writes:
            w.set_commit(commit_hash)
        self.write_metadata(self.table_reads + self.table_writes)
        self.meta_doltdb.add("metadata")
        return self.meta_doltdb.commit(message=self._get_commit_message(),
                                       allow_empty=allow_empty)

    @classmethod
    def _get_commit_message(cls):
        """Return "<flow>/<run>/<step>/<task>" for the current Metaflow task."""
        return f'{current.flow_name}/{current.run_id}/{current.step_name}/{current.task_id}'

    @classmethod
    def _get_dolt_table_asof(cls,
                             dolt: Dolt,
                             table_name: str,
                             commit: str = None) -> pd.DataFrame:
        """Read all rows of `table_name`, AS OF `commit` when one is given."""
        # NOTE(review): table_name and commit are interpolated directly into
        # the SQL text; callers must not pass untrusted values.
        base_query = f'SELECT * FROM `{table_name}`'
        if commit:
            return read_table_sql(dolt, f'{base_query} AS OF "{commit}"')
        else:
            return read_table_sql(dolt, base_query)