import pygit2


def sync(repo: pygit2.Repository, branch_name: str) -> None:
    """
    Tries to update the `branch_name` branch of the `repo` repo to the latest
    upstream branch state.

    If the branch is up to date, does nothing. If the branch can be
    fast-forwarded, resets to the upstream. Otherwise, fails with an error.
    """
    branch = repo.branches.local[branch_name]
    if not branch.is_head():
        raise ValueError(branch)
    try:
        remote = repo.remotes['origin']
    except KeyError:
        return
    remote.fetch(callbacks=pygit2.RemoteCallbacks())
    upstream_branch = branch.upstream
    if not upstream_branch:
        return
    merge_state, _ = repo.merge_analysis(upstream_branch.target, branch.name)
    if merge_state & pygit2.GIT_MERGE_ANALYSIS_UP_TO_DATE:
        return
    if not (merge_state & pygit2.GIT_MERGE_ANALYSIS_FASTFORWARD):
        raise ValueError(branch)
    repo.reset(upstream_branch.target, pygit2.GIT_RESET_HARD)
    repo.checkout(refname=branch)
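# A minimal usage sketch for sync() above; the repository path is
# hypothetical. sync() raises ValueError when the branch is not checked out
# or cannot be fast-forwarded.
if __name__ == '__main__':
    repo = pygit2.Repository('/path/to/repo')  # hypothetical path
    try:
        sync(repo, repo.head.shorthand)
    except ValueError as exc:
        print('cannot fast-forward:', exc)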
def launch(self):
    # init repo
    self._info('Locating the repository..')
    try:
        repo = Repository(REPO_PATH)
    except GitError:
        self._error('Failed to locate the repository!')
        return

    # fetch
    self._info('Fetching repository..')
    try:
        repo.remotes['origin'].fetch()
    except GitError:
        self._error('Failed to fetch the repository!')

    # reset repository
    self._info('Resetting the repository..')
    try:
        repo.reset(
            repo.lookup_reference('refs/remotes/origin/master').target,
            GIT_RESET_HARD)
    except GitError:
        self._error('Failed to reset the repository!')

    # launch
    self._info('Launching..')
    try:
        subprocess.Popen(EXE_PATH, cwd=EXE_WORKDIR)
    except OSError:
        self._error('Failed to launch!')
    else:
        self.destroy()
config = repo.config
remote_url = repo.remotes[args.remote].url
pass_path = None
for glob in credentials_mapping.keys():
    if fnmatch.fnmatch(remote_url, glob):
        pass_path = credentials_mapping[glob]["target"]

# FIXME: user identity (name + email) is not always set at the repo level;
# that said, we need a SPOT for git identities as used/implemented
# in the git-identity Emacs package
source_branch_name = (args.update_source_branch
                      if args.update_source_branch != ""
                      else get_active_branch(repo))

remote = resolve_remote(repo, args.remote)
if not remote:
    log_error(f"cannot find remote '{args.remote}'")
    sys.exit(1)

if args.update_op == "fetch":
    remote.fetch(refspecs=["refs/heads/*:refs/heads/*"])
elif args.update_op == "merge":
    source_branch_head = repo.references[source_branch_name].resolve().target
    repo.merge(source_branch_head)
elif args.update_op == "rebase":
    source_branch = repo.lookup_branch(source_branch_name, GIT_BRANCH_REMOTE)
    dest_branch = repo.lookup_branch(get_active_branch(repo))
    dest_branch.set_target(source_branch.target)
    # Fast-forwarding with set_target() leaves the index and the working tree
    # in their old state. That's why we need to checkout() and reset().
    repo.checkout(f"refs/heads/{dest_branch.branch_name}")
    repo.reset(dest_branch.target, GIT_RESET_HARD)
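# resolve_remote() is used above but not shown in this excerpt. A minimal
# sketch of what it might look like, assuming it simply scans repo.remotes by
# name and returns None when the remote does not exist:
def resolve_remote(repo, remote_name):
    for remote in repo.remotes:
        if remote.name == remote_name:
            return remote
    return None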
def post(event, context):
    # Logging the entire event is a cheap, simple way to make debugging
    # easier. Often just being able to see the event information quickly can
    # help troubleshoot an issue faster than hooking up a debugger.
    logger.info(event)

    # We always want to take the shortest path through our functions.
    # Check for anything fatal first.
    try:
        output_bucket = os.environ['output_bucket']
    except KeyError:
        raise Exception(
            'Output Bucket not defined. Set the environment variable for the function'
        )

    try:
        comment_function = os.environ['comment_function']
    except KeyError:
        raise Exception(
            'Comment Function not defined. Set the environment variable for the function'
        )

    # Here we take a few steps to get the JSON into the body object. If this
    # came in as a proxy request, a direct API Gateway request, or a boto3
    # invocation, the format of the body could be a few different types. With
    # this stepped approach we can guarantee that no matter how this was
    # called, we will have JSON in the body variable.
    if "body" in event:
        body = json.loads(event['body'])
    else:
        try:
            body = json.loads(event)
        except (TypeError, ValueError):
            body = event

    # We will still validate this before doing anything with it, but if we
    # are missing any essential components we should end early to save
    # processing time. No point in computing hashes for a payload that is
    # missing data we need.
    try:
        full_name = body['repository']['full_name']
    except KeyError:
        raise Exception('Failed to find full_name in json post body')

    try:
        remote_url = body['repository']['clone_url']
    except KeyError:
        raise Exception('Failed to find clone_url in json post body')

    # Another short circuit: if we know this wasn't called locally, then it
    # was likely called via the webhook or some other HTTP entity, so we need
    # to see what kind of event it is and process it appropriately. Otherwise
    # we can save ourselves a bunch of validation.
    if "local_invoke" not in body:
        try:
            github_secrets = os.environ['github_secrets']
        except KeyError:
            raise Exception(
                'Github secrets not defined. Set the environment variable for the function'
            )

        if "headers" in event and "X-GitHub-Event" in event['headers']:
            # We only care about push events; if this isn't one, politely exit.
            if event['headers']['X-GitHub-Event'] != "push":
                return {
                    "statusCode": 200,
                    "body": json.dumps('Skipping - Not a push event')
                }

        # We split this env variable because we could be re-using this
        # function for multiple API endpoints, multiple repos, etc. It is
        # best practice to have a secret per repo, so even if we use this
        # exact endpoint we can still feed it multiple repos with multiple
        # keys. Keys are separated with a ','.
        apikeys = github_secrets.split(',')

        # Set a validation flag; we will check multiple keys, so it holds our
        # result.
        secure = False
        # Compute the hash and validate the signature. If it passes, set
        # secure; otherwise throw an error.
        if 'X-Hub-Signature' in event['headers'].keys():
            signature = event['headers']['X-Hub-Signature']
            for k in apikeys:
                computed_hash = hmac.new(k.encode('ascii'),
                                         event['body'].encode('ascii'),
                                         hashlib.sha1)
                computed_signature = '='.join(
                    ['sha1', computed_hash.hexdigest()])
                if hmac.compare_digest(computed_signature.encode('ascii'),
                                       signature.encode('ascii')):
                    secure = True
        if not secure:
            raise Exception(
                'Failed to validate authenticity of webhook message')

    repo_name = full_name + '/branch/' + branch_name
    repo_path = '/tmp/%s' % repo_name

    # If we have an existing repo (if this function is still warm / is not a
    # cold start) we can re-use that repo on the file system and update it to
    # save us some time and bandwidth.
    try:
        repository_path = discover_repository(repo_path)
        repo = Repository(repository_path)
        logger.info('found existing repo, using that...')
    # If a previous repo is not found we will create it.
    except Exception:
        logger.info('creating new repo for %s in %s' % (remote_url, repo_path))
        repo = create_repo(repo_path, remote_url)

    # Re-used or created, we now have a repo reference to pull against.
    pull_repo(repo, branch_name, remote_url)

    # Now that we have the raw markdown files, we can inject our comments
    # into the markdown files before we compile the site, so we take
    # advantage of all of the theme styling with minimal effort.
    add_comments(repo_path + "/content/posts/", comment_function)

    # Compile the site to our pre-defined path.
    build_hugo(repo_path, build_path)

    # Sync the site to our public s3 bucket for hosting.
    upload_to_s3(build_path, output_bucket)

    if reset:
        logger.info('Resetting Repo...')
        repo.reset(repo.head.target, GIT_RESET_HARD)

    if cleanup:
        logger.info('Cleanup Lambda container...')
        shutil.rmtree(repo_path)

    # We have to return a status code, otherwise the API Gateway will give a
    # server error. We are likely exceeding the 29s hard timeout limit on the
    # API Gateway, but if we can return correctly we should attempt to; that
    # window could be changed later, or we could execute in time occasionally.
    return {
        "statusCode": 200,
        "body": json.dumps('Successfully updated %s' % repo_name)
    }
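# A minimal sketch of building a signed test event for post() above, useful
# for exercising the signature check locally. The secret and payload are
# hypothetical; GitHub sends 'sha1=' plus the HMAC-SHA1 hexdigest of the raw
# body, computed with the webhook secret.
import hashlib
import hmac
import json

secret = 'my-webhook-secret'  # hypothetical test secret
payload = json.dumps({'repository': {
    'full_name': 'owner/repo',
    'clone_url': 'https://example.com/owner/repo.git',
}})
digest = hmac.new(secret.encode('ascii'), payload.encode('ascii'),
                  hashlib.sha1).hexdigest()
test_event = {
    'headers': {'X-GitHub-Event': 'push',
                'X-Hub-Signature': 'sha1=' + digest},
    'body': payload,
}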
class GitRepo:
    """A class that manages a git repository.

    This class enables versioning via git for a repository.
    You can stage and commit files and checkout different commits of the
    repository.
    """

    path = ''
    pathspec = []
    repo = None
    callback = None
    author_name = 'QuitStore'
    author_email = '*****@*****.**'
    gcProcess = None

    def __init__(self, path, origin=None, gc=False):
        """Initialize a new repository from an existing directory.

        Args:
            path: A string containing the path to the repository.
            origin: The remote URL where to clone and fetch from and push to.
        """
        logger = logging.getLogger('quit.core.GitRepo')
        logger.debug('GitRepo, init, Create an instance of GitStore')
        self.path = path
        self.gc = gc

        if not exists(path):
            try:
                makedirs(path)
            except OSError as e:
                raise Exception('Can\'t create path in filesystem:', path, e)

        try:
            self.repo = Repository(path)
        except (KeyError, AttributeError):
            pass

        if origin:
            self.callback = QuitRemoteCallbacks()

        if self.repo:
            if self.repo.is_bare:
                raise QuitGitRepoError('Bare repositories not supported, yet')
            if origin:
                # set remote
                self.addRemote('origin', origin)
        else:
            if origin:
                # clone
                self.repo = self.cloneRepository(origin, path, self.callback)
            else:
                self.repo = init_repository(path=path, bare=False)

    def cloneRepository(self, origin, path, callback):
        try:
            repo = clone_repository(url=origin, path=path, bare=False,
                                    callbacks=callback)
            return repo
        except Exception as e:
            raise QuitGitRepoError(
                "Could not clone from: {} origin. {}".format(origin, e))

    def addall(self):
        """Add all (newly created|changed) files to index."""
        self.repo.index.read()
        self.repo.index.add_all(self.pathspec)
        self.repo.index.write()

    def addfile(self, filename):
        """Add a file to the index.

        Args:
            filename: A string containing the path to the file.
        """
        index = self.repo.index
        index.read()
        try:
            index.add(filename)
            index.write()
        except Exception as e:
            logger.info(
                "GitRepo, addfile, Could not add file {}.".format(filename))
            logger.debug(e)

    def addRemote(self, name, url):
        """Add a remote.

        Args:
            name: A string containing the name of the remote.
            url: A string containing the url to the remote.
        """
        try:
            self.repo.remotes.create(name, url)
            logger.info("Successfully added remote: {} - {}".format(name, url))
        except Exception as e:
            logger.info("Could not add remote: {} - {}".format(name, url))
            logger.debug(e)
        try:
            self.repo.remotes.set_push_url(name, url)
            self.repo.remotes.set_url(name, url)
        except Exception as e:
            logger.info("Could not set push/fetch urls: {} - {}".format(
                name, url))
            logger.debug(e)

    def checkout(self, commitid):
        """Checkout a commit by a commit id.

        Args:
            commitid: A string containing a commit id.
        """
        try:
            commit = self.repo.revparse_single(commitid)
            self.repo.set_head(commit.oid)
            self.repo.reset(commit.oid, GIT_RESET_HARD)
            logger.info("Checked out commit: {}".format(commitid))
        except Exception as e:
            logger.info("Could not check out commit: {}".format(commitid))
            logger.debug(e)

    def commit(self, message=None):
        """Commit staged files.

        Args:
            message: A string for the commit message.

        Raises:
            Exception: If no files in staging area.
""" if self.isstagingareaclean(): # nothing to commit return index = self.repo.index index.read() tree = index.write_tree() try: author = Signature(self.author_name, self.author_email) comitter = Signature(self.author_name, self.author_email) if len(self.repo.listall_reference_objects()) == 0: # Initial Commit if message is None: message = 'Initial Commit from QuitStore' self.repo.create_commit('HEAD', author, comitter, message, tree, []) else: if message is None: message = 'New Commit from QuitStore' self.repo.create_commit('HEAD', author, comitter, message, tree, [self.repo.head.get_object().hex]) logger.info('Updates commited') except Exception as e: logger.info('Nothing to commit') logger.debug(e) if self.gc: self.garbagecollection() def commitexists(self, commitid): """Check if a commit id is part of the repository history. Args: commitid: String of a Git commit id. Returns: True, if commitid is part of commit log False, else. """ if commitid in self.getids(): return True else: return False def garbagecollection(self): """Start garbage collection. Args: commitid: A string cotaining a commitid. """ try: # Check if the garbage collection process is still running if self.gcProcess is None or self.gcProcess.poll() is not None: # Start garbage collection with "--auto" option, # which imidietly terminates, if it is not necessary self.gcProcess = Popen(["git", "gc", "--auto", "--quiet"], cwd=self.path) logger.debug('Spawn garbage collection') except Exception as e: logger.debug('Git garbage collection failed to spawn') logger.debug(e) def getpath(self): """Return the path of the git repository. Returns: A string containing the path to the directory of git repo """ return self.path def getcommits(self): """Return meta data about exitsting commits. Returns: A list containing dictionaries with commit meta data """ commits = [] if len(self.repo.listall_reference_objects()) > 0: for commit in self.repo.walk(self.repo.head.target, GIT_SORT_REVERSE): commits.append({ 'id': str(commit.oid), 'message': str(commit.message), 'commit_date': datetime.fromtimestamp( commit.commit_time).strftime('%Y-%m-%dT%H:%M:%SZ'), 'author_name': commit.author.name, 'author_email': commit.author.email, 'parents': [c.hex for c in commit.parents], }) return commits def getids(self): """Return meta data about exitsting commits. Returns: A list containing dictionaries with commit meta data """ ids = [] if len(self.repo.listall_reference_objects()) > 0: for commit in self.repo.walk(self.repo.head.target, GIT_SORT_REVERSE): ids.append(str(commit.oid)) return ids def isgarbagecollectionon(self): """Return if gc is activated or not. Returns: True, if activated False, if not """ return self.gc def isstagingareaclean(self): """Check if staging area is clean. Returns: True, if staginarea is clean False, else. """ status = self.repo.status() for filepath, flags in status.items(): if flags != GIT_STATUS_CURRENT: return False return True def pull(self, remote='origin', branch='master'): """Pull if possible. Return: True: If successful. False: If merge not possible or no updates from remote. 
""" try: self.repo.remotes[remote].fetch() except Exception as e: logger.info("Can not pull: Remote {} not found.".format(remote)) logger.debug(e) ref = 'refs/remotes/' + remote + '/' + branch remoteid = self.repo.lookup_reference(ref).target analysis, _ = self.repo.merge_analysis(remoteid) if analysis & GIT_MERGE_ANALYSIS_UP_TO_DATE: # Already up-to-date pass elif analysis & GIT_MERGE_ANALYSIS_FASTFORWARD: # fastforward self.repo.checkout_tree(self.repo.get(remoteid)) master_ref = self.repo.lookup_reference('refs/heads/master') master_ref.set_target(remoteid) self.repo.head.set_target(remoteid) elif analysis & GIT_MERGE_ANALYSIS_NORMAL: self.repo.merge(remoteid) tree = self.repo.index.write_tree() msg = 'Merge from ' + remote + ' ' + branch author = Signature(self.author_name, self.author_email) comitter = Signature(self.author_name, self.author_email) self.repo.create_commit('HEAD', author, comitter, msg, tree, [self.repo.head.target, remoteid]) self.repo.state_cleanup() else: logger.debug('Can not pull. Unknown merge analysis result') def push(self, remote='origin', branch='master'): """Push if possible. Return: True: If successful. False: If diverged or nothing to push. """ ref = ['refs/heads/' + branch] try: remo = self.repo.remotes[remote] except Exception as e: logger.info( "Can not push. Remote: {} does not exist.".format(remote)) logger.debug(e) return try: remo.push(ref, callbacks=self.callback) except Exception as e: logger.info("Can not push to {} with ref {}".format( remote, str(ref))) logger.debug(e) def getRemotes(self): remotes = {} try: for remote in self.repo.remotes: remotes[remote.name] = [remote.url, remote.push_url] except Exception as e: logger.info('No remotes found.') logger.debug(e) return {} return remotes
def mirror_tarballs(target_dir, tmp_dir, git_repo, git_revision,
                    concurrent=DEFAULT_CONCURRENT_DOWNLOADS):
    global failed_entries
    global download_queue

    create_mirror_dirs(target_dir, git_revision)
    download_queue = queue.Queue()
    threads = []

    repo_path = os.path.join(tmp_dir, "nixpkgs")
    os.makedirs(repo_path, exist_ok=True)
    with ccd(repo_path):
        exists = False
        try:
            repo = Repository(os.path.join(repo_path, ".git"))
            repo.remotes["origin"].fetch()
            exists = True
        except Exception:
            pass
        if not exists:
            repo = clone_repository(git_repo, repo_path)
        repo.reset(git_revision, GIT_RESET_HARD)

    with ccd(repo.workdir):
        success = False
        env = os.environ.copy()
        env["NIX_PATH"] = "nixpkgs={}".format(repo.workdir)
        for expr in NIX_EXPRS:
            res = subprocess.run(nix_instantiate_cmd(expr), shell=True,
                                 stdout=subprocess.PIPE, env=env)
            if res.returncode != 0:
                print("nix instantiate failed!")
            else:
                success = True
                break
        if not success:
            return "fatal: all nix instantiate processes failed!"

    output = json.loads(res.stdout.decode('utf-8').strip())

    for entry in output:
        if len([x for x in VALID_URL_SCHEMES
                if entry['url'].startswith(x)]) != 1:
            append_failed_entry(entry)
            print("url {} is not in the supported url schemes.".format(
                entry['url']))
            continue
        elif (len(check_presence(target_dir, entry['hash'])) or
              len(check_presence(target_dir, entry['name']))):
            print("url {} already mirrored".format(entry['url']))
            continue
        else:
            download_queue.put(entry)

    for i in range(concurrent):
        t = threading.Thread(target=download_worker,
                             args=(target_dir, git_revision, repo.workdir))
        threads.append(t)
        t.start()

    download_queue.join()

    # Signal the workers to exit.
    for i in range(concurrent):
        download_queue.put(None)
    for t in threads:
        t.join()

    log = "########################\n"
    log += "SUMMARY OF FAILED FILES:\n"
    log += "########################\n"
    for entry in failed_entries:
        log += "url:{}, name:{}\n".format(entry['url'], entry['name'])
    with open(os.path.join(target_dir, "revisions", git_revision, "log"),
              "w") as f:
        f.write(log)
    return log
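# download_worker() is referenced above but not defined in this excerpt. A
# minimal sketch of a compatible worker, assuming a hypothetical
# fetch_tarball() helper does the actual download and None is the shutdown
# sentinel:
def download_worker(target_dir, git_revision, workdir):
    while True:
        entry = download_queue.get()
        if entry is None:
            download_queue.task_done()
            break
        try:
            fetch_tarball(target_dir, git_revision, workdir, entry)  # hypothetical helper
        except Exception:
            append_failed_entry(entry)
        finally:
            download_queue.task_done()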