def get_from_http(self, file_loc):
    """
    Try to get the schema from HTTP/HTTPS based on file_loc.

    :param file_loc: URL of the schema file to download
    :return: the response body as text when the server answers 200,
             otherwise None (failures are logged, never raised)
    """
    try:
        # NOTE(review): verify=False turns off TLS certificate validation;
        # kept exactly as the original call -- confirm this is intentional.
        resp = requests.get(file_loc, verify=False)
    except Exception as e:
        # Best effort: any failure while issuing the request is logged and
        # reported to the caller as None.
        self.logger.error(str(e))
        return None

    if resp.status_code != 200:
        # This means something went wrong.
        self.logger.error('Request Error for {}: {}\n Header: {}'.format(
            file_loc, resp.status_code, resp.headers))
        return None

    # Content-Length is optional (for example with chunked transfer encoding),
    # so a 200 without it is still a successful download. Fall back to the
    # size of the received body for the log line.
    size = resp.headers.get('content-length')
    if size is None:
        size = len(resp.content)
    self.logger.info('GET {} bytes from {}'.format(size, file_loc))
    return resp.text
def get_from_http(self, file_loc):
    """
    Try to get the schema from HTTP/HTTPS based on file_loc.

    :param file_loc: URL of the schema file to download
    :return: the response body as text when the server answers 200;
             None when issuing the request itself fails (the error is logged)
    :raises Exception: with args ('Request Error', 'GET <file_loc> <status>')
                       when the server answers with any status other than 200
    """
    try:
        # NOTE(review): verify=False turns off TLS certificate validation;
        # kept exactly as the original call -- confirm this is intentional.
        resp = requests.get(file_loc, verify=False)
    except Exception as e:
        self.logger.error(str(e))
        return None

    if resp.status_code != 200:
        # This means something went wrong.
        raise Exception('Request Error', 'GET {} {}'.format(file_loc, resp.status_code))

    # The size is only logged when the server reports it; the body is returned
    # either way. (The old trailing `return None` could never be reached.)
    if 'content-length' in resp.headers:
        self.logger.debug('GET {} bytes from {}'.format(resp.headers['content-length'], file_loc))
    return resp.text
def get_multiproducts(self):
    """
    Fetch all products and owners of Multiproduct.

    Requests the URL stored under args[Constant.MULTIPRODUCT_SERVICE_URL] and,
    when the answer is JSON, stores one dict per product in self.multiproduct,
    keyed by '<scm_type>:<repo full name>'.

    :raises Exception: with args ('Request Error', 'GET /api/v1/mpl <status>')
                       when the service answers with a status other than 200
    """
    resp = requests.get(args[Constant.MULTIPRODUCT_SERVICE_URL], verify=False)
    if resp.status_code != 200:
        # This means something went wrong.
        raise Exception('Request Error', 'GET /api/v1/mpl {}'.format(resp.status_code))

    # A missing header is handled like any other non-JSON answer instead of
    # raising KeyError.
    content_type = resp.headers.get('content-type', '').split(';')[0]
    if content_type != 'application/json':
        self.logger.debug("Unexpected content-type from Multiproduct service: {}".format(content_type))
        return

    # One pattern per supported SCM; group(1) is the repo full name.
    repo_name_patterns = {
        'git': re.compile(r":(.*)\.git$"),
        'svn': re.compile(r"/(.*)/trunk$"),
    }

    for product_name, product_info in resp.json()['products'].items():
        scm_type = product_info["scm"]["name"]
        pattern = repo_name_patterns.get(scm_type)
        if pattern is None:
            # Without this guard an unknown SCM would reuse repo_key and
            # repo_fullname from the previous product (or hit a NameError on
            # the first one) and overwrite an unrelated entry.
            self.logger.debug("Unsupported scm type {} for {}".format(scm_type, product_name))
            continue

        try:
            repo_fullname = pattern.search(product_info["uris"]["trunk"]).group(1)
        except (AttributeError, KeyError, TypeError):
            # AttributeError: trunk URI does not match the pattern;
            # KeyError: "uris"/"trunk" missing; TypeError: URI is not a string.
            self.logger.debug("Error parsing repo full name {} - {}".format(
                product_name, product_info.get("uris")))
            continue

        self.multiproduct[scm_type + ':' + repo_fullname] = {
            "scm_repo_fullname": repo_fullname,
            "scm_type": scm_type,
            "multiproduct_name": product_name,
            "product_type": product_info["type"],
            "namespace": product_info["org"],
            "owner_name": ",".join(product_info["owners"]),
            "product_version": product_info["product-version"],
        }

    self.logger.info("Fetched {} Multiproducts".format(len(self.multiproduct)))
def get_multiproducts(self):
    """
    Fetch all products and owners of Multiproduct.

    Requests the URL stored under args[Constant.MULTIPRODUCT_SERVICE_URL] and,
    when the answer is JSON, stores one dict per product in self.multiproduct,
    keyed by '<scm_type>:<repo full name>'.

    :raises Exception: with args ('Request Error', 'GET /api/v1/mpl <status>')
                       when the service answers with a status other than 200
    """
    resp = requests.get(args[Constant.MULTIPRODUCT_SERVICE_URL], verify=False)
    if resp.status_code != 200:
        # This means something went wrong.
        raise Exception('Request Error', 'GET /api/v1/mpl {}'.format(resp.status_code))

    # A missing header is handled like any other non-JSON answer instead of
    # raising KeyError.
    content_type = resp.headers.get('content-type', '').split(';')[0]
    if content_type != 'application/json':
        self.logger.debug("Unexpected content-type from Multiproduct service: {}".format(content_type))
        return

    # One pattern per supported SCM; group(1) is the repo full name.
    repo_name_patterns = {
        'git': re.compile(r":(.*)\.git$"),
        'svn': re.compile(r"/(.*)/trunk$"),
    }

    for product_name, product_info in resp.json()['products'].items():
        scm_type = product_info["scm"]["name"]
        pattern = repo_name_patterns.get(scm_type)
        if pattern is None:
            # Without this guard an unknown SCM would reuse repo_key and
            # repo_fullname from the previous product (or hit a NameError on
            # the first one) and overwrite an unrelated entry.
            self.logger.debug("Unsupported scm type {} for {}".format(scm_type, product_name))
            continue

        try:
            repo_fullname = pattern.search(product_info["uris"]["trunk"]).group(1)
        except (AttributeError, KeyError, TypeError):
            # AttributeError: trunk URI does not match the pattern;
            # KeyError: "uris"/"trunk" missing; TypeError: URI is not a string.
            self.logger.debug("Error parsing repo full name {} - {}".format(
                product_name, product_info.get("uris")))
            continue

        self.multiproduct[scm_type + ':' + repo_fullname] = {
            "scm_repo_fullname": repo_fullname,
            "scm_type": scm_type,
            "multiproduct_name": product_name,
            "product_type": product_info["type"],
            "namespace": product_info["org"],
            "owner_name": ",".join(product_info["owners"]),
            "product_version": product_info["product-version"],
        }

    self.logger.info("Fetched {} Multiproducts".format(len(self.multiproduct)))
def get_project_repo(self):
    """
    Fetch detail and repos of all git projects.

    For every non-svn entry in self.multiproduct the project name is taken
    from 'scm_repo_fullname' (everything before the last '/'). The project's
    XML description is requested from args[Constant.GIT_URL_PREFIX], one
    MultiproductProjectRecord is appended to self.project_writer, and each
    mainline repository of the project is stored in self.git_repo under
    'git:<repo full name>'. The writer is closed when the loop ends, even if
    it ends with an exception.
    """
    re_git_project_name = re.compile(r"(.*)/(.*)$")
    re_git_repo_name = re.compile(r"git://[\w\.-]+/(.*)\.git$")
    project_nonexist = []
    project_names = {}

    try:
        for _, product in self.multiproduct.iteritems():
            if product["scm_type"] == 'svn':
                continue

            name_match = re_git_project_name.search(product['scm_repo_fullname'])
            if name_match is None:
                # A full name without '/' has no project part; skip it rather
                # than abort the whole crawl with AttributeError.
                self.logger.debug("Cannot derive project name from {}".format(
                    product['scm_repo_fullname']))
                continue
            project_name = name_match.group(1)

            # Skip projects already recorded, and projects already known to
            # be missing, so neither is requested once per sibling product.
            if project_name in project_names or project_name in project_nonexist:
                continue

            project_url = '{}/{}?format=xml'.format(args[Constant.GIT_URL_PREFIX], project_name)
            try:
                resp = requests.get(project_url, verify=False)
            except Exception as ex:
                # Format the exception itself: `ex.message` is deprecated and
                # frequently empty.
                self.logger.info("Error getting /{}.xml - {}".format(project_name, ex))
                continue

            if resp.status_code != 200:
                # This means something went wrong.
                self.logger.debug('Request Error: GET /{}.xml {}'.format(
                    project_name, resp.status_code))
                project_nonexist.append(project_name)
                continue

            # A missing header is treated as "not XML" instead of raising KeyError.
            if resp.headers.get('content-type', '').split(';')[0] != 'application/xml':
                continue

            xml = ET.fromstring(resp.content)
            owner = xml.find('owner')
            current_project = MultiproductProjectRecord(
                self.app_id,
                xml.find('slug').text,
                'git',
                owner.attrib['kind'],
                owner.text,
                xml.find('created-at').text,
                xml.find('license').text,
                self.trim_newline(xml.find('description').text),
                self.wh_exec_id)

            project_repo_names = []
            for repo in xml.findall('repositories/mainlines/repository'):
                clone_url = repo.find('clone_url').text
                repo_match = re_git_repo_name.search(clone_url or '')
                if repo_match is None:
                    self.logger.debug("Error parsing repo full name {} - {}".format(
                        project_name, clone_url))
                    continue
                repo_fullname = repo_match.group(1)
                project_repo_names.append(repo_fullname)
                repo_owner = repo.find('owner')
                self.git_repo['git:' + repo_fullname] = {
                    'scm_repo_fullname': repo_fullname,
                    'scm_type': 'git',
                    'repo_id': repo.find('id').text,
                    'project': project_name,
                    'owner_type': repo_owner.attrib['kind'],
                    'owner_name': repo_owner.text,
                }

            project_repo_num = len(project_repo_names)
            current_project.setRepos(project_repo_num, ','.join(project_repo_names))
            self.project_writer.append(current_project)
            project_names[project_name] = project_repo_num
    finally:
        # Always release the writer, including when a record fails to parse.
        self.project_writer.close()

    self.logger.info("Finish Fetching git projects and repos")
    self.logger.debug('Non-exist projects: {}'.format(project_nonexist))
def get_acl_owners(self):
    """
    Fetch owners information from acl.

    For every repo in self.product_repo the ACL listing page is requested,
    every '.acl' link found in that page is downloaded, and one
    MultiproductRepoOwnerRecord per owner in the file's 'owners: [...]'
    section is appended to self.repo_owner_writer. The writer is closed when
    the loop ends, even if it ends with an exception.
    """
    re_acl_owners = re.compile(r"owners\:\s*\[([^\[\]]+)\]")
    re_acl_path = re.compile(r"paths\:\s*\[([^\[\]]+)\]")
    re_svn_acl_url = re.compile(
        r'href=\"[\w\/\-]+[\/\:]acl\/([\w\-\/]+)\.acl(\?revision=\d+)&view=markup\"')
    re_git_acl_url = re.compile(
        r'href=\"[\w\/\-]+\/source\/([\w\:]*)acl\/([\w\-]+)\.acl\"')
    owner_count = 0

    try:
        for repo in self.product_repo:
            repo_fullname = repo.getScmRepoFullname()
            scm_type = repo.getScmType()
            repo_id = repo.getRepoId()

            if scm_type == "git":
                repo_url = '{}/{}/source/acl'.format(args[Constant.GIT_URL_PREFIX], repo_fullname)
                re_acl_url = re_git_acl_url
            elif scm_type == "svn":
                repo_url = '{}/{}/acl'.format(args[Constant.SVN_URL_PREFIX], repo_fullname)
                re_acl_url = re_svn_acl_url
            else:
                # Without this guard repo_url would be undefined, or left over
                # from the previous repo.
                self.logger.debug("Unsupported scm type {} for repo {}".format(
                    scm_type, repo_fullname))
                continue

            try:
                listing = requests.get(repo_url, verify=False)
            except Exception as ex:
                # Format the exception itself: `ex.message` is deprecated and
                # frequently empty.
                self.logger.info("Error getting acl {} - {}".format(repo_url, ex))
                continue

            if listing.status_code != 200:
                self.logger.debug('Request Error: GET repo {} acls - {}'.format(
                    repo, listing.status_code))
                continue

            # A missing header is treated as "not HTML" instead of raising KeyError.
            if listing.headers.get('content-type', '').split(';')[0] != 'text/html':
                continue

            for acl_link in re_acl_url.finditer(listing.content):
                if scm_type == "git":
                    acl_name = acl_link.group(2)
                    commit_hash = acl_link.group(1)
                    full_acl_url = '{}/{}/raw/{}acl/{}.acl'.format(
                        args[Constant.GIT_URL_PREFIX], repo_fullname, commit_hash, acl_name)
                else:
                    acl_name = acl_link.group(1)
                    # For svn the second group is the '?revision=N' suffix.
                    commit_hash = acl_link.group(2)
                    full_acl_url = '{}/{}.acl{}'.format(repo_url, acl_name, commit_hash)

                # Separate name from `listing` so the page being iterated is
                # not shadowed by the per-file response.
                try:
                    acl_resp = requests.get(full_acl_url, verify=False)
                except Exception as ex:
                    self.logger.info("Error getting acl {} - {}".format(full_acl_url, ex))
                    continue

                if acl_resp.status_code != 200:
                    self.logger.debug('Request Error: GET acl {} - {}'.format(
                        full_acl_url, acl_resp.status_code))
                    continue

                owners_string = re_acl_owners.search(acl_resp.content)
                if not owners_string:
                    continue
                path_string = re_acl_path.search(acl_resp.content)

                owners = self.parse_owners(owners_string.group(1))
                paths = self.trim_path(path_string.group(1)) if path_string else None
                # sort_id is the owner's position within this acl file.
                for sort_id, owner in enumerate(owners):
                    owner_record = MultiproductRepoOwnerRecord(
                        self.app_id,
                        repo_fullname,
                        scm_type,
                        repo_id,
                        acl_name.title(),
                        owner,
                        sort_id,
                        paths,
                        self.wh_exec_id)
                    self.repo_owner_writer.append(owner_record)
                    owner_count += 1
    finally:
        # Always release the writer, including when a request or parse fails.
        self.repo_owner_writer.close()

    self.logger.info('Finish Fetching acl owners, total {} records'.format(owner_count))
def get_project_repo(self):
    """
    Fetch detail and repos of all git projects.

    For every non-svn entry in self.multiproduct the project name is taken
    from 'scm_repo_fullname' (everything before the last '/'). The project's
    XML description is requested from args[Constant.GIT_URL_PREFIX], one
    MultiproductProjectRecord is appended to self.project_writer, and each
    mainline repository of the project is stored in self.git_repo under
    'git:<repo full name>'. The writer is closed when the loop ends, even if
    it ends with an exception.
    """
    re_git_project_name = re.compile(r"(.*)/(.*)$")
    re_git_repo_name = re.compile(r"git://[\w\.-]+/(.*)\.git$")
    project_nonexist = []
    project_names = {}

    try:
        for _, product in self.multiproduct.iteritems():
            if product["scm_type"] == 'svn':
                continue

            name_match = re_git_project_name.search(product['scm_repo_fullname'])
            if name_match is None:
                # A full name without '/' has no project part; skip it rather
                # than abort the whole crawl with AttributeError.
                self.logger.debug("Cannot derive project name from {}".format(
                    product['scm_repo_fullname']))
                continue
            project_name = name_match.group(1)

            # Skip projects already recorded, and projects already known to
            # be missing, so neither is requested once per sibling product.
            if project_name in project_names or project_name in project_nonexist:
                continue

            project_url = '{}/{}?format=xml'.format(args[Constant.GIT_URL_PREFIX], project_name)
            try:
                resp = requests.get(project_url, verify=False)
            except Exception as ex:
                # Format the exception itself: `ex.message` is deprecated and
                # frequently empty.
                self.logger.info("Error getting /{}.xml - {}".format(project_name, ex))
                continue

            if resp.status_code != 200:
                # This means something went wrong.
                self.logger.debug('Request Error: GET /{}.xml {}'.format(
                    project_name, resp.status_code))
                project_nonexist.append(project_name)
                continue

            # A missing header is treated as "not XML" instead of raising KeyError.
            if resp.headers.get('content-type', '').split(';')[0] != 'application/xml':
                continue

            xml = ET.fromstring(resp.content)
            owner = xml.find('owner')
            current_project = MultiproductProjectRecord(
                self.app_id,
                xml.find('slug').text,
                'git',
                owner.attrib['kind'],
                owner.text,
                xml.find('created-at').text,
                xml.find('license').text,
                self.trim_newline(xml.find('description').text),
                self.wh_exec_id)

            project_repo_names = []
            for repo in xml.findall('repositories/mainlines/repository'):
                clone_url = repo.find('clone_url').text
                repo_match = re_git_repo_name.search(clone_url or '')
                if repo_match is None:
                    self.logger.debug("Error parsing repo full name {} - {}".format(
                        project_name, clone_url))
                    continue
                repo_fullname = repo_match.group(1)
                project_repo_names.append(repo_fullname)
                repo_owner = repo.find('owner')
                self.git_repo['git:' + repo_fullname] = {
                    'scm_repo_fullname': repo_fullname,
                    'scm_type': 'git',
                    'repo_id': repo.find('id').text,
                    'project': project_name,
                    'owner_type': repo_owner.attrib['kind'],
                    'owner_name': repo_owner.text,
                }

            project_repo_num = len(project_repo_names)
            current_project.setRepos(project_repo_num, ','.join(project_repo_names))
            self.project_writer.append(current_project)
            project_names[project_name] = project_repo_num
    finally:
        # Always release the writer, including when a record fails to parse.
        self.project_writer.close()

    self.logger.info("Finish Fetching git projects and repos")
    self.logger.debug('Non-exist projects: {}'.format(project_nonexist))
def get_acl_owners(self):
    """
    Fetch owners information from acl.

    For every repo in self.product_repo the ACL listing page is requested,
    every '.acl' link found in that page is downloaded, and one
    MultiproductRepoOwnerRecord per owner in the file's 'owners: [...]'
    section is appended to self.repo_owner_writer. The writer is closed when
    the loop ends, even if it ends with an exception.
    """
    re_acl_owners = re.compile(r"owners\:\s*\[([^\[\]]+)\]")
    re_acl_path = re.compile(r"paths\:\s*\[([^\[\]]+)\]")
    re_svn_acl_url = re.compile(
        r'href=\"[\w\/\-]+[\/\:]acl\/([\w\-\/]+)\.acl(\?revision=\d+)&view=markup\"')
    re_git_acl_url = re.compile(
        r'href=\"[\w\/\-]+\/source\/([\w\:]*)acl\/([\w\-]+)\.acl\"')
    owner_count = 0

    try:
        for repo in self.product_repo:
            repo_fullname = repo.getScmRepoFullname()
            scm_type = repo.getScmType()
            repo_id = repo.getRepoId()

            if scm_type == "git":
                repo_url = '{}/{}/source/acl'.format(args[Constant.GIT_URL_PREFIX], repo_fullname)
                re_acl_url = re_git_acl_url
            elif scm_type == "svn":
                repo_url = '{}/{}/acl'.format(args[Constant.SVN_URL_PREFIX], repo_fullname)
                re_acl_url = re_svn_acl_url
            else:
                # Without this guard repo_url would be undefined, or left over
                # from the previous repo.
                self.logger.debug("Unsupported scm type {} for repo {}".format(
                    scm_type, repo_fullname))
                continue

            try:
                listing = requests.get(repo_url, verify=False)
            except Exception as ex:
                # Format the exception itself: `ex.message` is deprecated and
                # frequently empty.
                self.logger.info("Error getting acl {} - {}".format(repo_url, ex))
                continue

            if listing.status_code != 200:
                self.logger.debug('Request Error: GET repo {} acls - {}'.format(
                    repo, listing.status_code))
                continue

            # A missing header is treated as "not HTML" instead of raising KeyError.
            if listing.headers.get('content-type', '').split(';')[0] != 'text/html':
                continue

            for acl_link in re_acl_url.finditer(listing.content):
                if scm_type == "git":
                    acl_name = acl_link.group(2)
                    commit_hash = acl_link.group(1)
                    full_acl_url = '{}/{}/raw/{}acl/{}.acl'.format(
                        args[Constant.GIT_URL_PREFIX], repo_fullname, commit_hash, acl_name)
                else:
                    acl_name = acl_link.group(1)
                    # For svn the second group is the '?revision=N' suffix.
                    commit_hash = acl_link.group(2)
                    full_acl_url = '{}/{}.acl{}'.format(repo_url, acl_name, commit_hash)

                # Separate name from `listing` so the page being iterated is
                # not shadowed by the per-file response.
                try:
                    acl_resp = requests.get(full_acl_url, verify=False)
                except Exception as ex:
                    self.logger.info("Error getting acl {} - {}".format(full_acl_url, ex))
                    continue

                if acl_resp.status_code != 200:
                    self.logger.debug('Request Error: GET acl {} - {}'.format(
                        full_acl_url, acl_resp.status_code))
                    continue

                owners_string = re_acl_owners.search(acl_resp.content)
                if not owners_string:
                    continue
                path_string = re_acl_path.search(acl_resp.content)

                owners = self.parse_owners(owners_string.group(1))
                paths = self.trim_path(path_string.group(1)) if path_string else None
                # sort_id is the owner's position within this acl file.
                for sort_id, owner in enumerate(owners):
                    owner_record = MultiproductRepoOwnerRecord(
                        self.app_id,
                        repo_fullname,
                        scm_type,
                        repo_id,
                        acl_name.title(),
                        owner,
                        sort_id,
                        paths,
                        self.wh_exec_id)
                    self.repo_owner_writer.append(owner_record)
                    owner_count += 1
    finally:
        # Always release the writer, including when a request or parse fails.
        self.repo_owner_writer.close()

    self.logger.info('Finish Fetching acl owners, total {} records'.format(owner_count))