#
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
#

import os
from pathlib import Path

from ci_common_utils import Logger

LOGGER = Logger()

# walk up from the current working directory until the Gradle wrapper is found
ROOT_DIR = Path(os.getcwd())
while str(ROOT_DIR) != "/" and not (ROOT_DIR / "gradlew").is_file():
    ROOT_DIR = ROOT_DIR.parent
if str(ROOT_DIR) == "/":
    LOGGER.critical("this script must be executed from within the Airbyte repo")
import re
from datetime import datetime, timedelta

import pytest
from ci_common_utils import Logger

LOG_RE = re.compile(r'^\[(\d{2}/\d{2}/\d{4} \d{2}:\d{2}:\d{2}\.\d{6})\] -'
                    r'\s+(\w+)\s+- \[.*tests/test_logger.py:(\d+)\] # (.+)')

LOGGER = Logger()
TEST_MESSAGE = 'sbhY=)9\'v-}LT=)jjF66(XrZh=]>7Xp"?/zCz,=eu8K47u8'


def check_output(msg: str, expected_line_number: int, expected_log_level: str):
    """Validates one log line: its format, caller line number, level, and timestamp freshness."""
    m = LOG_RE.match(msg)
    assert m is not None, f"incorrect message format, pattern: {LOG_RE.pattern}"
    date_time, log_level, line_number, msg = m.groups()
    assert int(line_number) == expected_line_number
    assert expected_log_level == log_level
    dt = datetime.strptime(date_time, '%d/%m/%Y %H:%M:%S.%f')
    now = datetime.now()
    delta = timedelta(seconds=1)
    assert now - delta < dt < now


@pytest.mark.parametrize('log_func,expected_log_level,expected_code',
                         ((LOGGER.debug, 'DEBUG', 0),
                          (LOGGER.warning, 'WARNING', 0),
                          (LOGGER.info, 'INFO', 0),
                          (LOGGER.error, 'ERROR', 1)))
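# The parametrized test body is truncated in the original file. Below is a
# minimal sketch, assuming the Logger methods return shell-style exit codes
# (0/1, per the parametrize table above) and write their records to stderr or
# stdout; capfd is the standard pytest capture fixture. The exact assertions
# are this sketch's own, not the original ones.
def test_log_message(capfd, log_func, expected_log_level, expected_code):
    assert log_func(TEST_MESSAGE) == expected_code
    out, err = capfd.readouterr()
    record = (err or out).strip()
    m = LOG_RE.match(record)
    assert m is not None, f"incorrect message format, pattern: {LOG_RE.pattern}"
    assert m.group(2) == expected_log_level
    assert m.group(4) == TEST_MESSAGE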
import base64
import json
from json.decoder import JSONDecodeError
from pathlib import Path
from typing import Any, ClassVar, Mapping, Tuple

from ci_common_utils import GoogleApi, Logger

# assumed module-level constants; the original module defines them outside this excerpt
DEFAULT_SECRET_FILE = "config"
DEFAULT_SECRET_FILE_WITH_EXT = DEFAULT_SECRET_FILE + ".json"
GSM_SCOPES = ("https://www.googleapis.com/auth/cloud-platform",)


class SecretsLoader:
    """Loading and saving all requested secrets into connector folders"""

    logger: ClassVar[Logger] = Logger()
    base_folder = Path("/actions-runner/_work/airbyte/airbyte")

    def __init__(self, connector_name: str, gsm_credentials: Mapping[str, Any]):
        self.gsm_credentials = gsm_credentials
        self.connector_name = connector_name
        self._api = None

    @property
    def api(self) -> GoogleApi:
        if self._api is None:
            self._api = GoogleApi(self.gsm_credentials, GSM_SCOPES)
        return self._api

    def __load_gsm_secrets(self) -> Mapping[Tuple[str, str], str]:
        """Loads needed GSM secrets"""
        secrets = {}
        # docs: https://cloud.google.com/secret-manager/docs/filtering#api
        filter = "name:SECRET_"
        if self.connector_name:
            filter += f" AND labels.connector={self.connector_name}"
        url = f"https://secretmanager.googleapis.com/v1/projects/{self.api.project_id}/secrets"
        next_token = None
        while True:
            params = {
                "filter": filter,
            }
            if next_token:
                params["pageToken"] = next_token
            data = self.api.get(url, params=params)
            for secret_info in data.get("secrets") or []:
                secret_name = secret_info["name"]
                connector_name = secret_info.get("labels", {}).get("connector")
                if not connector_name:
                    self.logger.warning(f"secret {secret_name} doesn't have the label 'connector'")
                    continue
                elif self.connector_name and connector_name != self.connector_name:
                    self.logger.warning(f"the 'connector' label of the secret {secret_name} has an unexpected value '{connector_name}'")
                    continue

                filename = secret_info.get("labels", {}).get("filename")
                if filename:
                    # all secret file names should end with ".json",
                    # but '.' can't be used in GSM labels, so we append it here
                    filename = f"{filename}.json"
                else:
                    # the "filename" label is optional
                    filename = DEFAULT_SECRET_FILE_WITH_EXT
                log_name = f'{secret_name.split("/")[-1]}({connector_name})'
                self.logger.info(f"found GSM secret: {log_name} => {filename}")

                versions_url = f"https://secretmanager.googleapis.com/v1/{secret_name}/versions"
                versions_data = self.api.get(versions_url)
                enabled_versions = [version["name"] for version in versions_data["versions"] if version["state"] == "ENABLED"]
                if len(enabled_versions) > 1:
                    self.logger.critical(f"{log_name} should have only one enabled version at a time!")

                secret_url = f"https://secretmanager.googleapis.com/v1/{enabled_versions[0]}:access"
                secret_data = self.api.get(secret_url)
                secret_value = secret_data.get("payload", {}).get("data")
                if not secret_value:
                    self.logger.warning(f"{log_name} has an empty value")
                    continue
                secret_value = base64.b64decode(secret_value.encode()).decode('utf-8')
                try:
                    # minimize and validate its JSON value
                    secret_value = json.dumps(json.loads(secret_value), separators=(',', ':'))
                except JSONDecodeError as err:
                    self.logger.error(f"{log_name} has a non-JSON value! Error: {err}")
                    continue
                secrets[(connector_name, filename)] = secret_value

            next_token = data.get("nextPageToken")
            if not next_token:
                break
        return secrets

    @staticmethod
    def generate_secret_name(connector_name: str, filename: str) -> str:
        """
        Generates a unique GSM secret name.
        Format of the secret name: SECRET_<CAPITAL_CONNECTOR_NAME>_<OPTIONAL_UNIQUE_FILENAME_PART>__CREDS
        Examples:
            1. connector_name: source-linnworks, filename: dsdssds_a-b---_---_config.json
               => SECRET_SOURCE-LINNWORKS_DSDSSDS_A-B__CREDS
            2. connector_name: source-s3, filename: config.json
               => SECRET_SOURCE-S3__CREDS
        """
        name_parts = ["secret", connector_name]
        filename_wo_ext = filename.replace(".json", "")
        if filename_wo_ext != DEFAULT_SECRET_FILE:
            name_parts.append(filename_wo_ext.replace(DEFAULT_SECRET_FILE, "").strip("_-"))
        name_parts.append("_creds")
        return "_".join(name_parts).upper()

    def create_secret(self, connector_name: str, filename: str, secret_value: str) -> bool:
        """Creates a new GSM secret with an auto-generated name."""
        secret_name = self.generate_secret_name(connector_name, filename)
        self.logger.info(f"Generated the new secret name '{secret_name}' for {connector_name}({filename})")
        params = {
            "secretId": secret_name,
        }
        labels = {
            "connector": connector_name,
        }
        if filename != DEFAULT_SECRET_FILE:
            labels["filename"] = filename.replace(".json", "")
        body = {
            "labels": labels,
            "replication": {"automatic": {}},
        }
        url = f"https://secretmanager.googleapis.com/v1/projects/{self.api.project_id}/secrets"
        data = self.api.post(url, json=body, params=params)
        # try to create a new version
        secret_name = data["name"]
        self.logger.info(f"the GSM secret {secret_name} was created")
        secret_url = f'https://secretmanager.googleapis.com/v1/{secret_name}:addVersion'
        body = {"payload": {"data": base64.b64encode(secret_value.encode()).decode("utf-8")}}
        self.api.post(secret_url, json=body)
        return True

    def read_from_gsm(self) -> Mapping[Tuple[str, str], str]:
        """Reads all necessary secrets from different sources"""
        secrets = self.__load_gsm_secrets()
        for k in secrets:
            if not isinstance(secrets[k], tuple):
                secrets[k] = ("GSM", secrets[k])
            source, _ = secrets[k]
            self.logger.info(f"Register the file {k[1]}({k[0]}) from {source}")
        if not secrets:
            self.logger.warning(f"not found any secrets of the connector '{self.connector_name}'")
            return {}
        return {k: v[1] for k, v in secrets.items()}

    def write_to_storage(self, secrets: Mapping[Tuple[str, str], str]) -> int:
        """Tries to save the target secrets to the airbyte-integrations/<connectors|bases>/<connector_name>/secrets folder"""
        if not secrets:
            return 0
        for (connector_name, filename), secret_value in secrets.items():
            if connector_name == "base-normalization":
                secrets_dir = f"airbyte-integrations/bases/{connector_name}/secrets"
            else:
                secrets_dir = f"airbyte-integrations/connectors/{connector_name}/secrets"
            secrets_dir = self.base_folder / secrets_dir
            secrets_dir.mkdir(parents=True, exist_ok=True)
            filepath = secrets_dir / filename
            with open(filepath, "w") as file:
                file.write(secret_value)
            self.logger.info(f"The file {filepath} was saved")
        return 0
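# A minimal usage sketch for SecretsLoader.generate_secret_name, mirroring the
# docstring examples above; the method is static, so no GSM credentials are needed:
assert SecretsLoader.generate_secret_name("source-s3", "config.json") == "SECRET_SOURCE-S3__CREDS"
assert SecretsLoader.generate_secret_name(
    "source-linnworks", "dsdssds_a-b---_---_config.json") == "SECRET_SOURCE-LINNWORKS_DSDSSDS_A-B__CREDS"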
import json
import os
import sys
from json.decoder import JSONDecodeError

from ci_common_utils import Logger

from . import SecretsLoader

logger = Logger()

ENV_GCP_GSM_CREDENTIALS = "GCP_GSM_CREDENTIALS"


# the GSM credentials and GitHub secrets should be shared via the shell environment
def main() -> int:
    if len(sys.argv) != 2:
        return logger.error("uses one script argument only: <unique connector name>")

    # parse the unique connector name, because it can have the common prefix "connectors/<unique connector name>"
    connector_name = sys.argv[1].split("/")[-1]
    if connector_name == "all":
        # load the secrets of all connectors
        connector_name = None

    # parse GCP_GSM_CREDENTIALS
    try:
        gsm_credentials = json.loads(os.getenv(ENV_GCP_GSM_CREDENTIALS) or "{}")
    except JSONDecodeError as e:
        return logger.error(f"incorrect {ENV_GCP_GSM_CREDENTIALS} value, error: {e}")
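    # The rest of main() is truncated in the original file. What follows is a
    # hedged sketch of the likely continuation, based solely on the
    # SecretsLoader API shown above; the exact messages and checks are assumptions.
    if not gsm_credentials:
        return logger.error(f"{ENV_GCP_GSM_CREDENTIALS} shouldn't be empty")
    loader = SecretsLoader(connector_name=connector_name, gsm_credentials=gsm_credentials)
    return loader.write_to_storage(loader.read_from_gsm())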
import itertools
import re
from functools import reduce
from typing import Any, List, Mapping, Optional
from urllib.parse import urljoin

import requests
from mdutils.mdutils import MdUtils
from requests.auth import HTTPBasicAuth

from ci_common_utils import Logger

# assumed module-level constants; the originals are defined outside this excerpt
AIRBYTE_PROJECT_PREFIX = "airbyte"  # assumed project-key prefix
RE_RULE_NAME = re.compile(r"(\w+):S(\d+)")  # assumed: matches rule keys like "python:S6287"
RATINGS = {1: "A", 2: "B", 3: "C", 4: "D", 5: "E"}  # standard SonarQube rating scale
REPORT_METRICS = ("coverage", "bugs", "vulnerabilities", "code_smells")  # assumed subset of the original metric keys


class SonarQubeApi:
    """https://sonarcloud.io/web_api"""

    logger = Logger()

    def __init__(self, host: str, token: str, pr_name: str):
        self._host = host
        self._token = token

        # split the latest name part
        self._pr_id = (pr_name or '').split("/")[-1]
        if not self._pr_id.isdigit():
            self.logger.critical(f"PR id should be integer. Current value: {pr_name}")
        self._pr_id = int(self._pr_id)

        # check the token
        # https://sonarcloud.io/web_api/api/authentication/validate
        if not self._host:
            return
        resp = self._get("authentication/validate")
        if not resp["valid"]:
            self.logger.critical("provided token is not valid")

    @property
    def __auth(self):
        return HTTPBasicAuth(self._token, '')

    def __parse_response(self, url: str, response: requests.Response) -> Mapping[str, Any]:
        if response.status_code == 204:
            # empty response
            return {}
        elif response.status_code != 200:
            self.logger.critical(f"API error for {url}: [{response.status_code}] {response.json()['errors']}")
        return response.json()

    def generate_url(self, endpoint: str) -> str:
        return reduce(urljoin, [self._host, "/api/", endpoint])

    def _post(self, endpoint: str, json: Mapping[str, Any]) -> Mapping[str, Any]:
        url = self.generate_url(endpoint)
        return self.__parse_response(url, requests.post(url, auth=self.__auth, params=json, json=json))

    def _get(self, endpoint: str) -> Mapping[str, Any]:
        url = self.generate_url(endpoint)
        return self.__parse_response(url, requests.get(url, auth=self.__auth))

    def _get_list(self, endpoint: str, list_name: str) -> List[Mapping[str, Any]]:
        page = 0
        items = []
        while True:
            page += 1
            url = endpoint + ("&" if "?" in endpoint else "?") + f"p={page}"
            data = self._get(url)
            items += data[list_name]
            total = data.get("total") or data.get("paging", {}).get("total", 0)
            if len(items) >= total:
                break
        return items

    @classmethod
    def module2project(cls, module_name: str) -> str:
        """Converts a module name like 'connectors/source-s3' to a SonarQube project key."""
        parts = module_name.split("/")
        if len(parts) != 2:
            cls.logger.critical("module name must have the format: component/module")
        return f"{AIRBYTE_PROJECT_PREFIX}:{parts[0].lower()}:{parts[1].lower().replace('_', '-')}"

    def __correct_project_name(self, project_name: str) -> str:
        return f"pr:{self._pr_id}:{project_name}" if self._pr_id else f"master:{project_name}"

    def __search_project(self, project_name: str) -> Optional[Mapping[str, Any]]:
        """https://sonarcloud.io/web_api/api/projects/search"""
        data = self._get(f"projects/search?q={project_name}")
        exists_projects = data["components"]
        if len(exists_projects) > 1:
            self.logger.critical(f"there are several projects with the name '{project_name}'")
        elif len(exists_projects) == 0:
            return None
        return exists_projects[0]

    def prepare_project_settings(self, project_name: str) -> Mapping[str, str]:
        title = re.sub('[:_-]', ' ', project_name).replace("connectors_", "").title()
        if self._pr_id:
            title += f"(#{self._pr_id})"
        project_name = self.__correct_project_name(project_name)
        return {
            "name": title,
            "project": project_name,
            "visibility": "private",
        }

    def create_project(self, project_name: str) -> bool:
        """https://sonarcloud.io/web_api/api/projects/create"""
        data = self.prepare_project_settings(project_name)
        project_name = data["project"]
        exists_project = self.__search_project(project_name)
        if exists_project:
            self.logger.info(f"The project '{project_name}' was created before")
            return True
        self._post("projects/create", data)
        self.logger.info(f"The project '{project_name}' was created")
        return True

    def remove_project(self, project_name: str) -> bool:
        """https://sonarcloud.io/web_api/api/projects/delete"""
        project_name = self.prepare_project_settings(project_name)["project"]
        exists_project = self.__search_project(project_name)
        if exists_project is None:
            self.logger.info(f"not found the project '{project_name}'")
            return True
        body = {"project": project_name}
        self._post("projects/delete", body)
        self.logger.info(f"The project '{project_name}' was removed")
        return True

    def generate_report(self, project_name: str, report_file: str) -> bool:
        project_data = self.prepare_project_settings(project_name)
        md_file = MdUtils(file_name=report_file)
        md_file.new_line("<details><summary> <strong> SonarQube Report </strong></summary>")
        md_file.new_line("<p>")
        md_file.new_line("")
        md_file.new_line(f'### SonarQube report for {project_data["name"]}')

        project_name = project_data["project"]
        issues = self._get_list(f"issues/search?componentKeys={project_name}&additionalFields=_all", "issues")

        rules = {}
        for rule_key in set(issue["rule"] for issue in issues):
            key_parts = rule_key.split(":")
            # keep the 2 last key parts only
            while len(key_parts) > 2:
                key_parts.pop(0)
            key = ":".join(key_parts)
            data = self._get(f"rules/search?rule_key={key}")["rules"]
            if not data:
                data = self._get(f"rules/show?key={rule_key}")["rule"]
            else:
                data = data[0]
            description = data["name"]
            public_name = key
            link = None
            if rule_key.startswith("external_"):
                public_name = key.replace("external_", "")
                if not data["isExternal"]:
                    # this is a custom rule
                    description = data["htmlDesc"]
                if public_name.startswith("flake"):
                    # a single link for all flake8 descriptions
                    link = "https://flake8.pycqa.org/en/latest/user/error-codes.html"
                elif "isort_" in public_name:
                    link = "https://pycqa.github.io/isort/index.html"
                elif "black_" in public_name:
                    link = "https://black.readthedocs.io/en/stable/the_black_code_style/index.html"
            else:
                # link example:
                # https://rules.sonarsource.com/python/RSPEC-6287
                m = RE_RULE_NAME.match(public_name)
                if not m:
                    # for a local server
                    link = f"{self._host}coding_rules?open={key}&rule_key={key}"
                else:
                    # to the public SQ docs
                    link = f"https://rules.sonarsource.com/{m.group(1)}/RSPEC-{m.group(2)}"
            if link:
                public_name = md_file.new_inline_link(link=link, text=public_name)
            rules[rule_key] = (public_name, description)

        data = self._get(f"measures/component?component={project_name}&additionalFields=metrics&metricKeys={','.join(REPORT_METRICS)}")
        measures = {}
        total_coverage = None
        for measure in data["component"]["measures"]:
            metric = measure["metric"]
            if measure["metric"].startswith("new_") and measure.get("periods"):
                # we need to show the values of the last sync period only
                last_period = max(measure["periods"], key=lambda period: period["index"])
                value = last_period["value"]
            else:
                value = measure.get("value")
            measures[metric] = value

        # group the overall and latest values
        measures = {
            metric: (value, measures.get(f"new_{metric}"))
            for metric, value in measures.items() if not metric.startswith("new_")
        }

        metrics = {}
        for metric in data["metrics"]:
            # if metric["key"] not in measures:
            #     continue
            metrics[metric["key"]] = (metric["name"], metric["type"])

        md_file.new_line('#### Measures')
        values = []
        for metric, (overall_value, latest_value) in measures.items():
            if metric not in metrics:
                continue
            name, metric_type = metrics[metric]
            value = overall_value if (latest_value is None or latest_value == "0") else latest_value
            if metric_type == "PERCENT":
                value = str(round(float(value), 1))
            elif metric_type == "INT":
                value = int(float(value))
            elif metric_type == "LEVEL":
                pass
            elif metric_type == "RATING":
                value = int(float(value))
                for k, v in RATINGS.items():
                    if value <= k:
                        value = v
                        break
            if metric == "coverage":
                total_coverage = value
            values.append([name, value])

        values += [
            ("Blocker Issues", sum(map(lambda i: i["severity"] == "BLOCKER", issues))),
            ("Critical Issues", sum(map(lambda i: i["severity"] == "CRITICAL", issues))),
            ("Major Issues", sum(map(lambda i: i["severity"] == "MAJOR", issues))),
            ("Minor Issues", sum(map(lambda i: i["severity"] == "MINOR", issues))),
        ]
        # pad the list so it fills whole rows of three name/value pairs
        while len(values) % 3:
            values.append(("", ""))
        table_items = ["Name", "Value"] * 3 + list(itertools.chain.from_iterable(values))
        md_file.new_table(columns=6, rows=int(len(values) / 3 + 1), text=table_items, text_align='left')
        md_file.new_line()

        if issues:
            md_file.new_line('#### Detected Issues')
            table_items = ["Rule", "File", "Description", "Message"]
            for issue in issues:
                rule_name, description = rules[issue["rule"]]
                path = issue["component"].split(":")[-1].split("/")
                # we need to show the 2 last path parts only
                while len(path) > 2:
                    path.pop(0)
                path = "/".join(path)
                # add the line number to the end
                if issue.get("line"):
                    path += f':{issue["line"]}'
                table_items += [
                    f'{rule_name} ({issue["severity"]})',
                    path,
                    description,
                    issue["message"],
                ]
            md_file.new_table(columns=4, rows=len(issues) + 1, text=table_items, text_align='left')

        coverage_files = list(self.load_coverage_component(project_name).items())
        if total_coverage is not None:
            md_file.new_line(f'#### Coverage ({total_coverage}%)')
            # pad the list so it fills whole rows of two file/coverage pairs
            while len(coverage_files) % 2:
                coverage_files.append(("", ""))
            table_items = ["File", "Coverage"] * 2 + list(itertools.chain.from_iterable(coverage_files))
            md_file.new_table(columns=4, rows=int(len(coverage_files) / 2 + 1), text=table_items, text_align='left')

        md_file.new_line("")
        md_file.new_line("</p>")
        md_file.new_line("</details>")
        md_file.create_md_file()
        self.logger.info(f"The {report_file} was generated")
        return True

    def load_coverage_component(self, base_component: str, dir_path: str = None) -> Mapping[str, Any]:
        """Recursively loads the coverage measures of all files under a component."""
        page = 0
        coverage_files = {}
        read_count = 0
        while True:
            page += 1
            component = base_component
            if dir_path:
                component += f":{dir_path}"
            url = (f"measures/component_tree?p={page}&component={component}"
                   "&additionalFields=metrics&metricKeys=coverage,uncovered_lines,uncovered_conditions&strategy=children")
            data = self._get(url)
            read_count += len(data["components"])
            for item in data["components"]:
                if item["qualifier"] == "DIR":
                    # descend into sub-directories
                    coverage_files.update(self.load_coverage_component(base_component, item["path"]))
                    continue
                elif not item["measures"]:
                    continue
                elif item["qualifier"] == "FIL":
                    coverage_files[item["path"]] = [m["value"] for m in item["measures"] if m["metric"] == "coverage"][0]
            if data["paging"]["total"] <= read_count:
                break
        return coverage_files
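# A hedged usage sketch: how a CI script could drive this API for one connector
# module. The host, token, PR name, module, and report file name are
# illustrative values, not taken from the original scripts, and the resulting
# project key depends on the assumed AIRBYTE_PROJECT_PREFIX above.
if __name__ == "__main__":
    api = SonarQubeApi(host="https://sonarcloud.io/", token="<sonar-token>", pr_name="pr/12345")
    project = api.module2project("connectors/source-s3")  # -> "airbyte:connectors:source-s3" with the assumed prefix
    api.create_project(project)  # reports success even if the project already exists
    api.generate_report(project, report_file="sq_report.md")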