class SnapshotChecker(object):

    def __init__(self):
        self.ipparser = ImportPathParserBuilder().buildWithLocalMapping()
        self.artefactreaderact = ActFactory().bake("artefact-reader")
        self.commitreaderact = ActFactory().bake("scan-upstream-repository")

    def _getCommitDate(self, repository, commit):
        try:
            artefact = self.commitreaderact.call({
                "repository": repository,
                "commit": commit
            })
        except ValueError as e:
            logging.error(e)
            return {}

        return artefact["commits"][commit]

    def _comparePackages(self, package, upstream_commit, distro_commit):
        if upstream_commit["cdate"] == distro_commit["cdate"]:
            return "%s%s is up-to-date%s" % (GREEN, package, ENDC)
        elif upstream_commit["cdate"] < distro_commit["cdate"]:
            return "%s%s is newer in distribution%s" % (BLUE, package, ENDC)
        else:
            return "%s%s is outdated in distribution%s" % (RED, package, ENDC)

    def _checkPackageCoverage(self, product, distribution, build, rpm, ipprefix, packages):
        data = {
            "artefact": ARTEFACT_GOLANG_PROJECT_DISTRIBUTION_PACKAGES,
            "product": product,
            "distribution": distribution,
            "build": build,
            "rpm": rpm
        }

        artefact = self.artefactreaderact.call(data)

        # get the list of packages not covered by the rpm
        for ipprefix_class in artefact["data"]:
            if ipprefix_class["ipprefix"] == ipprefix:
                # all packages covered?
                return list(set(packages) - set(ipprefix_class["packages"]))

        # the ipprefix is not covered by the rpm at all
        return list(packages)

    def check(self, snapshot, product, distribution):
        """Check if a given snapshot is covered in a distribution

        :param snapshot: project snapshot
        :type  snapshot: Snapshot
        :param product: OS product, e.g. Fedora
        :type  product: string
        :param distribution: OS distribution, e.g. f23, f25, rawhide, centos7, ...
        :type  distribution: string
        """
        packages = snapshot.packages()

        ipprefixes = {}
        providers = {}
        rpms = {}
        upstream = {}
        not_recognized = []

        for package in packages:
            try:
                self.ipparser.parse(package)
            except ValueError:
                not_recognized.append(package)
                continue

            ipprefix = self.ipparser.getImportPathPrefix()

            try:
                ipprefixes[ipprefix].append(package)
            except KeyError:
                ipprefixes[ipprefix] = [package]

            # store the ipprefix commit (assuming all packages with the same
            # prefix have the same commit)
            upstream[ipprefix] = packages[package]

            # ipprefix -> provider prefix
            providers[ipprefix] = self.ipparser.getProviderSignature()

            # ipprefix -> rpm
            data = {
                "artefact": ARTEFACT_GOLANG_IPPREFIX_TO_RPM,
                "distribution": "rawhide",
                "product": "Fedora",
                "ipprefix": ipprefix
            }

            # if the ipprefix2rpm artefact does not exist => report it and continue, no fallback
            # TODO(jchaloup): FF: fallback to generic mapping of ipprefix to pkg name
            # and report that "maybe" the ipprefix is provided by this package
            try:
                rpms[ipprefix] = self.artefactreaderact.call(data)
            except ActFailedError:
                logging.error("Unable to get ipprefix2rpm mapping for %s" % ipprefix)

        for ipprefix in ipprefixes:
            if ipprefix not in providers:
                print "%sUnable to find provider for '%s' ipprefix%s" % (WHITE, ipprefix, ENDC)
                continue

            if ipprefix not in rpms:
                print "%sUnable to find ipprefix2rpm mapping for '%s' ipprefix%s" % (WHITE, ipprefix, ENDC)
                continue

            upstream_commit = self._getCommitDate(providers[ipprefix], upstream[ipprefix])
            distro_commit = self._getCommitDate(providers[ipprefix], rpms[ipprefix]["commit"])

            if upstream_commit == {}:
                logging.error("Unable to retrieve commit info for %s %s" % (ipprefix, upstream[ipprefix]))
                continue

            if distro_commit == {}:
                logging.error("Unable to retrieve commit info for %s %s" % (ipprefix, rpms[ipprefix]["commit"]))
                continue

            # compare commits
            comparison = self._comparePackages(ipprefix, upstream_commit, distro_commit)

            # check if packages in the ipprefix class are covered in the distribution
            not_covered = self._checkPackageCoverage(
                product,
                distribution,
                rpms[ipprefix]["build"],
                rpms[ipprefix]["rpm"],
                ipprefix,
                ipprefixes[ipprefix]
            )

            if not_covered != []:
                print "%s: %snot covered: %s%s" % (comparison, RED, not_covered, ENDC)
            else:
                print comparison
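

# Usage sketch (illustrative, not part of the original API): check a snapshot
# against a distribution. Assumes the Snapshot instance was built elsewhere,
# e.g. by SnapshotReconstructor below, and that the backing acts are
# configured; "Fedora" and "rawhide" are example product/distribution values.
def _example_check_snapshot(snapshot):
    checker = SnapshotChecker()
    checker.check(snapshot, "Fedora", "rawhide")
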
class SnapshotReconstructor(object):

    def __init__(self):
        # parsers
        self.ipparser = ImportPathParserBuilder().buildWithLocalMapping()
        # acts
        self.go_code_inspection_act = ActFactory().bake("go-code-inspection")
        self.scan_upstream_repository_act = ActFactory().bake("scan-upstream-repository")
        # snapshot
        self._snapshot = Snapshot()
        # dependency space
        self.detected_projects = {}
        self.unscanned_projects = {}
        self.scanned_projects = {}

    def _getCommitTimestamp(self, repository, commit):
        """Retrieve a commit from a repository and return its commit date

        :param repository: repository
        :type  repository: dict
        :param commit: commit
        :type  commit: hex string
        """
        data = {
            "repository": repository,
            "commit": commit
        }

        # TODO(jchaloup): catch exception if the commit is not found
        commit_data = self.scan_upstream_repository_act.call(data)
        return commit_data["commits"][commit]["cdate"]

    def _findYoungestCommit(self, commits):
        # sort commits by commit date and return the youngest one
        commits = map(lambda l: {"c": l, "d": commits[l]["cdate"]}, commits)
        commits = sorted(commits, key=lambda commit: commit["d"])
        return commits[-1]

    def _findClosestCommit(self, repository, timestamp):
        """Get the youngest commit from the repository that is not newer than timestamp.

        :param repository: repository
        :type  repository: dict
        :param timestamp: commit timestamp
        :type  timestamp: integer
        """
        # TODO(jchaloup): search for commits only on the master branch!!!
        # Other branches can be in an inconsistent state with experimental
        # features and get picked unintentionally.
        data = {
            "repository": repository,
            "end_timestamp": timestamp
        }

        DAY = 3600 * 24
        # try the last day, week, month and year
        for delta in [1, 7, 30, 365]:
            data["start_timestamp"] = timestamp - delta * DAY
            rdata = self.scan_upstream_repository_act.call(data)
            if rdata["commits"] != {}:
                return self._findYoungestCommit(rdata["commits"])

        # unbound start_timestamp
        del data["start_timestamp"]
        rdata = self.scan_upstream_repository_act.call(data)
        if rdata["commits"] != {}:
            return self._findYoungestCommit(rdata["commits"])

        # no commit found => raise exception
        raise KeyError("Commit not found")

    def _detectNextDependencies(self, dependencies, ipprefix, commit_timestamp):
        dependencies = list(set(dependencies))

        # normalize paths
        normalizer = ImportPathNormalizer()
        dependencies = map(lambda l: normalizer.normalize(l), dependencies)

        decomposer = ImportPathsDecomposerBuilder().buildLocalDecomposer()
        decomposer.decompose(dependencies)
        prefix_classes = decomposer.getClasses()

        next_projects = {}
        for prefix in prefix_classes:
            # filter out the Native prefix
            if prefix == "Native":
                continue

            # filter out the project's own import path prefix
            if prefix == ipprefix:
                continue

            logging.warning("Processing %s ..." % prefix)
            # for each imported path get a list of commits in a given interval
            try:
                self.ipparser.parse(prefix)

                # ipprefix already covered?
                if self.ipparser.getImportPathPrefix() in self.detected_projects:
                    # every import path covered in the prefix class?
                    not_covered = []
                    for ip in prefix_classes[prefix]:
                        if ip not in self.detected_projects[prefix]:
                            not_covered.append(ip)

                    if not_covered == []:
                        logging.warning("Prefix %s already covered" % prefix)
                        continue

                    logging.warning("Some paths '%s' not yet covered in '%s' prefix" % (str(not_covered), prefix))
                    # scan only the import paths not yet covered
                    prefix_classes[prefix] = not_covered

                provider = self.ipparser.getProviderSignature()
                provider_prefix = self.ipparser.getProviderPrefix()
            except ValueError as e:
                raise ReconstructionError("Prefix provider error: %s" % e)

            try:
                closest_commit = self._findClosestCommit(provider, commit_timestamp)
            except KeyError:
                raise ReconstructionError("Closest commit to %s timestamp for %s not found" % (commit_timestamp, provider_prefix))

            # update packages to scan
            next_projects[prefix] = {
                "ipprefix": prefix,
                "paths": map(lambda l: str(l), prefix_classes[prefix]),
                "provider": provider,
                "commit": closest_commit["c"],
                #"timestamp": closest_commit["d"],
                "provider_prefix": provider_prefix
            }

        return next_projects

    def _detectDirectDependencies(self, repository, commit, ipprefix, commit_timestamp, mains, tests):
        data = {
            "type": "upstream_source_code",
            "project": ipprefix,
            "commit": commit,
            "ipprefix": ipprefix,
            "directories_to_skip": []
        }

        packages_artefact = self.go_code_inspection_act.call(data)

        # collect dependencies of devel (non-main, non-test) packages
        direct_dependencies = []
        for package in packages_artefact["data"]["dependencies"]:
            direct_dependencies = direct_dependencies + map(lambda l: l["name"], package["dependencies"])

        if mains != []:
            paths = {}
            for path in packages_artefact["data"]["main"]:
                paths[path["filename"]] = path["dependencies"]

            for main in mains:
                if main not in paths:
                    raise ReconstructionError("Main package file %s not found" % main)
                direct_dependencies = direct_dependencies + paths[main]

        if tests:
            for dependencies in map(lambda l: l["dependencies"], packages_artefact["data"]["tests"]):
                direct_dependencies = direct_dependencies + dependencies

        # remove duplicates
        direct_dependencies = list(set(direct_dependencies))

        next_projects = self._detectNextDependencies(direct_dependencies, ipprefix, commit_timestamp)

        # update detected projects
        for project in next_projects:
            self.detected_projects[project] = next_projects[project]["paths"]

        # update packages to scan
        for prefix in next_projects:
            if prefix in self.unscanned_projects:
                continue
            self.unscanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
            self.scanned_projects[prefix] = copy.deepcopy(next_projects[prefix])

    def _detectIndirectDependencies(self, ipprefix, commit_timestamp):
        nodes = []
        next_projects = {}

        for prefix in self.unscanned_projects:
            # get the dataset
            dataset = ProjectDatasetBuilder(
                self.unscanned_projects[prefix]["provider_prefix"],
                self.unscanned_projects[prefix]["commit"]
            ).build()

            # construct the dependency graph from the dataset
            graph = DatasetDependencyGraphBuilder().build(dataset, LEVEL_GOLANG_PACKAGES)

            # get the subgraph of the evolved dependency's packages
            subgraph = GraphUtils.truncateGraph(graph, self.unscanned_projects[prefix]["paths"])

            # get dependencies from the subgraph
            package_nodes = filter(lambda l: l.startswith(self.unscanned_projects[prefix]["ipprefix"]), subgraph.nodes())

            label_edges = dataset.getLabelEdges()
            for node in package_nodes:
                nodes = nodes + label_edges[node]

        nodes = list(set(nodes))
        next_projects = self._detectNextDependencies(nodes, ipprefix, commit_timestamp)
        if next_projects == {}:
            return False

        # update packages to scan
        one_at_least = False
        self.unscanned_projects = {}
        for prefix in next_projects:
            # prefix already covered? Just extend the current coverage
            if prefix in self.detected_projects:
                for ip in next_projects[prefix]["paths"]:
                    if str(ip) not in self.detected_projects[prefix]:
                        self.detected_projects[prefix].append(ip)
                        self.scanned_projects[prefix]["paths"].append(ip)
                continue

            one_at_least = True
            self.unscanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
            self.scanned_projects[prefix] = copy.deepcopy(next_projects[prefix])
            self.detected_projects[prefix] = copy.deepcopy(next_projects[prefix]["paths"])

        return one_at_least

    def reconstruct(self, repository, commit, ipprefix, mains=[], tests=False):
        """Reconstruct snapshot

        :param repository: project repository
        :type  repository: dict
        :param commit: repository commit
        :type  commit: string
        :param ipprefix: import path prefix
        :type  ipprefix: string
        :param mains: list of main package files (root path to a go file) to cover;
                      implicitly no main package, just devel
        :type  mains: [string]
        :param tests: cover unit tests as well, default is False
        :type  tests: boolean
        """
        # clear the snapshot
        self._snapshot.clear()

        # get the commit date of the project's commit
        commit_timestamp = self._getCommitTimestamp(repository, commit)

        # get direct dependencies
        logging.info("=============DIRECT==============")
        self._detectDirectDependencies(repository, commit, ipprefix, commit_timestamp, mains, tests)

        # scan detected dependencies
        logging.info("=============INDIRECT==============")
        while self._detectIndirectDependencies(ipprefix, commit_timestamp):
            logging.info("=============INDIRECT==============")

        # create the snapshot
        for prefix in self.scanned_projects:
            for ip in sorted(self.scanned_projects[prefix]["paths"]):
                self._snapshot.addPackage(ip, self.scanned_projects[prefix]["commit"])

        return self

    def snapshot(self):
        return self._snapshot
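

# Usage sketch (illustrative, not part of the original API): reconstruct the
# dependency snapshot of a project at a given commit, then check it against
# Fedora rawhide. github.com/coreos/etcd is used as an example project, as in
# the original code; the repository dict layout is an assumption based on the
# provider signatures used above, and `commit` is any commit hash from the
# project's repository.
def _example_reconstruct_and_check(commit):
    repository = {
        "provider": "github",
        "username": "coreos",
        "project": "etcd"
    }
    snapshot = SnapshotReconstructor().reconstruct(
        repository,
        commit,
        "github.com/coreos/etcd",
        mains=[],
        tests=False
    ).snapshot()
    SnapshotChecker().check(snapshot, "Fedora", "rawhide")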