Example #1
0
def populate_sql_with_full_dependency_info(
    deps_elaborated,
    versions_by_package, # <---- NOT USED. TODO: Remove.
    packages_without_available_version_info,
    dists_with_missing_dependencies,
    db_fname=None):
  """
  Populate the sqlite3 dependency database from elaborated dependency data.

  For each dist in deps_elaborated, writes one row per dependency to the
  dependency specifier table and one row per version able to satisfy that
  dependency to the full dependency info table, then flushes the db.

  Interface preserved as-is: versions_by_package and the two trailing
  collection arguments are currently unused here.
  """
  log = depresolve.logging.getLogger('populate_sql_with_full_dependency_info')

  log.info("Initializing db")

  # Set up the sqlite3 database that will receive the dependency information
  # interpreted from the json files.
  initialize(db_fname)

  for distkey, elaborated_deps in deps_elaborated.items(): # for every dist,

    log.info("Working through " + distkey + "'s dependencies.")

    for e_dep in elaborated_deps: # for every one of its dependencies,

      # Each elaborated dependency is (packname, satisfying versions,
      # specstring, SpecifierSet); the 4th element is not needed here.
      pack_depended_on = e_dep[0]
      versions_satisfying = e_dep[1]
      spec = e_dep[2]

      log.info("  satisfying_packagename:" + pack_depended_on)
      log.info("  list_of_satisfying_versions: " + str(versions_satisfying))
      log.info("  specstring: " + spec)

      # Record the raw dependency specifier.
      add_to_table(SQL_DEP_SPECIFIER_TABLE, distkey, pack_depended_on, spec)

      # Record one row per dist that could satisfy this dependency,
      # e.g. depender 'codegrapher(0.1.1)' -> package 'click' -> 'click(1.0)'.
      for ver in versions_satisfying:
        add_to_table(
            SQL_DEPENDENCY_TABLE,
            distkey,
            pack_depended_on,
            depdata.distkey_format(pack_depended_on, ver))

  flush()
def get_distkey_from_full_filename(fname_full):
  """
  Given a full filename of an sdist (a .tar.gz in a bandersnatch mirror, say,
  of the form e.g. /srv/.../packagename/packagename-1.0.0.tar.gz), return the
  distkey, the key used to identify the distribution, currently in the format
  'packname(version)'.

  Also performs normalizations to match what we can expect from pip.
  """
  # Find the last two '/' separators and the '.tar.gz' suffix by index, so
  # the parent directory name and the '<package>-<version>' stem can be
  # sliced out of the full path.
  last_slash = fname_full.rfind('/')
  second_last_slash = fname_full[: last_slash].rfind('/')
  targz_at = fname_full.rfind('.tar.gz')

  # The parent directory roughly dictates the (unnormalized) package name.
  raw_packname = fname_full[second_last_slash + 1 : last_slash]

  # The basename minus '.tar.gz' is '<package>-<version>', lowercased.
  raw_pack_and_version = fname_full[last_slash + 1 : targz_at].lower()

  # Strip '<package>-' off the front, leaving the unnormalized version str.
  raw_version = raw_pack_and_version[len(raw_packname) + 1 :]

  # Normalize both pieces and combine them into a normalized distkey.
  return depdata.distkey_format(
      depdata.normalize_package_name(raw_packname),
      depdata.normalize_version_string(raw_version))
def main():
  """
  Command-line entry point for the dependency scraper.

  Parses sys.argv for options (--n=N, --cm1/--cm2/--cm3, --noskip,
  --carefulskip, --local, --local-old) and/or explicit distkeys, builds the
  list of distributions to inspect (from the arguments or from a local
  bandersnatch PyPI mirror), then runs a modified pip on each one to collect
  dependency/conflict data, blacklisting dists whose pip run errors out.
  Collected data is written to disk periodically and at the end via
  depdata.write_data_to_files.
  """
  # Some defaults:
  n_sdists_to_process = 0 # debug; max packages to explore during debug - overriden by --n=N argument.
  conflict_model = 3
  no_skip = False
  careful_skip = False
  use_local_index = False
  use_local_index_old = False
  #run_all_conflicting = False

  # Files and directories.
  assert(os.path.exists(WORKING_DIRECTORY)), 'Working dir does not exist...??'

  # Ensure that appropriate directory for downloaded distros exists.
  # This would be terrible to duplicate if scraping a large number of packages.
  # One such sdist cache per system! Gets big.
  if not os.path.exists(TEMPDIR_FOR_DOWNLOADED_DISTROS):
    os.makedirs(TEMPDIR_FOR_DOWNLOADED_DISTROS)



  logger.info("scrape_deps_and_detect_conflicts - Version 0.5")
  distkeys_to_inspect_not_normalized = [] # not-yet-normalized user input, potentially filled with distkeys to check, from arguments
  distkeys_to_inspect = [] # list after argument normalization

  # Argument processing.
  # If we have arguments coming in, treat those as the packages to inspect.
  if len(sys.argv) > 1:
    for arg in sys.argv[1:]:
      if arg.startswith("--n="):
        n_sdists_to_process = int(arg[4:])
      elif arg == "--cm1":
        conflict_model = 1
      elif arg == "--cm2":
        conflict_model = 2
      elif arg == "--cm3":
        conflict_model = 3
      elif arg == "--noskip":
        no_skip = True
      elif arg == '--carefulskip':
        careful_skip = True
      elif arg == "--local-old":
        # without ='<directory>' means we pull alphabetically from local PyPI
        # mirror at /srv/pypi/
        # Parse .tar.gz files as they appear in bandersnatch version <= 1.8
        # For newer versions of bandersnatch, the sdist files are stored
        # differently (not in project-based directories) and so the argument
        # --local should be used instead.
        use_local_index_old = True
      elif arg == "--local":
        # without ='<directory>' means we pull from local PyPI mirror at
        # /srv/pypi/
        # Parse .tar.gz files as they appear in bandersnatch version 1.11
        # For bandersnatch 1.11, the sdist files are stored differently than in
        # <1.8. They are no longer kept in project-based directories).
        # If you are using a version of bandersnatch <=1.8, the argument
        # --local-old should be used instead.
        use_local_index = True
      #elif arg == '--conflicting':
      #  # Operate locally and run on the distkeys provided in the indicated
      #  # file, each on its own line.
      #  use_local_index = True
      #  run_all_conflicting = True
      else:
        distkeys_to_inspect_not_normalized.append(arg) # e.g. 'motorengine(0.7.4)'
        # For simplicity right now, I'll use one mode or another, not both.
        # Last arg has it if both.


  # Normalize any input distkeys we were given.
  for distkey in distkeys_to_inspect_not_normalized:
    assert '(' in distkey and distkey.endswith(')'), 'Invalid input.'
    distkey = depdata.normalize_distkey(distkey)
    distkeys_to_inspect.append(distkey)


  # Were we not given any distkeys to inspect?
  if not distkeys_to_inspect:# and not run_all_conflicting:

    if not use_local_index and not use_local_index_old:
      # If we're not using a local index, we have nothing to do.
      raise ValueError('You neither specified distributions to scrape nor '
          '(alternatively) indicated that they should be chosen from a local '
          'mirror.')

    elif use_local_index_old:
      # If we were told to work with a local mirror, but weren't given specific
      # sdists to inspect, we'll scan everything in
      # BANDERSNATCH_MIRROR_SDIST_DIR until we have n_sdists_to_process sdists.
      # There is a better way to do this, but I'll leave this as is for now.

      # Ensure that the local PyPI mirror directory exists first.
      if not os.path.exists(BANDERSNATCH_MIRROR_SDIST_DIR):
        raise Exception('--- Exception. Expecting a bandersnatched mirror of '
            'PyPI at ' + BANDERSNATCH_MIRROR_SDIST_DIR + ' but that directory '
            'does not exist.')
      i = 0
      for dir, subdirs, files in os.walk(BANDERSNATCH_MIRROR_SDIST_DIR):
        for fname in files:
          if is_sdist(fname):
            tarfilename_full = os.path.join(dir, fname)
            # Deduce package names and versions from sdist filename.
            distkey = get_distkey_from_full_filename(tarfilename_full)
            distkeys_to_inspect.append(distkey)
            i += 1
            # awkward control structures, but saving debug run time. tidy later
            if i >= n_sdists_to_process:
              break
        if i >= n_sdists_to_process:
          break

    else: # use_local_index (modern bandersnatch version)
      assert use_local_index, 'Programming error.'
      # # sdists live here: /srv/pypi/web/packages/??/??/*/*.tar.gz
      # # Can implement this such that it checks those places.
      # for name1 in os.listdir(BANDERSNATCH_NEW_MIRROR_SDIST_DIR):
      #   if len(name1) != 2:
      #     continue
      #   for name2 in os.listdir(os.path.join(
      #       BANDERSNATCH_NEW_MIRROR_SDIST_DIR, name1)):
      #     if len(name2) != 2:
      #       continue
      #     for name3 in os.listdir(os.path.join(
      #         BANDERSNATCH_NEW_MIRROR_SDIST_DIR, name1, name2)):
      #       if len(name3) != 60:
      #         continue
      #       for fname in os.listdir():
      #  #.... No, this is not going to unambiguously get me the package name
      #  # in the way that it used to in older versions of bandersnatch.
      #  # Rather than dealing with unexpected naming consequences, I'll go
      #  # with the following even more annoying hack....

      # A dictionary of all versions of all packages on the mirror,
      # collected out-of-band (via xml-rpc at same time as mirroring occurred).
      vbp_mirror = json.load(open('data/versions_by_package.json', 'r'))
      i = 0
      for package in vbp_mirror:
        if i >= n_sdists_to_process:
          break

        for version in vbp_mirror[package]:

          if i >= n_sdists_to_process:
            break

          distkey = depdata.distkey_format(package, version)
          distkeys_to_inspect.append(distkey)

          i += 1



  # We should now have distkeys to inspect (unless run_all_conflicting is True).


  # Load the dependencies, conflicts, and blacklist databases.
  # The blacklist is a list of runs that resulted in errors or runs that were
  # manually added because, for example, they hang seemingly forever or take an
  # inordinate length of time.
  depdata.ensure_data_loaded([conflict_model])

  # Alias depdata.conflicts_db to the relevant conflicts db. (Ugly)
  depdata.set_conflict_model_legacy(conflict_model) # should remove this


  #if run_all_conflicting:
  #  distkeys_to_inspect = [distkey for distkey in depdata.conflicts_3_db if
  #      depdata.conflicts_3_db[distkey]]


  n_inspected = 0
  n_successfully_processed = 0
  last_wrote_at = 0

  # Now take all of the distkeys ( e.g. 'python-twitter(0.2.1)' ) indicated and
  # run on them.
  for distkey in distkeys_to_inspect:

    # To avoid losing too much data, make sure we at least write data to disk
    # about every 100 successfully processed or 10000 inspected dists. Avoid
    # writing repeatedly in edge cases (e.g. when we write after 100
    # successfully processed and then have to keep writing for every skip that
    # occurs after that.
    progress = n_inspected + n_successfully_processed * 100
    if progress > last_wrote_at + 10000:
      last_wrote_at = progress
      logger.info("Writing early.")
      depdata.write_data_to_files([conflict_model])


    # The skip conditions.

    # If dist is in the blacklist for the same version of python we're running.
    blacklisted = distkey in depdata.blacklist \
        and sys.version_info.major in depdata.blacklist[distkey]

    # If dist has conflict info saved already
    already_in_conflicts = distkey in depdata.conflicts_db

    # Do we have dep info for the dist? Not a skip condition, but part of
    # careful_skip tests.
    already_in_dependencies = distkey in depdata.dependencies_by_dist


    # If we're not in no_skip mode, perform the skip checks.
    # Skip checks. If the dist is blacklisted or we already have dependency
    # data, then skip it - unless we're in careful skip mode and we don't
    # have dependency data for the dist.
    if not no_skip and (blacklisted or already_in_conflicts):

      # If dist isn't blacklisted, we already have conflict info, there's no
      # dependency info, and careful skip is on, don't actually skip.
      if careful_skip and not already_in_dependencies and not blacklisted:
        print('---    Not skipping ' + distkey + ': ' +
            'Already have conflict data, however there is no dependency info '
            'for the dist, the dist is not blacklisted, and we are in '
            'careful_skip mode.')

      else: # Skip, since we don't have a reason not to.
        n_inspected += 1
        print('---    SKIP -- ' + distkey + ': ' +
            'Blacklisted. '*blacklisted +
            'Already have conflict data. '*already_in_conflicts +
            '(Finished ' + str(n_inspected) + ' out of ' +
            str(len(distkeys_to_inspect)) + ')')
        continue


    # If we didn't skip, process the dist.

    packagename = depdata.get_packname(distkey)
    version_string = depdata.get_version(distkey)
    #assert(distkey.rfind(')') == len(distkey) - 1)
    formatted_requirement = packagename + "==" + version_string
    exitcode = None
    assert(conflict_model in [1, 2, 3])

    # Construct the argument list.
    # Include argument to pass to pip to tell it not to prod users about our
    # strange pip version (lest they follow that instruction and install a
    # standard pip version):
    pip_arglist = [
      'install',
      '-d', TEMPDIR_FOR_DOWNLOADED_DISTROS,
      '--disable-pip-version-check',
      '--find-dep-conflicts', str(conflict_model),
      '--quiet']

    if use_local_index:
      pip_arglist.extend(['-i', BANDERSNATCH_MIRROR_INDEX_DIR])

    pip_arglist.append(formatted_requirement)

    # With arg list constructed, call pip.main with it to run a modified pip
    # install attempt (will not install).
    # This assumes that we're dealing with my pip fork version 8.0.0.dev0seb).
    print('---    Sending ' + distkey + ' to pip.')
    logger.debug('Scraper says: before pip call, len(deps) is ' +
        str(len(depdata.dependencies_by_dist)))

    # Call pip, with a 5 minute timeout.
    exitcode = None # scoping paranoia
    try:
      exitcode = _call_pip_with_timeout(pip_arglist)
    except timeout.TimeoutException as e: # This catch is not likely. See below
      logger.warning('pip timed out on dist ' + distkey + '(5min)!'
          ' Will treat as error. Exception follows: ' + str(e.args))
      # Set the exit code to something other than 2 or 0 and it'll be treated
      # like any old pip error below, resulting in a blacklist.
      exitcode = 1000

    # However, unfortunately, we cannot assume that pip will let that exception
    # pass up to us. It seems to take the signal, stop and clean up, and then
    # return exit code 2. This is fine, except that then we can't really
    # blacklist the process. I'd have to add a timer here, detect something
    # very close to the timeout, and guess that it timed out. /: That sucks.
    # In any case, we'll not learn that it's a process that times out, but
    # we'll just look at it as a possible conflict case. (The data recorded
    # will not list it as a conflict. Hopefully, that data is not corrupted.
    # It's unlikely that it would have been, though, so I judge this OK.)

    # Process the output of the pip command.
    # Exit code 2 from the forked pip signals a possible dependency conflict;
    # 0 is success; anything else is an unrelated pip error -> blacklist.
    if exitcode == 2:
      print('--- X  SDist ' + distkey + ' : pip errored out (code=' +
        str(exitcode) + '). Possible DEPENDENCY CONFLICT. Result recorded in '
        'conflicts_<...>.json. (Finished ' +
        str(n_inspected) + ' out of ' + str(len(distkeys_to_inspect)) +
        ')')
    elif exitcode == 0:
      print('--- .  SDist ' + distkey + ' : pip completed successfully. '
        'No dependency conflicts observed. (Finished ' + str(n_inspected)
        + ' out of ' + str(len(distkeys_to_inspect)) + ')')
    else:
      print('--- .  SDist ' + distkey + ': pip errored out (code=' +
        str(exitcode) + '), but it seems to have been unrelated to any dep '
        'conflict.... (Finished ' + str(n_inspected) + ' out of ' +
        str(len(distkeys_to_inspect)) + ')')
      # Store in the list of failing packages along with the python version
      # we're running. (sys.version_info.major yields int 2 or 3)
      # Contents are to eventually be a list of the major versions in which it
      # fails. We should never get here if the dist is already in the blacklist
      # for this version of python, but let's keep going even if so.
      if distkey in depdata.blacklist and sys.version_info.major in \
        depdata.blacklist[distkey] and not no_skip:
        logger.warning('  WARNING! This should not happen! ' + distkey + ' was'
          'already in the blacklist for python ' + str(sys.version_info.major)
          + ', thus it should not have been run unless we have --noskip on '
          '(which it is not)!')
      else:
      # Either the dist is not in the blacklist or it's not in the blacklist
      # for this version of python. (Sensible)
        if distkey not in depdata.blacklist: # first blacklist entry for dist
          depdata.blacklist[distkey] = [sys.version_info.major]
          logger.info("  Added entry to blacklist for " + distkey)
        else:
          assert(no_skip or sys.version_info.major not in depdata.blacklist[distkey])
          depdata.blacklist[distkey].append(sys.version_info.major)
          logger.info("  Added additional entry to blacklist for " + distkey)


    # end of exit code processing
    n_inspected += 1
    n_successfully_processed += 1

  # end of for each tarfile/sdist

  # We're done with all packages. Write the collected data back to file.
  logger.debug("Writing.")
  depdata.write_data_to_files([conflict_model])
Example #4
0
    n_packs_processed += 1
    print('Processing package ' + p + '  (' + str(n_packs_processed) + '/' +
        str(n_total_packages) + ')')
    # If we lack the catalog of versions for this package, get it from PyPI.
    if p not in versions_by_package:
      # The True here requests all hidden packages. (Packages on PyPI can be
      # hidden by the package maintainer. I suspect that hidden packages are
      # more likely to be packages that had issues (like potential conflicts)
      # that were hidden when fixed versions were released.
      versions_by_package[p] = client.package_releases(p, True)

    # # Alternatively, instead, generate new vbp every time.
    # versions_by_package[p] = client.package_releases(p, True)

    for v in versions_by_package[p]:
      distkey = data.distkey_format(p, v)
      distkey_to_packver_map[distkey] = (p, v)

      # If we lack the metadata for this version, fetch it.
      if distkey not in metadata_by_distkey:
        metadata_by_distkey[distkey] = client.release_data(p, v)

    print('Done with package  ' + p)

except:
  print('----- Process interrupted by exception. Dumping data before '
      're-raising. Data filenames prepended with "aborted_".')
  write_file(metadata_by_distkey, OUTPUT_FNAME_METADATA + '_aborted')
  write_file(versions_by_package, OUTPUT_FNAME_VERSIONS + '_aborted')
  write_file(distkey_to_packver_map, OUTPUT_FNAME_DISTKEYS + '_aborted')
  print('Data dumped.')
Example #5
0
def resolve_via_depsolver(distkey,
                          deps,
                          versions_by_package=None,
                          already_converted=False):
    """
    Wrapper for the external depsolver package so that it can be tested via
    the same testing employed for our own resolver package. Solves a
    dependency structure for a given package's dependencies using depsolver.

    Intended to be compatible with resolver.depdata.test_resolver, e.g.:
      depdata.test_resolver(resolve_via_depsolver,
          DEPS_SIMPLE_DEPSOLVER_SOLUTION, 'X(1)', DEPS_SIMPLE,
          use_raw_deps=True)

    Converts depsolver's output back into a comprehensible format for
    resolver.resolvability; the mapping involves some ugly fudging of
    version strings.

    If 'already_converted' is True, deps is taken as depsolver-compatible
    deps (PackageInfos) and no conversion is performed.

    Raises depresolve.UnresolvableConflictError if depsolver seems to detect
    an unresolvable conflict (which it signals, of all things, by raising
    NotImplementedError).

    Throws:
     - timeout.TimeoutException if the process takes longer than 5 minutes.
    """
    # Bring the dependencies into depsolver's PackageInfo form unless the
    # caller says they already are.
    if already_converted:
        depsolver_dists = deps
        unconvertible_dists = []
    else:
        depsolver_dists, unconvertible_dists = \
            convert_packs_to_packageinfo_for_depsolver(deps)

    # Build a depsolver "Repository" holding one PackageInfo per dist known
    # from the deps dictionary.
    # NOTE: Weird hack preserved from the original: these packages may
    # already belong to a repo, in which case that repo is reused.
    # THIS HACK IS BAD AND MUST BE TEMPORARY.
    preexisting_repo = depsolver_dists[0]._repository
    if preexisting_repo is not None:
        repo = preexisting_repo
    else:
        repo = depsolver.Repository(depsolver_dists)

    # An empty "Repository" models a system with nothing installed yet.
    installed_repo = depsolver.Repository()

    # A depsolver Pool abstracts the combined state of a repository and what
    # is installed locally. /:
    pool = depsolver.Pool([repo, installed_repo])

    # Putative installations are expressed as requests; installation does not
    # actually occur. This produces a diff-like object applicable to the
    # repository.
    request = depsolver.Request(pool)

    try:
        request.install(
            depsolver.Requirement.from_string(
                convert_distkey_for_depsolver(distkey, as_req=True)))
    except DepsolverConversionError as e:
        logger.exception('Unable to convert given distkey to install into a '
                         'depsolver-compatible format. Given distkey: ' +
                         distkey)
        raise

    try:
        depsolver_solution = list(
            depsolver.Solver(pool, installed_repo).solve(request))
    except NotImplementedError as e:  # Sadly, this is what depsolver throws.
        logger.debug("Caught NotImplementedError from depsolver: \n" +
                     str(e.args) + "\n")
        raise depresolve.UnresolvableConflictError(
            'Unable to resolve conflict '
            'via depsolver SAT solver. Presume that the distribution ' +
            distkey + ' has an unresolvable conflict.')

    # depsolver yields something like:
    #   [Installing A (3.0.0), Installing C (1.0.0), Installing B (1.0.0),
    #       Installing X (1.0.0)]
    # where each item is a depsolver.solver.operations.Install object. Strip
    # that down to our own distkey strings, e.g.:
    #   ['X(1)', 'B(1)', 'C(1)', 'A(3)']
    # so the output can be assessed by the resolver.test_depdata module.
    return [
        depdata.distkey_format(
            convert_packname_from_depsolver(operation.package.name),
            convert_version_from_depsolver(operation.package.version))
        for operation in depsolver_solution]
Example #6
0
def _backtracking_satisfy(distkey_to_satisfy,
                          edeps,
                          versions_by_package,
                          _depth=0,
                          _candidates=None,
                          _conflicting_distkeys=None):
    """
  Recursive helper to backtracking_satisfy. See comments there.

  The ADDITIONAL arguments, for recursion state, are:
    - _depth: recursion depth, optionally, for debugging output
    - _candidates: used in recursion: the list of candidates already
      chosen, both to avoid circular dependencies and also to select sane
      choices and force early conflicts (to catch all solutions)
    - _conflicting_distkeys: similar to _candidates, but lists dists that
      we've established conflict with accepted members of _candidates. Saves
      time (minimal dynamic programming)

  The ADDITIONAL returns, for recursion state, are:
    - _conflicting_distkeys, for internal use in recursion
    - str, newline separated list, of the edges in the dot graph describing the
      dependencies satisifed here
      (e.g. 'X(1) -> B(1)\nX(1) -> C(1)\nC(1) -> A(3)\nB(1) -> A(3)')

  Raises:
    - depresolve.NoSatisfyingVersionError if a dependency has no known
      satisfying versions at all
    - depresolve.ConflictingVersionError if a dependency conflicts with a
      pre-existing candidate
    - depresolve.UnresolvableConflictError if no candidate version works
  """
    # Fix: the original signature used mutable default arguments
    # (_candidates=[], _conflicting_distkeys=[]), a classic Python pitfall in
    # which the same list object is shared across calls. They are never
    # mutated here, so behavior is unchanged, but normalize None sentinels to
    # fresh lists to remove the hazard.
    if _candidates is None:
        _candidates = []
    if _conflicting_distkeys is None:
        _conflicting_distkeys = []

    # (Not sure this check is necessary yet, but we'll see.)
    # The caller is expected to have prevented selecting a second version of
    # an already-chosen package before recursing here.
    if conflicts_with(distkey_to_satisfy, _candidates):
        assert False, "This should be impossible now...."

    # Circular-dependency guard; also expected to be impossible now due to
    # checks before this call is made.
    if distkey_to_satisfy in _candidates:
        assert False, "This should also be impossible now, I think."
        # You've already got me, bud. Whatchu doin'? (Terminate recursion on
        # circular dependencies, since we're already covered.)
        return [], [], ''

    # Start the set of candidates to install with what our parent (depender)
    # already needs to install, plus ourselves.
    satisfying_candidate_set = _candidates + [
        distkey_to_satisfy,
    ]

    # Start a list of distkeys that conflict with us while we try to fulfil our
    # dependencies. (Prevents duplicating work)
    my_conflicting_distkeys = []

    # Identify the version of the package to install on the dotgraph. /:
    dotgraph = dot_sanitize(depdata.get_packname(distkey_to_satisfy)) + \
        '[label = "' + distkey_to_satisfy + '"];\n'

    depdata.assume_dep_data_exists_for(distkey_to_satisfy, edeps)

    my_edeps = edeps[distkey_to_satisfy]  # my elaborated dependencies

    if not my_edeps:  # if no dependencies, return only what's already listed
        logger.debug('    ' * _depth + distkey_to_satisfy +
                     ' had no dependencies. '
                     'Returning just it.')
        return satisfying_candidate_set, [], ''

    for edep in my_edeps:

        # Each elaborated dependency: (packname, satisfying versions,
        # specstring, ...).
        satisfying_packname = edep[0]
        satisfying_versions = sort_versions(edep[1])
        chosen_version = None

        if not satisfying_versions:
            raise depresolve.NoSatisfyingVersionError(
                'Dependency of ' + distkey_to_satisfy + ' on ' +
                satisfying_packname + ' with '
                'specstring ' + edep[2] +
                ' cannot be satisfied: no versions found '
                'in elaboration attempt.')

        logger.debug('    ' * _depth + 'Dependency of ' + distkey_to_satisfy +
                     ' on ' + satisfying_packname + ' with specstring ' +
                     edep[2] + ' is '
                     'satisfiable with these versions: ' +
                     str(satisfying_versions))

        # Is there already a dist of this package in the candidate set?
        preexisting_dist_of_this_package = find_dists_matching_packname(
            satisfying_packname, satisfying_candidate_set)

        if preexisting_dist_of_this_package:
            # Can't have more than 1 candidate per package to begin with!
            assert 1 == len(preexisting_dist_of_this_package), \
                "Programming error."
            # Set of 1 item -> 1 item.
            preexisting_dist_of_this_package = preexisting_dist_of_this_package[
                0]

            preexisting_version = \
                depdata.get_version(preexisting_dist_of_this_package)

            if preexisting_version in satisfying_versions:
                # The pre-existing candidate also satisfies this dependency;
                # nothing further to do for this edep.
                logger.debug(
                    '    ' * _depth + 'Dependency of ' + distkey_to_satisfy +
                    ' on ' + satisfying_packname + ' with specstring ' +
                    edep[2] +
                    ' is already satisfied by pre-existing candidate ' +
                    preexisting_dist_of_this_package + '. Next dependency.')
                continue

            else:
                raise depresolve.ConflictingVersionError(
                    'Dependency of ' + distkey_to_satisfy + ' on ' +
                    satisfying_packname + ' with '
                    'specstring ' + edep[2] +
                    ' conflicts with a pre-existing distkey in'
                    ' the list of candidates to install: ' +
                    preexisting_dist_of_this_package)

        # NOTE(review): satisfying_versions was already passed through
        # sort_versions above; this second call looks redundant, but is kept
        # since sort_versions' idempotence is not established here.
        for candidate_version in sort_versions(satisfying_versions):

            candidate_distkey = depdata.distkey_format(satisfying_packname,
                                                       candidate_version)

            if candidate_distkey in _conflicting_distkeys:
                logger.debug('    ' * _depth + '  Skipping version ' +
                             candidate_version + '(' + candidate_distkey +
                             '): already in _conflicting_distkeys.')
                continue

            # else try this version.
            logger.debug('    ' * _depth + '  Trying version ' +
                         candidate_version)

            # Would the addition of this candidate result in a conflict?
            # Recurse and test result. Detect UnresolvableConflictError.
            # Because we're detecting such an error in the child, there's no reason
            # to still do detection of the combined set here in the parent, but I
            # will leave in an assert in case.
            try:
                (candidate_satisfying_candidate_set, new_conflicts, child_dotgraph) = \
                    _backtracking_satisfy(candidate_distkey, edeps,
                    versions_by_package, _depth+1, satisfying_candidate_set)

            # I don't know that I should be catching both. Let's see what happens.
            except (depresolve.ConflictingVersionError,
                    depresolve.UnresolvableConflictError):
                # This version conflicted somewhere down the line; remember
                # that and backtrack to the next candidate version.
                logger.debug('    ' * _depth + '  ' + candidate_version +
                             ' conflicted. '
                             'Trying next.')
                my_conflicting_distkeys.append(candidate_distkey)
                continue

            else:  # Could design it so child adds to this set, but won't yet.
                combined_satisfying_candidate_set = combine_candidate_sets(
                    satisfying_candidate_set,
                    candidate_satisfying_candidate_set)

                assert not detect_direct_conflict(combined_satisfying_candidate_set), \
                    "Programming error. See comments adjacent."

                # save the new candidates (could be designed away, but for now, keeping)
                chosen_version = candidate_version
                satisfying_candidate_set = combined_satisfying_candidate_set
                my_conflicting_distkeys.extend(new_conflicts)

                # Save the graph visualization output for the new candidate:
                # an edge from us to the package satisfying this dependency,
                # plus everything the child drew.
                dotgraph += dot_sanitize(depdata.get_packname(distkey_to_satisfy)) + \
                    ' -> ' + dot_sanitize(satisfying_packname) + ';\n' + child_dotgraph

                logger.debug('    ' * _depth + '  ' + candidate_version +
                             ' fits. Next '
                             'dependency.')
                break

        if chosen_version is None:
            raise depresolve.UnresolvableConflictError(
                'Dependency of ' + distkey_to_satisfy + ' on ' +
                satisfying_packname + ' with specstring ' + edep[2] +
                ' cannot be satisfied: versions '
                'found, but none had 0 conflicts.')

    return satisfying_candidate_set, my_conflicting_distkeys, dotgraph
Example #7
0
def naive_satisfy(depender_distkey,
                  edeps,
                  versions_by_package=None,
                  _preexisting_candidates=None,
                  _preexisting_candidate_packs=None):
    """
    Vaguely pip-like "simple dependency resolution". Recurse and list all
    dists that together form a simple resolution to a given distribution's
    dependencies (may have dependency conflicts and not be a true
    resolution).

    Where there is ambiguity, select the first result from sort_versions().
    If multiple dists depend on the same package, we get both in this result.

    This has the same level of capability as pip's dependency resolution,
    though the results are slightly different.

    Arguments:
      - depender_distkey ('django(1.8.3)'),
      - edeps (dictionary returned by depdata.deps_elaborated; see there.)
      - versions_by_package (dictionary of all distkeys, keyed by package
        name; generated from edeps if not provided)
      - _preexisting_candidates, _preexisting_candidate_packs (internal
        recursion accumulators; callers should not supply these)

    Returns:
      - list of distkeys needed as direct or indirect dependencies to
        install depender_distkey, or [] if depender_distkey (or its package)
        is already among the pre-existing candidates.

    Raises:
      - depresolve.NoSatisfyingVersionError if some dependency has no known
        satisfying versions from the elaboration attempt.
    """
    # None sentinels instead of mutable default arguments ([]) so that the
    # accumulators can never be shared across separate top-level calls.
    if _preexisting_candidates is None:
        _preexisting_candidates = []
    if _preexisting_candidate_packs is None:
        _preexisting_candidate_packs = []

    # Copy so we never mutate the caller's lists.
    satisfying_candidate_set = _preexisting_candidates[:]
    satisfying_candidate_packs = _preexisting_candidate_packs[:]

    # Avoid circular dependencies and ignore conflicts: if this dist (or its
    # package) is already accounted for, contribute nothing new.
    if depender_distkey in satisfying_candidate_set or \
        depdata.get_packname(depender_distkey) in satisfying_candidate_packs:
        return []
    else:
        satisfying_candidate_set.append(depender_distkey)
        satisfying_candidate_packs.append(
            depdata.get_packname(depender_distkey))

    if versions_by_package is None:
        versions_by_package = depdata.generate_dict_versions_by_package(edeps)

    depdata.assume_dep_data_exists_for(depender_distkey, edeps)

    my_edeps = edeps[depender_distkey]
    if not my_edeps:  # if no dependencies, return nothing new
        return satisfying_candidate_set

    for edep in my_edeps:
        satisfying_packname = edep[0]
        satisfying_versions = edep[1]
        if satisfying_packname in satisfying_candidate_packs:
            # Avoid circular dependencies and ignore conflicts.
            continue
        if not satisfying_versions:
            raise depresolve.NoSatisfyingVersionError(
                "Dependency of " + depender_distkey + " on " +
                satisfying_packname + " with specstring " + edep[2] +
                " cannot be satisfied: no versions found in elaboration "
                "attempt.")
        # Where there is ambiguity, take the first version sort_versions()
        # yields, mirroring pip's naive pick.
        chosen_version = sort_versions(satisfying_versions)[0]  # grab first
        chosen_distkey = \
            depdata.distkey_format(satisfying_packname, chosen_version)

        # Recurse; the recursive call appends the chosen dist (and anything
        # it needs) to the accumulated candidate set.
        satisfying_candidate_set = naive_satisfy(chosen_distkey, edeps,
                                                 versions_by_package,
                                                 satisfying_candidate_set,
                                                 satisfying_candidate_packs)

    return satisfying_candidate_set
Пример #8
0
def resolve_via_depsolver(distkey, deps, versions_by_package=None,
    already_converted=False):
  """
  Wrapper for the depsolver package so that it can be tested via the same
  testing I employ for my own resolver package.
  Solves a dependency structure for a given package's dependencies, using
  the external depsolver package.

  Intended to be compatible with resolver.depdata.test_resolver.
  e.g.:
  depdata.test_resolver(resolve_via_depsolver, DEPS_SIMPLE_DEPSOLVER_SOLUTION,
      'X(1)', DEPS_SIMPLE, use_raw_deps=True)

  Converts the output of depsolve back into a comprehensible format for
  resolver.resolvability. Mapping involves some ugly fudging of version
  strings.

  Raises depresolve.UnresolvableConflictError if depsolver seems to detect
  an unresolvable conflict (which it does by raising, of all things, a
  NotImplementedError ): )

  If optional arg 'already_converted' is set to True, we take deps as depsolver
  compatible deps (PackageInfos), skipping any conversion process.

  Throws:
   - timeout.TimeoutException if the process takes longer than 5 minutes.
   - ValueError if no distributions at all are available to construct a
     depsolver Repository from.

  """
  # Convert the dependencies into a format for depsolver, if they are not
  # already in a depsolver-friendly format.
  converted_dists = []
  dists_unable_to_convert = []

  if already_converted:
    converted_dists = deps
  else:
    (converted_dists, dists_unable_to_convert) = \
        convert_packs_to_packageinfo_for_depsolver(deps)

  # Fail loudly rather than with a bare IndexError below if we ended up with
  # no dists at all (e.g. empty deps, or every dist failed conversion).
  if not converted_dists:
    raise ValueError('No converted distributions available to resolve ' +
        distkey + ': cannot construct a depsolver Repository.')

  # Create a depsolver "Repository" object containing a PackageInfo object for
  # each dist we know about from the deps dictionary of distributions.
  # NOTE: Inserting weird hack for now. These packages may already have a repo
  # for whatever reason. THIS HACK IS BAD AND MUST BE TEMPORARY.
  repo = None
  if converted_dists[0]._repository is not None:
    repo = converted_dists[0]._repository
  else:
    repo = depsolver.Repository(converted_dists)

  # Create an empty "Repository" to indicate nothing installed yet.
  installed_repo = depsolver.Repository()

  # A depsolver Pool is an abstraction encompassing the state of a repository
  # and what is installed locally. /:
  pool = depsolver.Pool([repo, installed_repo])

  # Putative installations are requests.
  request = depsolver.Request(pool)

  # This produces a sort of diff object that can be applied to the repository.
  # Installation would not actually occur. It's a request to install.
  try:
    request.install(
        depsolver.Requirement.from_string(convert_distkey_for_depsolver(
        distkey, as_req=True)))

  except DepsolverConversionError:
    logger.exception('Unable to convert given distkey to install into a '
        'depsolver-compatible format. Given distkey: ' + distkey)
    raise


  try:
    depsolver_solution = [operation for operation in
        depsolver.Solver(pool, installed_repo).solve(request)]

  except NotImplementedError as e: # Sadly, this is what depsolver throws.
    logger.debug("Caught NotImplementedError from depsolver: \n" +
        str(e.args) + "\n")
    raise depresolve.UnresolvableConflictError('Unable to resolve conflict '
        'via depsolver SAT solver. Presume that the distribution ' + distkey +
        ' has an unresolvable conflict.')

  # What depsolver will have provided there will look like:
  #  [Installing A (3.0.0), Installing C (1.0.0), Installing B (1.0.0),
  #      Installing X (1.0.0)]
  #  where each of those is a depsolver.solver.operations.Install object....
  #
  # We want to strip the nonsense in it and return something like:
  #   ['X(1)', 'B(1)', 'C(1)', 'A(3)']
  # so that the output can be assessed by the resolver.test_depdata module.
  #
  parsed_depsolver_solution = []
  for install in depsolver_solution:
    packname = convert_packname_from_depsolver(install.package.name)
    version = convert_version_from_depsolver(install.package.version)
    # Use a distinct name rather than clobbering the 'distkey' parameter,
    # which names the dist we were asked to resolve.
    solution_distkey = depdata.distkey_format(packname, version)

    parsed_depsolver_solution.append(solution_distkey)

  return parsed_depsolver_solution
Пример #9
0
        n_packs_processed += 1
        print('Processing package ' + p + '  (' + str(n_packs_processed) +
              '/' + str(n_total_packages) + ')')
        # If we lack the catalog of versions for this package, get it from PyPI.
        if p not in versions_by_package:
            # The True here requests all hidden packages. (Packages on PyPI can be
            # hidden by the package maintainer. I suspect that hidden packages are
            # more likely to be packages that had issues (like potential conflicts)
            # that were hidden when fixed versions were released.
            versions_by_package[p] = client.package_releases(p, True)

        # # Alternatively, instead, generate new vbp every time.
        # versions_by_package[p] = client.package_releases(p, True)

        for v in versions_by_package[p]:
            distkey = data.distkey_format(p, v)
            distkey_to_packver_map[distkey] = (p, v)

            # If we lack the metadata for this version, fetch it.
            if distkey not in metadata_by_distkey:
                metadata_by_distkey[distkey] = client.release_data(p, v)

        print('Done with package  ' + p)

except:
    print('----- Process interrupted by exception. Dumping data before '
          're-raising. Data filenames prepended with "aborted_".')
    write_file(metadata_by_distkey, OUTPUT_FNAME_METADATA + '_aborted')
    write_file(versions_by_package, OUTPUT_FNAME_VERSIONS + '_aborted')
    write_file(distkey_to_packver_map, OUTPUT_FNAME_DISTKEYS + '_aborted')
    print('Data dumped.')