Exemplo n.º 1
0
  def transform_symbol(self, name, revision):
    """Run the project's symbol transforms on one symbol definition.

    NAME is the symbol's original name and REVISION is its
    canonicalized revision number.  Return the name produced by the
    project's transforms, or None if they returned None (in which case
    the symbol is reported as ignored).

    Whenever the result differs from NAME, record the outcome via
    self.pdc.log_symbol_transform() and write a verbose log message."""

    # Delegate to the user-defined transforms held by the project:
    new_name = self.cvs_file.project.transform_symbol(
        self.cvs_file, name, revision
        )

    if new_name is None:
      # The symbol is to be ignored:
      self.pdc.log_symbol_transform(name, None)
      Log().verbose(
          "   symbol '%s'=%s ignored in %s"
          % (name, revision, self.cvs_file.filename,)
          )
      return None

    if new_name != name:
      # The symbol was renamed:
      self.pdc.log_symbol_transform(name, new_name)
      Log().verbose(
          "   symbol '%s'=%s transformed to '%s' in %s"
          % (name, revision, new_name, self.cvs_file.filename,)
          )

    return new_name
Exemplo n.º 2
0
    def _check_invalid_tags(self, symbol_map):
        """Reject symbols that are to become tags but have commits.

        SYMBOL_MAP is a map {AbstractSymbol : (Trunk|TypedSymbol)}
        indicating how each AbstractSymbol is to be converted.  Every
        value that is a Tag is looked up with self.get_stats(); a Tag
        whose branch_commit_count is positive cannot be converted as a
        tag.

        If any such tags are found, log an error message listing their
        names and raise a FatalException; otherwise return None."""

        Log().quiet("Checking for forced tags with commits...")

        tags_with_commits = [
            target
            for target in symbol_map.itervalues()
            if isinstance(target, Tag)
            and self.get_stats(target).branch_commit_count > 0
        ]

        if tags_with_commits:
            # Assemble one multi-line message: a header, one indented
            # line per offending tag, and a trailing blank line.
            lines = [
                '%s: The following branches cannot be forced to be tags '
                'because they have commits:\n' % (error_prefix, )
            ]
            lines.extend('    %s\n' % tag.name for tag in tags_with_commits)
            lines.append('\n')
            Log().error(''.join(lines))

            raise FatalException()
Exemplo n.º 3
0
    def transform_symbol(self, cvs_file, symbol_name, revision):
        """Transform the symbol SYMBOL_NAME.

        SYMBOL_NAME refers to revision number REVISION in CVS_FILE.
        REVISION is the CVS revision number as a string, with zeros
        removed (e.g., '1.7' or '1.7.2').  The renaming itself is
        delegated to self.symbol_transform.transform().

        Return whatever the transform returned: the new name, the
        original name if nothing changed, or None (which is logged as
        the symbol being ignored).  A warning is logged whenever the
        result differs from SYMBOL_NAME."""

        result = self.symbol_transform.transform(
            cvs_file, symbol_name, revision)

        if result is None:
            template = "   symbol '%s'=%s ignored in %s"
            values = (symbol_name, revision, cvs_file.filename)
        elif result != symbol_name:
            template = "   symbol '%s'=%s transformed to '%s' in %s"
            values = (symbol_name, revision, result, cvs_file.filename)
        else:
            # Name unchanged; nothing to report:
            return result

        Log().warn(template % values)
        return result
Exemplo n.º 4
0
    def process_common_options(self):
        """Process the options that are compatible with --options.

        Handles verbosity (-v/-q), the pass range (--pass/--passes/-p,
        either a single pass or 'START:END'), --dry-run and --profile."""

        # Every occurrence of a verbosity option moves the level one step:
        for _ in self.get_options('--verbose', '-v'):
            Log().increase_verbosity()

        for _ in self.get_options('--quiet', '-q'):
            Log().decrease_verbosity()

        passes = self.pass_manager
        for (_, spec) in self.get_options('--pass', '--passes', '-p'):
            if ':' in spec:
                # 'START:END'; the second argument of get_pass_number()
                # is the fallback passed for each side of the range.
                first, last = spec.split(':')
                self.start_pass = passes.get_pass_number(first, 1)
                self.end_pass = passes.get_pass_number(
                    last, passes.num_passes)
            else:
                # A single pass serves as both start and end:
                pass_number = passes.get_pass_number(spec)
                self.end_pass = pass_number
                self.start_pass = pass_number

        if self.get_options('--dry-run'):
            Ctx().dry_run = True

        if self.get_options('--profile'):
            self.profiling = True
Exemplo n.º 5
0
    def start_commit(self, revnum, revprops):
        """Log the Subversion revision number of the commit being started.

        REVNUM is reported together with self.total_revs.  REVPROPS is
        not used by this method."""

        separator = "=" * 60
        Log().verbose(separator)

        progress = "Starting Subversion r%d / %d" % (revnum, self.total_revs)
        Log().normal(progress)
Exemplo n.º 6
0
    def add_new_changeset(self, changeset):
        """Add the new CHANGESET to the graph and also to the databases.

        The addition is delegated to self.add_changeset() followed by
        self.store_changeset(); a debug message is logged first."""

        if Log().is_on(Log.DEBUG):
            message = 'Adding changeset %r' % (changeset, )
            Log().debug(message)

        self.add_changeset(changeset)
        self.store_changeset(changeset)
Exemplo n.º 7
0
    def log_leftovers(self):
        """If any TextRecords still exist, log them as warnings.

        Nothing is logged when self.text_records is empty.  Otherwise a
        header line is followed by one line per value of
        self.itervalues()."""

        if not self.text_records:
            return

        Log().warn(
            "%s: internal problem: leftover revisions in the checkout cache:"
            % warning_prefix)
        for leftover in self.itervalues():
            Log().warn('    %s' % (leftover, ))
Exemplo n.º 8
0
 def get_symbol(self, symbol, stats):
     """Decide between tag and branch based on how SYMBOL was created.

     A SYMBOL that is already a Trunk or TypedSymbol is returned
     unchanged.  Otherwise compare STATS.tag_create_count with
     STATS.branch_create_count: return Tag(SYMBOL) if the tag count is
     at least as large, else Branch(SYMBOL).  The decision is logged at
     verbose level."""

     if isinstance(symbol, (Trunk, TypedSymbol)):
         # Already decided; leave it alone:
         return symbol

     used_mostly_as_tag = (
         stats.tag_create_count >= stats.branch_create_count
     )
     if used_mostly_as_tag:
         Log().verbose(
             'Converting symbol %s as a tag because it is more often used '
             'as a tag.' % (symbol, ))
         return Tag(symbol)

     Log().verbose(
         'Converting symbol %s as a branch because it is more often used '
         'as a branch.' % (symbol, ))
     return Branch(symbol)
Exemplo n.º 9
0
    def delete_changeset(self, changeset):
        """Remove CHANGESET from the graph and also from the databases.

        In fact, we don't remove CHANGESET from
        self._cvs_item_to_changeset_id, because in practice the CVSItems
        in CHANGESET are always added again as part of a new CHANGESET,
        which will cause the old values to be overwritten."""

        if Log().is_on(Log.DEBUG):
            message = 'Removing changeset %r' % (changeset, )
            Log().debug(message)

        # Drop the entry from this container first, then from the
        # changeset database:
        changeset_id = changeset.id
        del self[changeset_id]
        del self._changeset_db[changeset_id]
Exemplo n.º 10
0
    def get_symbol(self, symbol, stats):
        """Fill in the preferred parent of SYMBOL if it has none yet.

        Only a TypedSymbol whose preferred_parent_id is None is touched.
        For such a symbol, self._get_preferred_parent(STATS) is asked
        for a parent; when one is found its id is stored in
        SYMBOL.preferred_parent_id.  Either outcome is logged at verbose
        level.  SYMBOL itself is always returned."""

        needs_parent = (
            isinstance(symbol, TypedSymbol)
            and symbol.preferred_parent_id is None
        )
        if not needs_parent:
            return symbol

        parent = self._get_preferred_parent(stats)
        if parent is not None:
            symbol.preferred_parent_id = parent.id
            Log().verbose(
                'The preferred parent of %s is %s' % (symbol, parent))
        else:
            Log().verbose('%s has no preferred parent' % (symbol, ))

        return symbol
Exemplo n.º 11
0
  def _generate_attic_cvs_files(self, cvs_directory):
    """Generate CVSFiles for the files in Attic directory CVS_DIRECTORY.

    The directory entries are visited in sorted order.  Subdirectories
    are skipped with a warning, and entries not ending in ',v' are
    skipped silently.

    Also add CVS_DIRECTORY to self (after the last file has been
    yielded) if any files are being retained in that directory."""

    any_retained = False
    attic_path = cvs_directory.filename

    for fname in sorted(os.listdir(attic_path)):
      pathname = os.path.join(attic_path, fname)

      if os.path.isdir(pathname):
        Log().warn("Directory %s found within Attic; ignoring" % (pathname,))
        continue

      if not fname.endswith(',v'):
        continue

      cvs_file = self._get_attic_file(cvs_directory, fname)
      if cvs_file.parent_directory == cvs_directory:
        # This file stays in the Attic directory itself.
        any_retained = True
      yield cvs_file

    if any_retained:
      # At least one file was retained in the Attic directory, so the
      # Attic directory has to be written to CVSFileDatabase as well:
      self.add_cvs_directory(cvs_directory)
Exemplo n.º 12
0
  def _process_branch_changeset(self, changeset, timestamp):
    """Process BranchChangeset CHANGESET, producing a SVNBranchCommit.

    Filter out CVSBranchNoops.  If no CVSBranches are left, log a debug
    message and yield nothing.  Otherwise yield one SVNBranchCommit
    stamped with TIMESTAMP and then log each remaining CVSBranch with
    Ctx()._symbolings_logger.

    Raise InternalError if Ctx().trunk_only is set."""

    if Ctx().trunk_only:
      raise InternalError(
          'BranchChangeset encountered during a --trunk-only conversion')

    real_branches = [
        item
        for item in changeset.iter_cvs_items()
        if not isinstance(item, CVSBranchNoop)
        ]

    if not real_branches:
      Log().debug(
          'Omitting %r because it contains only CVSBranchNoops' % (changeset,)
          )
      return

    svn_commit = SVNBranchCommit(
        changeset.symbol,
        [item.id for item in real_branches],
        timestamp,
        self.revnum_generator.gen_id(),
        )
    yield svn_commit

    # Runs only once the consumer resumes the generator after the yield:
    for item in real_branches:
      Ctx()._symbolings_logger.log_branch_revision(item, svn_commit.revnum)
Exemplo n.º 13
0
  def _get_attic_file(self, parent_directory, basename):
    """Return a CVSFile object for the Attic file at BASENAME.

    PARENT_DIRECTORY is the CVSDirectory that physically contains the
    file on the filesystem (i.e., the Attic directory).  It is not
    necessarily the parent_directory of the CVSFile that will be
    returned.

    Return CVSFile, whose parent directory is usually
    PARENT_DIRECTORY.parent_directory, but might be PARENT_DIRECTORY
    iff CVSFile will remain in the Attic directory.

    If the first lookup raises FileInAndOutOfAtticException, then
    either log a warning (when Ctx().retain_conflicting_attic_files is
    set) or record a fatal error via self.record_fatal_error().  In
    both cases the CVSFile obtained with leave_in_attic=True is
    returned."""

    try:
      return self._get_cvs_file(parent_directory, basename, True)
    except FileInAndOutOfAtticException as e:
      # 'except X as e' replaces the comma form, which is a syntax
      # error on Python 3.
      if Ctx().retain_conflicting_attic_files:
        Log().warn(
            "%s: %s;\n"
            "   storing the latter into 'Attic' subdirectory.\n"
            % (warning_prefix, e)
            )
      else:
        self.record_fatal_error(str(e))

      # Either way, return a CVSFile object so that the rest of the
      # file processing can proceed:
      return self._get_cvs_file(
          parent_directory, basename, True, leave_in_attic=True
          )
Exemplo n.º 14
0
  def process_project(self, project):
    """Collect data from every CVSFile generated for PROJECT.

    Register PROJECT in Ctx()._projects, create its root CVSDirectory,
    and feed each file produced by self._generate_cvs_files() to a
    _ProjectDataCollector.  Record a fatal error if no file at all was
    produced.  Finally summarize the symbol transforms, add the
    collector's file count to self.num_files, and log the running
    total."""

    Ctx()._projects[project.id] = project

    root_directory = CVSDirectory(
        self.file_key_generator.gen_id(), project, None, ''
        )
    project.root_cvs_directory_id = root_directory.id
    collector = _ProjectDataCollector(self, project)

    # The generator is consumed lazily, so a flag is used to detect
    # whether it produced anything:
    saw_file = False
    for cvs_file in self._generate_cvs_files(root_directory):
      collector.process_file(cvs_file)
      saw_file = True

    if not saw_file:
      message = (
          'No RCS files found under %r!\n'
          'Are you absolutely certain you are pointing cvs2svn\n'
          'at a CVS repository?\n'
          % (project.project_cvs_repos_path,)
          )
      self.record_fatal_error(message)

    collector.summarize_symbol_transforms()

    self.num_files += collector.num_files
    Log().verbose('Processed', self.num_files, 'files')
Exemplo n.º 15
0
  def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory instances.
    Their dumped entries are stored together as one record in self.db,
    keyed by the current length of self._max_node_ids, and the largest
    node id seen is then appended to self._max_node_ids.  If no
    positive id was seen, the previous last value is repeated."""

    if len(self._cache) > self._cache_max_size:
      # The cache has outgrown its threshold.  Throw away the old
      # values (the new nodes are still cached below):
      Log().debug('Clearing node cache')
      self._cache.clear()

    record = {}
    highest_id = 0
    for node in nodes:
      node_id = node.id
      entries = node._entries
      highest_id = max(highest_id, node_id)
      record[node_id] = self._dump(entries)
      self._cache[node_id] = entries

    self.db[len(self._max_node_ids)] = record

    if highest_id == 0:
      # Nothing raised the maximum, so carry the last value forward:
      new_max = self._max_node_ids[-1]
    else:
      new_max = highest_id
    self._max_node_ids.append(new_max)
Exemplo n.º 16
0
def check_for_garbage():
    """Force a collection and report any unreachable objects it finds.

    We've turned off the garbage collector because we shouldn't need it
    (we don't create circular dependencies) and because it is therefore
    a waste of time.  So here we check for any unreachable objects and
    generate a debug-level warning if any occur.  gc.garbage is emptied
    afterwards."""

    # DEBUG_SAVEALL makes the collector keep what it finds in gc.garbage:
    gc.set_debug(gc.DEBUG_SAVEALL)
    unreachable = gc.collect()
    if not unreachable:
        return

    if Log().is_on(Log.DEBUG):
        Log().debug(
            'INTERNAL: %d unreachable object(s) were garbage collected:' %
            (unreachable, ))
        for leftover in gc.garbage:
            Log().debug('    %s' % (leftover, ))

    del gc.garbage[:]
Exemplo n.º 17
0
  def _add_branch(self, name, branch_number):
    """Record that BRANCH_NUMBER is the branch number for branch NAME.

    BRANCH_NUMBER is an RCS branch number with an odd number of
    components, for example '1.7.2' (never '1.7.0.2').  Return the
    _BranchData instance (which is usually newly-created).

    If BRANCH_NUMBER already has an entry in self.branches_data, log a
    warning, ignore NAME, and return the existing entry."""

    existing = self.branches_data.get(branch_number)

    if existing is None:
      # First name seen for this branch number; create its record:
      symbol = self.pdc.get_symbol(name)
      created = _BranchData(
          self.collect_data.item_key_generator.gen_id(), symbol, branch_number
          )
      self.branches_data[branch_number] = created
      return created

    Log().warn(
        "%s: in '%s':\n"
        "   branch '%s' already has name '%s',\n"
        "   cannot also have name '%s', ignoring the latter\n"
        % (warning_prefix,
           self.cvs_file.filename, branch_number,
           existing.symbol.name, name)
        )
    return existing
Exemplo n.º 18
0
  def _eliminate_trivial_duplicate_defs(self, symbol_defs):
    """Yield the entries of SYMBOL_DEFS without identical duplicates.

    Duplicate definitions of symbol names have been seen in the wild,
    and they can also happen when --symbol-transform is used.  If a
    symbol is defined to the same revision number repeatedly, then
    only the last such definition is yielded; the others are dropped
    and a verbose message is logged."""

    # Materialize the input, because it is traversed twice:
    symbol_defs = list(symbol_defs)

    # A map { (name, revision) : [index,...] } of the positions at
    # which each definition name=revision occurs:
    positions = {}
    for (index, symbol_def) in enumerate(symbol_defs):
      positions.setdefault(symbol_def, []).append(index)

    # The positions of the entries that must not be yielded:
    discarded = set()
    for definition in positions:
      (name, revision) = definition
      indexes = positions[definition]
      if len(indexes) <= 1:
        continue
      Log().verbose(
          "in %r:\n"
          "   symbol %s:%s defined multiple times; ignoring duplicates\n"
          % (self.cvs_file.filename, name, revision,)
          )
      # Keep the final occurrence only:
      discarded.update(indexes[:-1])

    for (index, symbol_def) in enumerate(symbol_defs):
      if index not in discarded:
        yield symbol_def
Exemplo n.º 19
0
    def process_options(self):
        """Validate the arguments and set up the default project.

        Exactly one positional argument (the CVS repository) is
        required; otherwise the usage message is shown and the process
        exits with status 1.  The option groups are then processed and
        a project is added for that repository."""

        # Consistency check for options and arguments.
        arg_count = len(self.args)

        if arg_count == 0:
            self.usage()
            sys.exit(1)

        if arg_count > 1:
            Log().error(error_prefix +
                        ": must pass only one CVS repository.\n")
            self.usage()
            sys.exit(1)

        cvsroot = self.args[0]

        self.process_extraction_options()
        self.process_output_options()
        self.process_symbol_strategy_options()
        self.process_property_setter_options()

        # Create the default project from the paths, symbol transforms
        # and strategy rules held in self.options:
        options = self.options
        self.add_project(
            cvsroot,
            trunk_path=options.trunk_base,
            branches_path=options.branches_base,
            tags_path=options.tags_base,
            symbol_transforms=options.symbol_transforms,
            symbol_strategy_rules=options.symbol_strategy_rules,
        )
Exemplo n.º 20
0
  def _resolve_branch_dependencies(self):
    """Resolve dependencies involving branches.

    For every entry of self.sdc.branches_data, look up the revision
    data of the branch's parent in self._rev_data.  If the parent is
    unknown, log a warning and delete the branch from
    self.sdc.branches_data.  Otherwise register the branch with its
    parent and, if the branch has a child revision, link that child to
    the branch and to the branch's parent."""

    # Iterate over a snapshot: entries are deleted from
    # self.sdc.branches_data inside the loop, which is only safe if the
    # values are not a live view of the dictionary (as they are on
    # Python 3).
    for branch_data in list(self.sdc.branches_data.values()):
      # The branch_data's parent has the branch as a child regardless
      # of whether the branch had any subsequent commits:
      try:
        parent_data = self._rev_data[branch_data.parent]
      except KeyError:
        Log().warn(
            'In %r:\n'
            '    branch %r references non-existing revision %s\n'
            '    and will be ignored.'
            % (self.cvs_file.filename, branch_data.symbol.name,
               branch_data.parent,))
        del self.sdc.branches_data[branch_data.branch_number]
      else:
        parent_data.branches_data.append(branch_data)

        # If the branch has a child (i.e., something was committed on
        # the branch), then we store a reference to the branch_data
        # there, define the child's parent to be the branch's parent,
        # and list the child in the branch parent's branches_revs_data:
        if branch_data.child is not None:
          child_data = self._rev_data[branch_data.child]
          assert child_data.parent_branch_data is None
          child_data.parent_branch_data = branch_data
          assert child_data.parent is None
          child_data.parent = branch_data.parent
          parent_data.branches_revs_data.append(branch_data.child)
Exemplo n.º 21
0
    def flush(self):
        """Write all dirty cache entries to self.f, then empty the cache.

        Cache values are (dirty, s) pairs keyed by record index.  Dirty
        records are written in index order at offset
        index * self._record_len.  Any gap between self._limit_written
        and the record being written is first filled with
        self.packer.empty_value."""

        Log().debug('Flushing cache for %s' % (self, ))

        dirty_records = sorted(
            (index, data)
            for (index, (dirty, data)) in self._cache.items()
            if dirty
        )

        if dirty_records:
            out = self.f
            # Index of the record that starts at the current file
            # position, or None while the position is unknown:
            position = None
            for (index, data) in dirty_records:
                if index != position:
                    if index <= self._limit_written:
                        # The slot already exists; just jump there:
                        out.seek(index * self._record_len)
                    else:
                        # Jump to the end of the written area, then pad
                        # with empty values up to the wanted slot:
                        out.seek(self._limit_written * self._record_len)
                        while self._limit_written < index:
                            out.write(self.packer.empty_value)
                            self._limit_written += 1
                out.write(data)
                position = index + 1
                self._limit_written = max(self._limit_written, position)

            self.f.flush()

        self._cache.clear()
Exemplo n.º 22
0
 def log(self, symbol):
     """Log, at verbose level, that SYMBOL is converted as a branch.

     The message names the pattern of self.regexp as the reason."""

     message = (
         'Converting symbol %s as a branch because it matches regexp "%s".'
         % (symbol, self.regexp.pattern)
     )
     Log().verbose(message)
Exemplo n.º 23
0
  def __init__(self, configfilename, ignore_case=True):
    """Load the 'auto-props' patterns from the file CONFIGFILENAME.

    If IGNORE_CASE is true, self.squash_case is used as
    self.transform_case; otherwise self.preserve_case is used, and it
    is also installed as the parser's optionxform.

    A warning is logged if self.comment_re matches anywhere in the file
    text.  Sections are visited in sorted order; for every section
    whose case-transformed name is 'auto-props', each option (in sorted
    order) that has a non-empty value is passed to
    self._add_pattern()."""

    config = ConfigParser.ConfigParser()
    if ignore_case:
      self.transform_case = self.squash_case
    else:
      config.optionxform = self.preserve_case
      self.transform_case = self.preserve_case

    # Close the file deterministically instead of leaving it to the
    # garbage collector:
    f = open(configfilename)
    try:
      configtext = f.read()
    finally:
      f.close()

    if self.comment_re.search(configtext):
      Log().warn(
          '%s: Please be aware that a space followed by a\n'
          'semicolon is sometimes treated as a comment in configuration\n'
          'files.  This pattern was seen in\n'
          '    %s\n'
          'Please make sure that you have not inadvertently commented\n'
          'out part of an important line.'
          % (warning_prefix, configfilename,)
          )

    # Parse the text that was already read, so that the file is read
    # only once:
    config.readfp(StringIO(configtext), configfilename)
    self.patterns = []
    sections = config.sections()
    sections.sort()
    for section in sections:
      if self.transform_case(section) == 'auto-props':
        patterns = config.options(section)
        patterns.sort()
        for pattern in patterns:
          value = config.get(section, pattern)
          if value:
            self._add_pattern(pattern, value)
Exemplo n.º 24
0
    def remove_initial_branch_deletes(self, metadata_db):
        """If the first revision on a branch is an unnecessary delete, remove it.

        If a file is added on a branch (whether or not it already existed
        on trunk), then new versions of CVS add a first branch revision in
        the 'dead' state (to indicate that the file did not exist on the
        branch when the branch was created) followed by the second branch
        revision, which is an add.  When we encounter this situation, we
        sever the branch from trunk and delete the first branch
        revision.

        METADATA_DB is only passed on to
        self._initial_branch_delete_unneeded(), which decides for each
        line of development whether the removal applies."""

        for lod_items in self.iter_lods():
            if self._initial_branch_delete_unneeded(lod_items, metadata_db):
                # The revision to drop is the first one on the branch:
                cvs_revision = lod_items.cvs_revisions[0]
                Log().debug('Removing unnecessary initial branch delete %s' %
                            (cvs_revision, ))
                cvs_branch = lod_items.cvs_branch
                # The revision from which the branch sprouts, and the
                # revision that follows the one being dropped:
                cvs_rev_source = self[cvs_branch.source_id]
                cvs_rev_next = lod_items.cvs_revisions[1]

                # Delete cvs_revision:
                del self[cvs_revision.id]
                # Its successor loses its predecessor and is recorded
                # in self.root_ids:
                cvs_rev_next.prev_id = None
                self.root_ids.add(cvs_rev_next.id)
                cvs_rev_source.branch_commit_ids.remove(cvs_revision.id)

                # Delete the CVSBranch on which it is located:
                del self[cvs_branch.id]
                cvs_rev_source.branch_ids.remove(cvs_branch.id)
Exemplo n.º 25
0
class SymbolMapper(SymbolTransform):
  """A SymbolTransform that transforms specific symbol definitions.

  The user has to specify the exact CVS filename, symbol name, and
  revision number to be transformed, and the new name (or None if the
  symbol should be ignored).  The mappings can be set via a
  constructor argument or by calling __setitem__()."""

  def __init__(self, items=()):
    """Initialize the mapper.

    ITEMS is an iterable of tuples (cvs_filename, symbol_name,
    revision, new_name) which will be set as mappings.  It defaults to
    an empty tuple, so that no mutable default object is shared between
    calls."""

    # A map {(cvs_filename, symbol_name, revision) : new_name}:
    self._map = {}

    for (cvs_filename, symbol_name, revision, new_name) in items:
      self[cvs_filename, symbol_name, revision] = new_name

  def __setitem__(self, key, new_name):
    """Set a mapping for a particular file, symbol, and revision.

    KEY is a sequence (cvs_filename, symbol_name, revision).  If a
    mapping already exists for KEY, a warning is logged and the mapping
    is overwritten with NEW_NAME."""

    # Unpack explicitly; tuple parameters in the signature are not
    # valid syntax on Python 3.  Rebuilding the tuple means that any
    # 3-item sequence ends up as the same dictionary key.
    (cvs_filename, symbol_name, revision) = key
    key = (cvs_filename, symbol_name, revision)
    if key in self._map:
      Log().warn(
          'Overwriting symbol transform for\n'
          '    filename=%r symbol=%s revision=%s'
          % (cvs_filename, symbol_name, revision,)
          )
    self._map[key] = new_name
Exemplo n.º 26
0
  def _process_revision_changeset(self, changeset, timestamp):
    """Process CHANGESET, using TIMESTAMP as the commit time.

    Create and yield one or more SVNCommits in the process.  CHANGESET
    must be an OrderedChangeset.  TIMESTAMP is used as the timestamp
    for any resulting SVNCommits.

    If CHANGESET has no cvs_item_ids, log a warning and yield nothing.
    Otherwise yield an SVNPrimaryCommit for the CVS items sorted by
    filename, log each item with Ctx()._symbolings_logger, and then
    yield whatever self._post_commit() produces."""

    if not changeset.cvs_item_ids:
      Log().warn('Changeset has no items: %r' % (changeset,))
      return

    Log().verbose('-' * 60)
    Log().verbose('CVS Revision grouping:')
    Log().verbose('  Time: %s' % time.ctime(timestamp))

    # Generate an SVNCommit unconditionally.  Even if the only change in
    # this group of CVSRevisions is a deletion of an already-deleted
    # file (that is, a CVS revision in state 'dead' whose predecessor
    # was also in state 'dead'), the conversion will still generate a
    # Subversion revision containing the log message for the second dead
    # revision, because we don't want to lose that information.

    cvs_revs = list(changeset.iter_cvs_items())
    if cvs_revs:
      # Sort by filename with a key function; it gives the same stable
      # ordering as the old cmp-based comparison.
      cvs_revs.sort(key=lambda cvs_rev: cvs_rev.cvs_file.filename)
      svn_commit = SVNPrimaryCommit(
          cvs_revs, timestamp, self.revnum_generator.gen_id()
          )

      yield svn_commit

      for cvs_rev in cvs_revs:
        Ctx()._symbolings_logger.log_revision(cvs_rev, svn_commit.revnum)

      # Generate an SVNPostCommit if we have default branch revs.  If
      # some of the revisions in this commit happened on a non-trunk
      # default branch, then those files have to be copied into trunk
      # manually after being changed on the branch (because the RCS
      # "default branch" appears as head, i.e., trunk, in practice).
      # Unfortunately, Subversion doesn't support copies with sources
      # in the current txn.  All copies must be based in committed
      # revisions.  Therefore, we generate the copies in a new
      # revision.
      for svn_post_commit in self._post_commit(
            cvs_revs, svn_commit.revnum, timestamp
            ):
        yield svn_post_commit
Exemplo n.º 27
0
 def get_symbol(self, symbol, stats):
     """Convert any still-undecided SYMBOL as a tag.

     A SYMBOL that is already a Trunk or TypedSymbol is returned
     unchanged; anything else is wrapped in Tag() after a verbose log
     message.  STATS is not used."""

     if not isinstance(symbol, (Trunk, TypedSymbol)):
         Log().verbose(
             'Converting symbol %s as a tag because no other rules applied.'
             % (symbol, ))
         return Tag(symbol)

     return symbol
Exemplo n.º 28
0
 def summarize_symbol_transforms(self):
   """Log a summary of the symbol transforms that were counted.

   self.symbol_transform_counts maps (old_name, new_name) to a file
   count; a new_name of None is reported as the symbol being ignored.
   Nothing is logged if the map is empty or the NORMAL log level is
   off.  Entries are reported in sorted order."""

   if not (self.symbol_transform_counts and Log().is_on(Log.NORMAL)):
     return

   log = Log()
   log.normal('Summary of symbol transforms:')
   for ((old_name, new_name), count) in sorted(
         self.symbol_transform_counts.items()
         ):
     if new_name is not None:
       log.normal(
           '    "%s" transformed to "%s" in %d files'
           % (old_name, new_name, count,)
           )
     else:
       log.normal('    "%s" ignored in %d files' % (old_name, count,))
Exemplo n.º 29
0
 def usage(self):
     """Write the usage message via Log().write().

     The text is usage_message_template filled in with the program
     name and the default values taken from the config module."""

     substitutions = {
         'progname': self.progname,
         'trunk_base': config.DEFAULT_TRUNK_BASE,
         'branches_base': config.DEFAULT_BRANCHES_BASE,
         'tags_base': config.DEFAULT_TAGS_BASE,
         'svn_keywords_value': config.SVN_KEYWORDS_VALUE,
     }
     Log().write(usage_message_template % substitutions)
Exemplo n.º 30
0
    def check_link_consistency(self):
        """Check that the CVSItems are linked correctly with each other.

        Each value of self has its check_links() method called with
        self as the argument.  If that raises AssertionError, a warning
        naming the item is logged and the exception is re-raised, so
        checking stops at the first failure."""

        for item in self.values():
            try:
                item.check_links(self)
            except AssertionError:
                # Say which item failed before propagating the error:
                message = 'Link consistency error in %s\n' % (item, )
                Log().warn(message)
                raise