def __init__(self, stats_keeper):
    """Create the stores, counters, and key generators used for collection.

    STATS_KEEPER is kept as self.stats_keeper.  The CVS item store and
    the metadata database are created on temporary files obtained from
    artifact_manager; the metadata database is opened with DB_OPEN_NEW."""
    items_store_path = artifact_manager.get_temp_file(config.CVS_ITEMS_STORE)
    self._cvs_item_store = NewCVSItemStore(items_store_path)

    metadata_store_path = artifact_manager.get_temp_file(
        config.METADATA_STORE
    )
    metadata_index_path = artifact_manager.get_temp_file(
        config.METADATA_INDEX_TABLE
    )
    self.metadata_db = MetadataDatabase(
        metadata_store_path, metadata_index_path, DB_OPEN_NEW
    )
    self.metadata_logger = MetadataLogger(self.metadata_db)

    # Bookkeeping that starts out empty:
    self.fatal_errors = []
    self.num_files = 0
    self.symbol_stats = SymbolStatisticsCollector()
    self.stats_keeper = stats_keeper

    # Key generator for CVSItems:
    self.item_key_generator = KeyGenerator()

    # Key generator for Symbols:
    self.symbol_key_generator = KeyGenerator()
class CollectData:
    """Repository for data collected by parsing the CVS repository files.

    This class manages the databases into which information collected
    from the CVS repository is stored.  The data are stored into this
    class by _FileDataCollector instances, one of which is created for
    each file to be parsed."""

    def __init__(self, stats_keeper):
        """Create the stores, counters, and key generators used for collection.

        STATS_KEEPER is kept as self.stats_keeper.  The CVS item store and
        the metadata database are created on temporary files obtained from
        artifact_manager; the metadata database is opened with DB_OPEN_NEW."""
        items_store_path = artifact_manager.get_temp_file(
            config.CVS_ITEMS_STORE
        )
        self._cvs_item_store = NewCVSItemStore(items_store_path)

        metadata_store_path = artifact_manager.get_temp_file(
            config.METADATA_STORE
        )
        metadata_index_path = artifact_manager.get_temp_file(
            config.METADATA_INDEX_TABLE
        )
        self.metadata_db = MetadataDatabase(
            metadata_store_path, metadata_index_path, DB_OPEN_NEW
        )
        self.metadata_logger = MetadataLogger(self.metadata_db)

        # Bookkeeping that starts out empty:
        self.fatal_errors = []
        self.num_files = 0
        self.symbol_stats = SymbolStatisticsCollector()
        self.stats_keeper = stats_keeper

        # Key generator for CVSItems:
        self.item_key_generator = KeyGenerator()

        # Key generator for Symbols:
        self.symbol_key_generator = KeyGenerator()

    def record_fatal_error(self, err):
        """Record that fatal error ERR was found.

        ERR is a string (without trailing newline) describing the error.
        The message is prefixed with error_prefix, output through the
        logger immediately, and appended to self.fatal_errors so that it
        can be output again in a summary at the end of CollectRevsPass."""
        message = '%s: %s' % (error_prefix, err)
        logger.error(message + '\n')
        self.fatal_errors.append(message)

    def add_cvs_directory(self, cvs_directory):
        """Record CVS_DIRECTORY."""
        path_db = Ctx()._cvs_path_db
        path_db.log_path(cvs_directory)

    def add_cvs_file_items(self, cvs_file_items):
        """Record the information from CVS_FILE_ITEMS.

        Store the CVSFile to _cvs_path_db under its persistent id, store
        the CVSItems, and record the CVSFile and each of its CVSItems to
        self.stats_keeper."""
        Ctx()._cvs_path_db.log_path(cvs_file_items.cvs_file)
        self._cvs_item_store.add(cvs_file_items)

        keeper = self.stats_keeper
        keeper.record_cvs_file(cvs_file_items.cvs_file)
        for item in cvs_file_items.values():
            keeper.record_cvs_item(item)

    def register_trunk(self, trunk):
        """Create a symbol statistics record for the specified trunk LOD."""
        # The lookup alone causes the record to spring into existence; the
        # value it returns is deliberately discarded.
        self.symbol_stats[trunk]

    def _process_cvs_file_items(self, cvs_file_items):
        """Process the CVSFileItems from one CVSFile.

        The items are cleaned up, checked for link consistency, recorded
        via add_cvs_file_items(), and registered with self.symbol_stats."""
        metadata_db = self.metadata_db

        # Remove an initial delete on trunk if it is not needed:
        cvs_file_items.remove_unneeded_initial_trunk_delete(metadata_db)

        # Remove initial branch deletes that are not needed:
        cvs_file_items.remove_initial_branch_deletes(metadata_db)

        # If this is a --trunk-only conversion, discard all branches and
        # tags, then graft any non-trunk default branch revisions to
        # trunk:
        if Ctx().trunk_only:
            cvs_file_items.exclude_non_trunk()

        cvs_file_items.check_link_consistency()

        self.add_cvs_file_items(cvs_file_items)
        self.symbol_stats.register(cvs_file_items)

    def process_project(self, project, cvs_paths):
        """Collect the data for the paths CVS_PATHS of PROJECT.

        Each CVSDirectory in CVS_PATHS is recorded directly.  Every other
        path is handed to a _ProjectDataCollector for PROJECT and the
        resulting CVSFileItems are processed.  If CVS_PATHS contained no
        such non-directory path, a fatal error is recorded.  Afterwards
        the collector's file count is added to self.num_files."""
        collector = _ProjectDataCollector(self, project)

        found_rcs_file = False
        for cvs_path in cvs_paths:
            if isinstance(cvs_path, CVSDirectory):
                self.add_cvs_directory(cvs_path)
                continue

            self._process_cvs_file_items(collector.process_file(cvs_path))
            found_rcs_file = True

        if not found_rcs_file:
            self.record_fatal_error(
                'No RCS files found under %r!\n'
                'Are you absolutely certain you are pointing cvs2svn\n'
                'at a CVS repository?\n'
                % (project.project_cvs_repos_path,)
            )

        collector.summarize_symbol_transforms()

        self.num_files += collector.num_files
        logger.verbose('Processed', self.num_files, 'files')

    def _register_empty_subdirectories(self):
        """Fill in the CVSDirectory.empty_subdirectory_ids members.

        A directory counts as empty if no CVSFile in the path database has
        it as a parent directory, directly or through a chain of parent
        directories.  The id of each such directory is appended to its
        parent directory's empty_subdirectory_ids list."""
        # Start by assuming that every known directory is empty...
        empty_candidates = set()
        for path in Ctx()._cvs_path_db.itervalues():
            if isinstance(path, CVSDirectory):
                empty_candidates.add(path)

        # ...then strike out every directory that is an ancestor of a file.
        # The walk up can stop at the first ancestor that has already been
        # struck out, because its own ancestors were struck out with it.
        for path in Ctx()._cvs_path_db.itervalues():
            if not isinstance(path, CVSFile):
                continue
            ancestor = path.parent_directory
            while ancestor is not None and ancestor in empty_candidates:
                empty_candidates.remove(ancestor)
                ancestor = ancestor.parent_directory

        # Whatever is left is empty; tell the parents (if any) about it.
        for directory in empty_candidates:
            parent = directory.parent_directory
            if parent is not None:
                parent.empty_subdirectory_ids.append(directory.id)

    def close(self):
        """Close the data structures associated with this instance.

        Return a list of fatal errors encountered while processing input.
        Each list entry is a string describing one fatal error."""
        stats = self.symbol_stats
        stats.purge_ghost_symbols()
        stats.close()
        self.symbol_stats = None

        self.metadata_logger = None

        self.metadata_db.close()
        self.metadata_db = None

        self._cvs_item_store.close()
        self._cvs_item_store = None

        self._register_empty_subdirectories()

        # Hand the error list over to the caller and drop our reference.
        errors, self.fatal_errors = self.fatal_errors, None
        return errors
class CollectData:
    """Repository for data collected by parsing the CVS repository files.

    This class manages the databases into which information collected
    from the CVS repository is stored.  The data are stored into this
    class by _FileDataCollector instances, one of which is created for
    each file to be parsed."""

    def __init__(self, revision_recorder, stats_keeper):
        """Create the stores and key generators, then start REVISION_RECORDER.

        REVISION_RECORDER and STATS_KEEPER are kept as attributes of the
        same names.  The CVS item store and the metadata database are
        created on temporary files obtained from artifact_manager.
        REVISION_RECORDER.start() is called last, after everything else
        has been set up."""
        self.revision_recorder = revision_recorder
        self._cvs_item_store = NewCVSItemStore(
            artifact_manager.get_temp_file(config.CVS_ITEMS_STORE))
        self.metadata_db = MetadataDatabase(
            artifact_manager.get_temp_file(config.METADATA_STORE),
            artifact_manager.get_temp_file(config.METADATA_INDEX_TABLE),
            DB_OPEN_NEW,
        )
        self.metadata_logger = MetadataLogger(self.metadata_db)
        self.fatal_errors = []
        self.num_files = 0
        self.symbol_stats = SymbolStatisticsCollector()
        self.stats_keeper = stats_keeper

        # Key generator for CVSFiles:
        self.file_key_generator = KeyGenerator()

        # Key generator for CVSItems:
        self.item_key_generator = KeyGenerator()

        # Key generator for Symbols:
        self.symbol_key_generator = KeyGenerator()

        self.revision_recorder.start()

    def record_fatal_error(self, err):
        """Record that fatal error ERR was found.

        ERR is a string (without trailing newline) describing the error.
        Output the error to stderr immediately, and record a copy to be
        output again in a summary at the end of CollectRevsPass."""
        err = '%s: %s' % (error_prefix, err,)
        Log().error(err + '\n')
        self.fatal_errors.append(err)

    def add_cvs_directory(self, cvs_directory):
        """Record CVS_DIRECTORY."""
        Ctx()._cvs_file_db.log_file(cvs_directory)

    def add_cvs_file_items(self, cvs_file_items):
        """Record the information from CVS_FILE_ITEMS.

        Store the CVSFile to _cvs_file_db under its persistent id, store
        the CVSItems, and record the CVSItems to self.stats_keeper."""
        Ctx()._cvs_file_db.log_file(cvs_file_items.cvs_file)
        self._cvs_item_store.add(cvs_file_items)

        self.stats_keeper.record_cvs_file(cvs_file_items.cvs_file)
        for cvs_item in cvs_file_items.values():
            self.stats_keeper.record_cvs_item(cvs_item)

    def _get_cvs_file(
        self, parent_directory, basename, file_in_attic, leave_in_attic=False
    ):
        """Return a CVSFile describing the file with name BASENAME.

        PARENT_DIRECTORY is the CVSDirectory instance describing the
        directory that physically holds this file in the filesystem.
        BASENAME must be the base name of a *,v file within
        PARENT_DIRECTORY.

        FILE_IN_ATTIC is a boolean telling whether the specified file is
        in an Attic subdirectory.  If FILE_IN_ATTIC is True, then:

        - If LEAVE_IN_ATTIC is True, then leave the 'Attic' component in
          the filename.

        - Otherwise, raise FileInAndOutOfAtticException if a file with the
          same filename appears outside of Attic.

        The CVSFile is assigned a new unique id.  All of the CVSFile
        information is filled in except mode (which can only be
        determined by parsing the file).

        The file is examined with os.stat() to determine its size and
        whether its owner-executable bit is set, so the file must exist.

        Raise FatalError if the resulting filename would not be legal in
        SVN."""
        filename = os.path.join(parent_directory.filename, basename)

        # BASENAME ends with ',v'; the name that is checked (and later
        # stored in the CVSFile) is the basename without that suffix.
        try:
            verify_svn_filename_legal(basename[:-2])
        except IllegalSVNPathError as e:
            raise FatalError(
                'File %r would result in an illegal SVN filename: %s'
                % (filename, e,)
            )

        if file_in_attic and not leave_in_attic:
            in_attic = True
            # The file logically belongs to the directory that contains
            # the Attic directory, not to the Attic directory itself.
            logical_parent_directory = parent_directory.parent_directory

            # If this file also exists outside of the attic, it's a fatal
            # error:
            non_attic_filename = os.path.join(
                logical_parent_directory.filename, basename,
            )
            if os.path.exists(non_attic_filename):
                raise FileInAndOutOfAtticException(
                    non_attic_filename, filename
                )
        else:
            in_attic = False
            logical_parent_directory = parent_directory

        file_stat = os.stat(filename)

        # The size of the file in bytes:
        file_size = file_stat[stat.ST_SIZE]

        # Whether or not the executable bit is set (for the file's owner):
        file_executable = bool(file_stat[stat.ST_MODE] & stat.S_IXUSR)

        # mode is not known, so we temporarily set it to None.
        return CVSFile(
            self.file_key_generator.gen_id(),
            parent_directory.project,
            logical_parent_directory,
            basename[:-2],
            in_attic,
            file_executable,
            file_size,
            None,
        )