def __init__(
        self,
        svn_repository=None,
        svn_repository_local_copy=None,
        dsn=None,
        user='******',
        images_cache_local=None,
        images_cache_url=None,
        ):
        """Create the SVN and database back ends and store the image cache settings.

        arguments:
            svn_repository, svn_repository_local_copy - passed on to SVNRepository
            dsn - passed on to DBRepository
            user - must be truthy; passed to DBRepository and stored as self.user
            images_cache_local - optional path; a warning is printed when it does not exist
            images_cache_url - stored unchanged as self.images_cache_url
        """

        assert user
        self.svn_repository = SVNRepository(svn_repository=svn_repository, svn_repository_local_copy=svn_repository_local_copy)
        # parenthesised single-argument print behaves the same as the print statement
        print('initializing %s' % self)
        self.db = DBRepository(
            dsn=dsn,
            user=user,
            repository=self,
            )
        self.db.repository = self
        if images_cache_local:
            # A missing cache directory is only reported, never fatal. The check is
            # explicit (no assert) so the warning is still printed under "python -O".
            try:
                cache_exists = os.path.exists(images_cache_local)
            except (TypeError, ValueError):
                # an unusable path value is reported the same way as a missing path
                cache_exists = False
            if not cache_exists:
                print('this path (for "images_cache_local") does not exist; %s' % images_cache_local)

        self.images_cache_local = images_cache_local
        self.images_cache_url = images_cache_url
        self.user = user
class Repository(object):
    """Entry point for working with persons, biographies and sources.

    The methods of this class forward their work to the database back end
    stored in ``self.db``. Methods that consult ``ENABLE_SVN`` raise
    NotImplementedError when that flag is true.
    """

    # when true, the methods that check this flag raise NotImplementedError
    ENABLE_SVN = False
    # when true, the methods that check this flag delegate to self.db
    ENABLE_DB = True

    def __init__(
        self,
        svn_repository=None,
        svn_repository_local_copy=None,
        dsn=None,
        user='******',
        images_cache_local=None,
        images_cache_url=None,
        ):
        """Create the SVN and database back ends and store the image cache settings.

        arguments:
            svn_repository, svn_repository_local_copy - passed on to SVNRepository
            dsn - passed on to DBRepository
            user - must be truthy; passed to DBRepository and stored as self.user
            images_cache_local - optional path; a warning is printed when it does not exist
            images_cache_url - stored unchanged as self.images_cache_url
        """

        assert user
        self.svn_repository = SVNRepository(svn_repository=svn_repository, svn_repository_local_copy=svn_repository_local_copy)
        # parenthesised single-argument print behaves the same as the print statement
        print('initializing %s' % self)
        self.db = DBRepository(
            dsn=dsn,
            user=user,
            repository=self,
            )
        self.db.repository = self
        if images_cache_local:
            # A missing cache directory is only reported, never fatal. The check is
            # explicit (no assert) so the warning is still printed under "python -O".
            try:
                cache_exists = os.path.exists(images_cache_local)
            except (TypeError, ValueError):
                # an unusable path value is reported the same way as a missing path
                cache_exists = False
            if not cache_exists:
                print('this path (for "images_cache_local") does not exist; %s' % images_cache_local)

        self.images_cache_local = images_cache_local
        self.images_cache_url = images_cache_url
        self.user = user

    def get_bioport_ids(self):
        """return _all_ bioport_ids in the system"""
        return self.db.get_bioport_ids()

    def get_person(self, bioport_id):
        return self.db.get_person(bioport_id=bioport_id, repository=self)

    def count_persons(self, **args):
        return self.db.count_persons(**args)

    def get_persons(self, **args):
        """Get persons satisfying the given arguments

        arguments:
            order_by - a string - default is 'sort_key'

        returns: a PersonList instance - a list of Person instances
        """
        return self.db.get_persons(**args)

    def get_persons_sequence(self, *args, **kwargs):
        return self.db.get_persons_sequence(*args, **kwargs)

    def get_bioport_id(self, url_biography):
        return self.db.get_bioport_id(url_biography=url_biography)

    def delete_person(self, person):
        if self.ENABLE_DB:
            return self.db.delete_person(person)
        if self.ENABLE_SVN:
            raise NotImplementedError()

    def count_biographies(self, **args):
        return self.db.count_biographies(**args)

    def get_biographies(self, **args):
        if self.ENABLE_DB:
            return self.db.get_biographies(**args)
        elif self.ENABLE_SVN:
            raise NotImplementedError()

    def get_biography(self, local_id=None, **args):
        return self.db.get_biography(local_id=local_id, **args)

    def redirects_to(self, bioport_id):
        return self.db.redirects_to(bioport_id)

    def add_source(self, source):
        """add a source of data to the db"""
        if source.id in [src.id for src in self.get_sources()]:
            raise ValueError('A source with id %s already exists' % source.id)
        self.db.add_source(source)
        return source

    def delete_source(self, source):
        return self.db.delete_source(source)

    def get_source(self, id):  # @ReservedAssignment
        ls = [src for src in self.get_sources() if src.id == id]
        if not ls:
            raise ValueError('No source found with id %s\nAvailabe sources are %s' % (id, [s.id for s in self.get_sources()]))
        source = ls[0]
        return source

    def get_sources(self, order_by='quality', desc=True):
        """
        return: a list of Source instances
        """
        return self.db.get_sources(order_by=order_by, desc=desc)

    def get_status_value(self, k, default=None):
        """Look up k in STATUS_VALUES and return its value, or default when absent."""
        lookup = dict(STATUS_VALUES)
        return lookup.get(k, default)

    def get_status_values(self):
        """Return the module-level STATUS_VALUES object itself (not a copy)."""
        values = STATUS_VALUES
        return values

    def get_source_types(self):
        """Return the module-level SOURCE_TYPES object itself (not a copy)."""
        types = SOURCE_TYPES
        return types

    def get_religion_values(self):
        """Return the module-level RELIGION_VALUES object itself (not a copy)."""
        values = RELIGION_VALUES
        return values

    def get_author(self, author_id):
        if self.ENABLE_DB:
            return self.db.get_author(author_id)
        raise NotImplementedError

    def save(self, x):
        """Save x through the method that matches its exact class.

        An object whose class is exactly Biography goes to save_biography and
        one whose class is exactly Source goes to save_source. Anything else
        raises TypeError.
        """
        kind = x.__class__
        if kind == Biography:
            self.save_biography(x)
            return
        if kind == Source:
            self.save_source(x)
            return
        raise TypeError('Cannot save a object %s in the repository: unknown type' % x)

    def save_source(self, source):
        source.repository = self
        if self.ENABLE_DB:
            self.db.save_source(source)
        if self.ENABLE_SVN:
            raise NotImplementedError()

    def save_person(self, person):
        if self.ENABLE_DB:
            self.db.save_person(person)
        if self.ENABLE_SVN:
            raise NotImplementedError()

    def save_biography(self, biography, comment=''):
        biography.repository = self
        if self.ENABLE_DB:
            biography = self.db.save_biography(biography, user=self.user, comment=comment)

        if self.ENABLE_SVN:
            raise NotImplementedError()

        return biography

    def detach_biography(self, biography):
        person = self.db.detach_biography(biography)
        return person

    def delete_biographies(self, source):
        sources_ids = [src.id for src in self.get_sources()]
        if source.id not in sources_ids:
            raise ValueError("no source with id %s was found" % source.id)
        else:
            if self.ENABLE_DB:
                self.db.delete_biographies(source)
            if self.ENABLE_SVN:
                raise NotImplementedError

    def delete_biography(self, biography):
        return self.db.delete_biography(biography)

    def download_biographies(self, source, limit=None):
        """Download all biographies from source.url and add them to the repository.
        Mark any biographies that we did not find (anymore), by removing the source_url property.
        Return the number of total and skipped biographies.

        arguments:
            source: a Source instance

        returns:
             a list of biography instances
        """

        # at the URL given we find a list of links to biodes files
        # print 'Opening', source.url
        assert source.url, 'No URL was defined with the source "%s"' % source.id

        logging.info('downloading data at %s' % source.url)
        logging.info('parsing source url')

        # TODO: perhaps it would be better to check on Source.__init__ if repository argument is given
        if not source.repository:
            source.repository = self
        try:
            ls = biodes.parse_list(source.url)
            if limit:
                ls = ls[:limit]
        except etree.XMLSyntaxError, error:  # @UndefinedVariable
            raise BioPortException('Error parsing data at %s -- check if this is valid XML\n%s' % (source.url, error))

        if not ls:
            raise BioPortException('The file at %s does not contain any links to biographies' % source.url)

        # we have a valid list of biographies to download
        # first we remove all previously imported biographies at this source
        logging.info('deleting existing biographies from %s' % source)
        self.delete_biographies(source=source)
        logging.info('downloading biodes files')
        total = len(ls)
        skipped = 0
        ls.sort()
        for iteration, biourl in enumerate(ls):
            iteration += 1
            if not biourl.startswith("http:"):
                # we're dealing with a fs path
                biourl = os.path.normpath(biourl)
                if not os.path.isabs(biourl):
                    biourl = os.path.join(os.path.dirname(source.url), biourl)
            if limit and iteration > limit:
                break
            logging.info('progress %s/%s: adding biography at %s' % (iteration, len(ls), biourl))
            # create a Biography object
            bio = Biography(source_id=source.id, repository=source.repository)
            bio.from_url(biourl)
            bio.save(user='', comment=u'downloaded biography from source %s' % source)

        # remove the temp directory which has been used to extract
        # the xml files
        if ls[0].startswith("/tmp/"):
            shutil.rmtree(os.path.dirname(ls[0]))

        s = '%s biographies downloaded from source %s' % (iteration, source.id)
        logging.info(s)
        source.last_bios_update = time.time()
        self.save_source(source)

#         logging.info('deleting orphaned persons')
#         self.delete_orphaned_persons(source_id=source.id)
        return total, skipped