class ParseError(Exception):
    """An exception which is used to signal a parse failure.

    Attributes:

    filename - name of the file
    lineno - line number in the file
    msg - error message
    """

    def __init__(self, filename, lineno, msg):
        # lineno is formatted with %d by __repr__ and print_out, so reject
        # anything that is not an integer right away.
        assert isinstance(lineno, int)
        self.filename = filename
        self.lineno = lineno
        self.msg = msg

    def __str__(self):
        return self.msg

    def __repr__(self):
        # %r is the portable spelling of the old backtick repr syntax.
        return "ParseError(%r, %d, %r)" % (self.filename, self.lineno,
                                           self.msg)

    def print_out(self, file):
        """Writes a machine-parsable error message to file."""
        file.write("%s:%d: %s\n" % (self.filename, self.lineno, self.msg))

    printOut = function_deprecated_by(print_out)

    def _sort_key(self):
        """Return the value used for ordering and hashing.

        An explicitly assigned ``_order`` attribute wins; nothing in this
        class sets one, so the fallback is the (filename, lineno, msg)
        triple instead of an AttributeError.
        """
        try:
            return self._order
        except AttributeError:
            return (self.filename, self.lineno, self.msg)

    def __cmp__(self, other):
        return cmp(self._sort_key(), other._sort_key())

    def __hash__(self):
        return hash(self._sort_key())

class Release(PseudoEnum):
    """PseudoEnum subclass that adds no members of its own."""

def list_releases():
    """Build the table of known releases.

    Returns a dictionary mapping each release name to a Release object
    built from the name and its position in the list.  The same
    dictionary is also stored as Release.releases.
    """
    names = ("potato", "woody", "sarge", "etch", "lenny", "sid")
    table = {}
    for position, name in enumerate(names):
        table[name] = Release(name, position)
    Release.releases = table
    return table

listReleases = function_deprecated_by(list_releases)

def intern_release(name, releases=list_releases()):
    """Return the entry stored under name in releases, or None.

    name - release name to look up
    releases - mapping of release names to objects; the default is the
               table built by list_releases() once, when this function
               is defined.
    """
    # dict.get replaces the deprecated has_key() test and makes the
    # "not found" result explicit.
    return releases.get(name)

internRelease = function_deprecated_by(intern_release)

del listReleases
del list_releases

def read_lines_sha1(lines):
    m = hashlib.sha1()
    for l in lines:
class DB:
    """In-memory database mapping packages to tags and tags to packages."""
    def __init__(self):
        """Create an empty collection."""
        # rdb is the reverse of db: db is keyed by package, rdb by tag.
        self.rdb = {}
        self.db = {}

    def read(self, input, tag_filter=None):
        """
        Read the database from a file.

        input and tag_filter are handed unchanged to
        read_tag_database_both_ways(); the two dictionaries it returns
        replace the current contents of this collection.

        Example::

            # Read the system Debtags database
            db.read(open("/var/lib/debtags/package-tags", "r"))
        """
        self.db, self.rdb = read_tag_database_both_ways(input, tag_filter)

    def qwrite(self, file):
        """Quickly write the data to a pickled file.

        The forward mapping is pickled first, then the reverse mapping.
        """
        for mapping in (self.db, self.rdb):
            cPickle.dump(mapping, file)

    def qread(self, file):
        """Quickly read the data from a pickled file.

        Two objects are unpickled from file in sequence: the first
        becomes the forward mapping, the second the reverse mapping.
        """
        # SECURITY: unpickling can execute arbitrary code.  Only pass
        # files that come from a trusted source.
        self.db = cPickle.load(file)
        self.rdb = cPickle.load(file)

    def insert(self, pkg, tags):
        """Store pkg with the given tags and update the reverse index.

        pkg - package name
        tags - set of tags; a copy is stored, so later changes to the
               caller's set do not affect the collection.

        NOTE: if pkg was already present, reverse-index entries for tags
        it no longer has are not removed.
        """
        self.db[pkg] = tags.copy()
        for tag in tags:
            # Add to the existing package set instead of overwriting it,
            # and create a new set when the tag has not been seen before.
            self.rdb.setdefault(tag, set()).add(pkg)

    def dump(self):

    def dump_reverse(self):

    dumpReverse = function_deprecated_by(dump_reverse)

    def reverse(self):
        """Return the reverse collection, sharing tagsets with this one.

        The result's forward mapping is this collection's reverse mapping
        and vice versa; no dictionary is copied.
        """
        flipped = DB()
        flipped.db, flipped.rdb = self.rdb, self.db
        return flipped

    def facet_collection(self):
        """
        Return a copy of this collection, but replaces the tag names
        with only their facets.

        The facet is the part of a tag before its first colon.  A tag
        that contains no colon is kept unchanged.
        """
        fcoll = DB()
        # Require a colon after the facet: without it the trailing ".+"
        # would steal the last character of colon-less tags.
        tofacet = re.compile(r"^([^:]+):.*")
        for pkg, tags in self.iter_packages_tags():
            ftags = set([tofacet.sub(r"\1", t) for t in tags])
            fcoll.insert(pkg, ftags)
        return fcoll

    facetCollection = function_deprecated_by(facet_collection)

    def copy(self):
        """
        Return a copy of this collection, with the tagsets copied as
        well.
        """
        res = DB()
        # dict.copy() alone would leave both collections sharing the same
        # set objects, so copy every set individually.
        res.db = dict([(pkg, tags.copy())
                       for pkg, tags in self.db.iteritems()])
        res.rdb = dict([(tag, pkgs.copy())
                        for tag, pkgs in self.rdb.iteritems()])
        return res

    def reverse_copy(self):
        """
        Return the reverse collection, with a copy of the tagsets of
        this one.
        """
        res = DB()
        # Copy each set so that the result shares nothing with self.
        res.db = dict([(tag, pkgs.copy())
                       for tag, pkgs in self.rdb.iteritems()])
        res.rdb = dict([(pkg, tags.copy())
                        for pkg, tags in self.db.iteritems()])
        return res

    reverseCopy = function_deprecated_by(reverse_copy)

    def choose_packages(self, package_iter):
        """
        Return a collection with only the packages in package_iter,
        sharing tagsets with this one.

        Packages that are not in this collection are silently skipped.
        """
        res = DB()
        db = {}
        for pkg in package_iter:
            if pkg in self.db:
                db[pkg] = self.db[pkg]
        res.db = db
        res.rdb = reverse(db)
        return res

    choosePackages = function_deprecated_by(choose_packages)

    def choose_packages_copy(self, package_iter):
        """
        Return a collection with only the packages in package_iter,
        with a copy of the tagsets of this one.

        Raises KeyError if package_iter names a package that is not in
        this collection.
        """
        res = DB()
        db = {}
        for pkg in package_iter:
            # Copy the set so the result really is independent of self.
            db[pkg] = self.db[pkg].copy()
        res.db = db
        res.rdb = reverse(db)
        return res

    choosePackagesCopy = function_deprecated_by(choose_packages_copy)

    def filter_packages(self, package_filter):
        """
        Return a collection with only those packages that match a
        filter, sharing tagsets with this one.  The filter will match
        on the package.
        """
        res = DB()
        db = {}
        # Iterating the dictionary yields its keys, i.e. the packages.
        for pkg in filter(package_filter, self.db):
            db[pkg] = self.db[pkg]
        res.db = db
        res.rdb = reverse(db)
        return res

    filterPackages = function_deprecated_by(filter_packages)

    def filter_packages_copy(self, filter):
        """
        Return a collection with only those packages that match a
        filter, with a copy of the tagsets of this one.  The filter
        will match on the package.
        """
        res = DB()
        db = {}
        # The parameter is named "filter" and hides the builtin, so the
        # predicate is applied by hand instead of via filter(...).
        for pkg in self.db:
            if filter(pkg):
                db[pkg] = self.db[pkg].copy()
        res.db = db
        res.rdb = reverse(db)
        return res

    filterPackagesCopy = function_deprecated_by(filter_packages_copy)

    def filter_packages_tags(self, package_tag_filter):
        """
        Return a collection with only those packages that match a
        filter, sharing tagsets with this one.  The filter will match
        on (package, tags).
        """
        res = DB()
        db = {}
        for pkg, tags in filter(package_tag_filter, self.db.iteritems()):
            # tags is the very set stored in self.db, so it stays shared.
            db[pkg] = tags
        res.db = db
        res.rdb = reverse(db)
        return res

    filterPackagesTags = function_deprecated_by(filter_packages_tags)

    def filter_packages_tags_copy(self, package_tag_filter):
        """
        Return a collection with only those packages that match a
        filter, with a copy of the tagsets of this one.  The filter
        will match on (package, tags).
        """
        res = DB()
        db = {}
        for pkg, tags in filter(package_tag_filter, self.db.iteritems()):
            db[pkg] = tags.copy()
        res.db = db
        res.rdb = reverse(db)
        return res

    filterPackagesTagsCopy = function_deprecated_by(filter_packages_tags_copy)

    def filter_tags(self, tag_filter):
        """
        Return a collection with only those tags that match a
        filter, sharing package sets with this one.  The filter will match
        on the tag.
        """
        res = DB()
        rdb = {}
        # Iterating the reverse dictionary yields its keys, i.e. the tags.
        for tag in filter(tag_filter, self.rdb):
            rdb[tag] = self.rdb[tag]
        res.rdb = rdb
        res.db = reverse(rdb)
        return res

    filterTags = function_deprecated_by(filter_tags)

    def filter_tags_copy(self, tag_filter):
        """
        Return a collection with only those tags that match a
        filter, with a copy of the package sets of this one.  The
        filter will match on the tag.
        """
        res = DB()
        rdb = {}
        for tag in filter(tag_filter, self.rdb):
            rdb[tag] = self.rdb[tag].copy()
        res.rdb = rdb
        res.db = reverse(rdb)
        return res

    filterTagsCopy = function_deprecated_by(filter_tags_copy)

    def has_package(self, pkg):
        """Check if the collection contains the given package"""
        # "in" replaces the deprecated dict.has_key().
        return pkg in self.db

    hasPackage = function_deprecated_by(has_package)

    def has_tag(self, tag):
        """Check if the collection contains packages tagged with tag"""
        # "in" replaces the deprecated dict.has_key().
        return tag in self.rdb

    hasTag = function_deprecated_by(has_tag)

    def tags_of_package(self, pkg):
        """Return the tag set of a package.

        For a known package this is the set stored in the collection
        (not a copy); for an unknown package it is a new empty set.
        """
        # A single lookup replaces the "has_key and x or y" idiom, which
        # also treated a stored empty set like a missing package.
        return self.db.get(pkg, set())

    tagsOfPackage = function_deprecated_by(tags_of_package)

    def packages_of_tag(self, tag):
        """Return the package set of a tag.

        For a known tag this is the set stored in the collection (not a
        copy); for an unknown tag it is a new empty set.
        """
        # A single lookup replaces the "has_key and x or y" idiom, which
        # also treated a stored empty set like a missing tag.
        return self.rdb.get(tag, set())

    packagesOfTag = function_deprecated_by(packages_of_tag)

    def tags_of_packages(self, pkgs):
        """Return the set of tags that have all the packages in pkgs.

        The result is a new set.  None is returned when pkgs is empty.
        """
        res = None
        for p in pkgs:
            if res is None:
                # First package: start from a copy of its tags.
                res = set(self.tags_of_package(p))
            else:
                # Later packages narrow the result down.
                res &= self.tags_of_package(p)
        return res

    tagsOfPackages = function_deprecated_by(tags_of_packages)

    def packages_of_tags(self, tags):
        """Return the set of packages that have all the tags in tags.

        The result is a new set.  None is returned when tags is empty.
        """
        res = None
        for t in tags:
            if res is None:
                # First tag: start from a copy of its packages.
                res = set(self.packages_of_tag(t))
            else:
                # Later tags narrow the result down.
                res &= self.packages_of_tag(t)
        return res

    packagesOfTags = function_deprecated_by(packages_of_tags)

    def card(self, tag):
        """
        Return the cardinality of a tag, i.e. the number of packages
        that have it (0 for an unknown tag).
        """
        return len(self.rdb.get(tag, ()))

    def discriminance(self, tag):
        """
        Return the discriminance index of the tag.

        The discriminance index of the tag is defined as the minimum
        number of packages that would be eliminated by selecting only
        those tagged with this tag or only those not tagged with this
        tag.
        """
        n = self.card(tag)
        tot = self.package_count()
        return min(n, tot - n)

    def iter_packages(self):
        """Return an iterator over the package names."""
        # Iterating a dictionary walks its keys.
        return iter(self.db)

    iterPackages = function_deprecated_by(iter_packages)

    def iter_tags(self):
        """Return an iterator over the tag names."""
        # Iterating a dictionary walks its keys.
        return iter(self.rdb)

    iterTags = function_deprecated_by(iter_tags)

    def iter_packages_tags(self):
        """Iterate over 2-tuples of (pkg, tags).

        The tags element is the set stored in the collection, not a copy.
        """
        return self.db.iteritems()

    iterPackagesTags = function_deprecated_by(iter_packages_tags)

    def iter_tags_packages(self):
        """Iterate over 2-tuples of (tag, pkgs).

        The pkgs element is the set stored in the collection, not a copy.
        """
        return self.rdb.iteritems()

    iterTagsPackages = function_deprecated_by(iter_tags_packages)

    def package_count(self):
        """Return how many packages the collection holds."""
        forward = self.db
        return len(forward)

    packageCount = function_deprecated_by(package_count)

    def tag_count(self):
        """Return how many tags the collection holds."""
        backward = self.rdb
        return len(backward)

    tagCount = function_deprecated_by(tag_count)

    def ideal_tagset(self, tags):
        """
        Return an ideal selection of the top tags in a list of tags.

        Return the tagset made of the highest number of tags taken in
        consecutive sequence from the beginning of the given vector,
        that would intersect with the tagset of a comfortable amount
        of packages.

        Comfortable is defined in terms of how far it is from 7.

        tags - sequence of tags (it is sliced, so it must support that)

        Returns a set of tags; it always contains at least the first tag
        when tags is not empty.
        """

        # TODO: the scoring function is quite ok, but may need more
        # tuning.  I also center it on 15 instead of 7 since we're
        # setting a starting point for the search, not a target point
        def score_fun(x):
            return float((x - 15) * (x - 15)) / x

        tagset = set()
        min_score = 3
        for end in range(1, len(tags) + 1):
            prefix = tags[:end]
            card = len(self.packages_of_tags(prefix))
            # Adding more tags can only shrink the package set further.
            if card == 0:
                break
            score = score_fun(card)
            if score < min_score:
                min_score = score
                tagset = set(prefix)

        # Return always at least the first tag
        if len(tagset) == 0:
            return set(tags[:1])
        return tagset

    idealTagset = function_deprecated_by(ideal_tagset)

    def correlations(self):
        """
        Generate the list of correlation as a tuple (hastag, hasalsotag, score).

        Every tuple will indicate that the tag 'hastag' tends to also
        have 'hasalsotag' with a score of 'score'.

        The score is the fraction of packages with 'hastag' that also
        have 'hasalsotag', minus the same fraction among the packages
        without 'hastag'.
        """
        for pivot in self.iter_tags():
            with_ = self.filter_packages_tags(lambda pt: pivot in pt[1])
            without = self.filter_packages_tags(lambda pt: pivot not in pt[1])
            with_total = float(with_.package_count())
            without_total = float(without.package_count())
            for tag in with_.iter_tags():
                if tag == pivot: continue
                has = float(with_.card(tag)) / with_total
                if without_total:
                    hasnt = float(without.card(tag)) / without_total
                else:
                    # Every package has the pivot tag, so there is nothing
                    # to compare against.
                    hasnt = 0.0
                yield pivot, tag, has - hasnt
def parse_tags(input):
    """Parse tag database lines, yielding (packages, tags) pairs.

    input - iterable of lines of the form "pkg1, pkg2: tag1, tag2";
            the colon and the tag list are optional.

    Yields a 2-tuple of sets for every line: the package names and the
    tags (an empty set when the line lists no tags).
    """
    lre = re.compile(r"^(.+?)(?::?\s*|:\s+(.+?)\s*)$")
    for line in input:
        # Is there a way to remove the last character of a line that does not
        # make a copy of the entire line?
        m = lre.match(line)
        pkgs = set(m.group(1).split(', '))
        if m.group(2):
            tags = set(m.group(2).split(', '))
        else:
            tags = set()
        yield pkgs, tags

parseTags = function_deprecated_by(parse_tags)

def read_tag_database(input):
    """Read the tag database, returning a pkg->tags dictionary.

    Every package receives its own copy of the tag set parsed from its
    line.
    """
    mapping = {}
    for names, tagset in parse_tags(input):
        for name in names:
            # Separate copy per package, using the native set
            mapping[name] = tagset.copy()
    return mapping

readTagDatabase = function_deprecated_by(read_tag_database)

class Deb822(Deb822Dict):
    def __init__(self,
        """Create a new Deb822 instance.

        :param sequence: a string, or any any object that returns a line of
            input each time, normally a file().  Alternately, sequence can
            be a dict that contains the initial key-value pairs.

        :param fields: if given, it is interpreted as a list of fields that
            should be parsed (the rest will be discarded).

        :param _parsed: internal parameter.

        :param encoding: When parsing strings, interpret them in this encoding.
            (All values are given back as unicode objects, so an encoding is
            necessary in order to properly interpret the strings.)

        if hasattr(sequence, 'items'):
            _dict = sequence
            sequence = None
            _dict = None

        if sequence is not None:
                self._internal_parser(sequence, fields)
            except EOFError:

        self.gpg_info = None

    def iter_paragraphs(cls,
        """Generator that yields a Deb822 object for each paragraph in sequence.

        :param sequence: same as in __init__.

        :param fields: likewise.

        :param use_apt_pkg: if sequence is a file(), apt_pkg will be used 
            if available to parse the file, since it's much much faster.  Set
            this parameter to False to disable using apt_pkg.
        :param shared_storage: not used, here for historical reasons.  Deb822
            objects never use shared storage anymore.
        :param encoding: Interpret the paragraphs in this encoding.
            (All values are given back as unicode objects, so an encoding is
            necessary in order to properly interpret the strings.)

        if _have_apt_pkg and use_apt_pkg and isinstance(sequence, file):
            parser = apt_pkg.TagFile(sequence)
            for section in parser:
                paragraph = cls(fields=fields,
                if paragraph:
                    yield paragraph

            iterable = iter(sequence)
            x = cls(iterable, fields, encoding=encoding)
            while len(x) != 0:
                yield x
                x = cls(iterable, fields, encoding=encoding)

    iter_paragraphs = classmethod(iter_paragraphs)


    def _skip_useless_lines(sequence):
        """Yields only lines that do not begin with '#'.

        Also skips any blank lines at the beginning of the input.

        NOTE(review): this takes no self/cls argument, so it presumably
        needs a staticmethod() wrapper that is not visible here.
        """
        at_beginning = True
        for line in sequence:
            if line.startswith('#'):
                continue
            if at_beginning:
                if not line.rstrip('\r\n'):
                    continue
                # First non-blank line seen: later blank lines are kept.
                at_beginning = False
            yield line

    def _internal_parser(self, sequence, fields=None):
        # The key is non-whitespace, non-colon characters before any colon.
        key_part = r"^(?P<key>[^: \t\n\r\f\v]+)\s*:\s*"
        single = re.compile(key_part + r"(?P<data>\S.*?)\s*$")
        multi = re.compile(key_part + r"$")
        multidata = re.compile(r"^\s(?P<data>.+?)\s*$")

        wanted_field = lambda f: fields is None or f in fields

        if isinstance(sequence, basestring):
            sequence = sequence.splitlines()

        curkey = None
        content = ""

        for line in self.gpg_stripped_paragraph(
            m = single.match(line)
            if m:
                if curkey:
                    self[curkey] = content

                if not wanted_field(m.group('key')):
                    curkey = None

                curkey = m.group('key')
                content = m.group('data')

            m = multi.match(line)
            if m:
                if curkey:
                    self[curkey] = content

                if not wanted_field(m.group('key')):
                    curkey = None

                curkey = m.group('key')
                content = ""

            m = multidata.match(line)
            if m:
                content += '\n' + line  # XXX not m.group('data')?

        if curkey:
            self[curkey] = content

    def __str__(self):
        """Return the result of dump() called without arguments."""
        return self.dump()

    def __unicode__(self):
        """Return the result of dump() called without arguments."""
        return self.dump()

    # __repr__ is handled by Deb822Dict

    def get_as_string(self, key):
        """Return the self[key] as a string (or unicode)

        The default implementation just returns unicode(self[key]); however,
        this can be overridden in subclasses (e.g. _multivalued) that can take
        special values.
        """
        return unicode(self[key])

    def dump(self, fd=None, encoding=None):
        """Dump the the contents in the original format

        If fd is None, return a unicode object.

        If fd is not None, attempt to encode the output to the encoding the
        object was initialized with, or the value of the encoding argument if
        it is not None.  This will raise UnicodeEncodeError if the encoding
        can't support all the characters in the Deb822Dict values.

        if fd is None:
            fd = StringIO.StringIO()
            return_string = True
            return_string = False

        if encoding is None:
            # Use the encoding we've been using to decode strings with if none
            # was explicitly specified
            encoding = self.encoding

        for key in self.iterkeys():
            value = self.get_as_string(key)
            if not value or value[0] == '\n':
                # Avoid trailing whitespace after "Field:" if it's on its own
                # line or the value is empty.  We don't have to worry about the
                # case where value == '\n', since we ensure that is not the
                # case in __setitem__.
                entry = '%s:%s\n' % (key, value)
                entry = '%s: %s\n' % (key, value)
            if not return_string:
        if return_string:
            return fd.getvalue()


    def is_single_line(self, s):
        """Return True if s contains no newline character, else False."""
        return "\n" not in s

    isSingleLine = function_deprecated_by(is_single_line)

    def is_multi_line(self, s):
        """Return the logical negation of is_single_line(s)."""
        return not self.is_single_line(s)

    isMultiLine = function_deprecated_by(is_multi_line)

    def _merge_fields(self, s1, s2):
        """Merge two field values into one.

        If either value is empty the other one is returned.  Two
        single-line values are joined with their delimiter, dropping
        items equal to the one just before them.  Two multi-line values
        are merged by appending to s1 every line of s2 that s1 does not
        already contain.

        Raises ValueError when one value is single-line and the other is
        multi-line.
        """
        if not s2:
            return s1
        if not s1:
            return s2

        if self.is_single_line(s1) and self.is_single_line(s2):
            ## some fields are delimited by a single space, others
            ## a comma followed by a space.  this heuristic assumes
            ## that there are multiple items in one of the string fields
            ## so that we can pick up on the delimiter being used
            delim = ' '
            if (s1 + s2).count(', '):
                delim = ', '

            L = (s1 + delim + s2).split(delim)

            prev = merged = L[0]

            for item in L[1:]:
                ## skip duplicate entries
                if item == prev:
                    continue
                merged = merged + delim + item
                prev = item
            return merged

        if self.is_multi_line(s1) and self.is_multi_line(s2):
            for item in s2.splitlines(True):
                # s1 grows inside the loop, so it is split again each time.
                if item not in s1.splitlines(True):
                    s1 = s1 + "\n" + item
            return s1

        raise ValueError

    _mergeFields = function_deprecated_by(_merge_fields)

    def merge_fields(self, key, d1, d2=None):
        """Merge the values stored under key in two mappings.

        Called as merge_fields(key, d1), self and d1 are merged, the
        result is stored in self[key] and None is returned.  Called as
        merge_fields(key, d1, d2), d1 and d2 are merged and the merged
        value is returned; nothing is modified.

        Raises KeyError if neither mapping contains key.
        """
        ## this method can work in two ways - abstract that away
        if d2 is None:
            x1 = self
            x2 = d1
        else:
            x1 = d1
            x2 = d2

        ## we only have to do work if both objects contain our key
        ## otherwise, we just take the one that does, or raise an
        ## exception if neither does
        if key in x1 and key in x2:
            merged = self._merge_fields(x1[key], x2[key])
        elif key in x1:
            merged = x1[key]
        elif key in x2:
            merged = x2[key]
        else:
            raise KeyError

        ## back to the two different ways - if this method was called
        ## upon an object, update that object in place.
        ## return nothing in this case, to make the author notice a
        ## problem if she assumes the object itself will not be modified
        if d2 is None:
            self[key] = merged
            return None

        return merged

    mergeFields = function_deprecated_by(merge_fields)

    def split_gpg_and_payload(sequence):
        """Return a (gpg_pre, payload, gpg_post) tuple

        Each element of the returned tuple is a list of lines (with trailing
        whitespace stripped).

        gpg_pre_lines = []
        lines = []
        gpg_post_lines = []
        state = 'SAFE'
        gpgre = re.compile(
            r'^-----(?P<action>BEGIN|END) PGP (?P<what>[^-]+)-----$')
        blank_line = re.compile('^$')
        first_line = True

        for line in sequence:
            line = line.strip('\r\n')

            # skip initial blank lines, if any
            if first_line:
                if blank_line.match(line):
                    first_line = False

            m = gpgre.match(line)

            if not m:
                if state == 'SAFE':
                    if not blank_line.match(line):
                        if not gpg_pre_lines:
                            # There's no gpg signature, so we should stop at
                            # this blank line
                elif state == 'SIGNED MESSAGE':
                    if blank_line.match(line):
                        state = 'SAFE'
                elif state == 'SIGNATURE':
                if m.group('action') == 'BEGIN':
                    state = m.group('what')
                elif m.group('action') == 'END':
                if not blank_line.match(line):
                    if not lines:

        if len(lines):
            return (gpg_pre_lines, lines, gpg_post_lines)
            raise EOFError('only blank lines found in input')

    split_gpg_and_payload = staticmethod(split_gpg_and_payload)

    def gpg_stripped_paragraph(cls, sequence):
        """Return the payload element of split_gpg_and_payload(sequence).

        split_gpg_and_payload() is documented as returning a
        (gpg_pre, payload, gpg_post) tuple; this picks the middle item.
        """
        return cls.split_gpg_and_payload(sequence)[1]

    gpg_stripped_paragraph = classmethod(gpg_stripped_paragraph)

    def get_gpg_info(self, keyrings=None):
        """Return a GpgInfo object with GPG signature information

        This method will raise ValueError if the signature is not available
        (e.g. the original text cannot be found).

        The result is cached in self.gpg_info, so keyrings only has an
        effect on the first call.

        :param keyrings: list of keyrings to use (see GpgInfo.from_sequence)
        """

        # raw_text is saved (as a string) only for Changes and Dsc (see
        # _gpg_multivalued.__init__) which is small compared to Packages or
        # Sources which contain no signature
        if not hasattr(self, 'raw_text'):
            raise ValueError("original text cannot be found")

        if self.gpg_info is None:
            # NOTE(review): the tail of this call was missing; passing
            # keyrings through by keyword follows the docstring above -
            # confirm against GpgInfo.from_sequence.
            self.gpg_info = GpgInfo.from_sequence(self.raw_text,
                                                  keyrings=keyrings)

        return self.gpg_info

    def validate_input(self, key, value):
        """Raise ValueError if value is not a valid value for key

        Subclasses that do interesting things for different keys may wish to
        override this method.

        This implementation ignores key and only inspects value.
        """

        # The value cannot end in a newline (if it did, dumping the object
        # would result in multiple stanzas)
        if value.endswith('\n'):
            raise ValueError("value must not end in '\\n'")

        # Make sure there are no blank lines (actually, the first one is
        # allowed to be blank, but no others), and each subsequent line starts
        # with whitespace
        for line in value.splitlines()[1:]:
            if not line:
                raise ValueError("value must not have blank lines")
            if not line[0].isspace():
                raise ValueError("each line must start with whitespace")

    def __setitem__(self, key, value):
        """Check value with validate_input(), then store it under key.

        An exception raised by validate_input() propagates before
        anything is stored.
        """
        self.validate_input(key, value)
        Deb822Dict.__setitem__(self, key, value)
from deprecation import function_deprecated_by

def parse_tags(input):
	"""Parse tag database lines, yielding (packages, tags) pairs.

	input - iterable of lines of the form "pkg1, pkg2: tag1, tag2";
	        the colon and the tag list are optional.

	Yields a 2-tuple of sets for every line: the package names and the
	tags (an empty set when the line lists no tags).
	"""
	lre = re.compile(r"^(.+?)(?::?\s*|:\s+(.+?)\s*)$")
	for line in input:
		# Is there a way to remove the last character of a line that does not
		# make a copy of the entire line?
		m = lre.match(line)
		pkgs = set(m.group(1).split(', '))
		if m.group(2):
			tags = set(m.group(2).split(', '))
		else:
			tags = set()
		yield pkgs, tags

parseTags = function_deprecated_by(parse_tags)

def read_tag_database(input):
	"""Read the tag database, returning a pkg->tags dictionary.

	Every package receives its own copy of the tag set parsed from its
	line.
	"""
	mapping = {}
	for names, tagset in parse_tags(input):
		for name in names:
			# Separate copy per package, using the native set
			mapping[name] = tagset.copy()
	return mapping

readTagDatabase = function_deprecated_by(read_tag_database)

def read_tag_database_reversed(input):
	"Read the tag database, returning a tag->pkgs dictionary"
	db = {}
class PackageFile:
    """A Debian package file.

    Objects of this class can be used to read Debian's Source and
    Packages files."""

    re_field = re.compile(r'^([A-Za-z][A-Za-z0-9-]+):(?:\s*(.*?))?\s*$')
    re_continuation = re.compile(r'^\s+(?:\.|(\S.*?)\s*)$')

    def __init__(self, name, file_obj=None):
        """Creates a new package file object.

        name - the name of the file the data comes from
        file_obj - an alternate data source; the default is to open the
                  file with the indicated name.
        """
        if file_obj is None:
            # open() is the portable spelling of the old file() builtin.
            file_obj = open(name)
        self.name = name
        self.file = file_obj
        self.lineno = 0

    def __iter__(self):
        """Yield one package record after another.

        Each record is a list of (field name, contents) tuples in file
        order.  Continuation lines are appended to the contents of their
        field, separated by a newline; a continuation consisting of a
        single "." contributes an empty line.

        Raises ParseError (through raise_syntax_error) on a blank line
        that does not terminate a record and on a line that is not a
        field.
        """
        line = self.file.readline()
        self.lineno += 1
        pkg = []
        while line:
            if line.strip(' \t') == '\n':
                # A blank line terminates the current record.
                if len(pkg) == 0:
                    self.raise_syntax_error('expected package record')
                yield pkg
                pkg = []
                line = self.file.readline()
                self.lineno += 1
                # Re-check for end of file / another blank line before
                # trying to parse the new line as a field.
                continue

            match = self.re_field.match(line)
            if not match:
                self.raise_syntax_error("expected package field")
            (name, contents) = match.groups()
            contents = contents or ''

            while True:
                line = self.file.readline()
                self.lineno += 1
                match = self.re_continuation.match(line)
                if match:
                    (ncontents, ) = match.groups()
                    if ncontents is None:
                        ncontents = ""
                    contents = "%s\n%s" % (contents, ncontents)
                else:
                    # Not a continuation (this includes end of file):
                    # leave the line for the outer loop.
                    break
            pkg.append((name, contents))
        if pkg:
            yield pkg

    def raise_syntax_error(self, msg, lineno=None):
        """Raise ParseError for this file.

        msg - error message
        lineno - line number to report; defaults to the current line
        """
        if lineno is None:
            lineno = self.lineno
        raise ParseError(self.name, lineno, msg)

    raiseSyntaxError = function_deprecated_by(raise_syntax_error)

class Release(PseudoEnum):
    """PseudoEnum subclass that adds no members of its own."""

def list_releases():
    """Build the table of known releases.

    Returns a dictionary mapping each release name to a Release object
    built from the name and its position in the list.  The same
    dictionary is also stored as Release.releases.
    """
    names = ("potato", "woody", "sarge", "etch", "lenny", "sid")
    table = {}
    for position, name in enumerate(names):
        table[name] = Release(name, position)
    Release.releases = table
    return table

listReleases = function_deprecated_by(list_releases)

def intern_release(name, releases=list_releases()):
    """Return the entry stored under name in releases, or None.

    name - release name to look up
    releases - mapping of release names to objects; the default is the
               table built by list_releases() once, when this function
               is defined.
    """
    # dict.get replaces the deprecated has_key() test and makes the
    # "not found" result explicit.
    return releases.get(name)

internRelease = function_deprecated_by(intern_release)

del listReleases
del list_releases