Пример #1
0
 def create(dirname):
   """Create the on-disk layout under ``dirname``.

   Makes the ``tar``, ``idx`` and ``label`` subdirectories (in that order)
   and then calls ``TarDB.create`` on the ``tar`` subdirectory.
   ``dirname`` itself must already exist, since ``os.mkdir`` does not
   create parent directories.
   """
   for subdir in ('tar', 'idx', 'label'):
     os.mkdir(os.path.join(dirname, subdir))
   TarDB.create(os.path.join(dirname, 'tar'))
Пример #2
0
class MessageDB:
    """Message store pairing a TarDB of raw records with a TextDB index.

    The two backends are rooted at the ``tar`` and ``text`` subdirectories
    of ``basedir``. Every lifecycle call (create/open/close/flush) is
    forwarded to both backends, TarDB first.
    """

    # Limit handed to cutoff() when a message's text is stored.
    # NOTE(review): presumably a maximum length in characters -- confirm
    # against cutoff().
    MAX_TEXT_SIZE = 100000

    def __init__(self, basedir):
        """Construct the TarDB and TextDB backends under ``basedir``."""
        self.basedir = basedir
        self._tar = TarDB(os.path.join(basedir, 'tar'))
        self._text = TextDB(os.path.join(basedir, 'text'))

    def create(self):
        """Create ``basedir`` and then both backends.

        Raises whatever ``os.makedirs`` raises (e.g. ``FileExistsError``
        when ``basedir`` already exists).
        """
        os.makedirs(self.basedir)
        self._tar.create()
        self._text.create()

    def open(self):
        """Open both backends."""
        self._tar.open()
        self._text.open()

    def close(self):
        """Close both backends."""
        self._tar.close()
        self._text.close()

    def flush(self):
        """Flush both backends."""
        self._tar.flush()
        self._text.flush()

    def add_file(self, data):
        """Store one raw message and index its text and tags.

        Args:
            data: the raw message as bytes.

        Returns:
            The record number assigned to the message.
        """
        recno = self._tar.next_recno()
        # The record is named after its zero-padded record number.
        info = TarInfo('%08d' % recno)
        self._tar.add_record(info, bytes2gzip(data))
        msg = message_from_bytes(data)
        text = cutoff(msg2str(msg), self.MAX_TEXT_SIZE)
        self._text.add_text(recno, text)
        for tag in msg2tags(msg):
            self._text.add_tag(recno, tag)
        return recno

    @staticmethod
    def _intersect(keys, lookup):
        """Return the record numbers that ``lookup`` reports for every key.

        Args:
            keys: iterable of search terms.
            lookup: callable mapping one term to an iterable of record
                numbers.

        Returns:
            A set of record numbers common to all terms; an empty set when
            ``keys`` is empty.
        """
        matched = None
        for key in keys:
            recs = set(lookup(key))
            if matched is None:
                matched = recs
            else:
                matched.intersection_update(recs)
        # No terms at all means no matches rather than a None to sort.
        return matched if matched is not None else set()

    def search_tag(self, tags):
        """Yield the text of every record that matches all of ``tags``.

        Results are ordered by descending record number. An empty ``tags``
        yields nothing.
        """
        recnos = self._intersect(tags, self._text.search_tag)
        for recno in sorted(recnos, reverse=True):
            yield self._text.get_text(recno)

    def search_text(self, qs):
        """Yield the text of every record that matches all queries in ``qs``.

        Results are ordered by descending record number. An empty ``qs``
        yields nothing.
        """
        recnos = self._intersect(qs, self._text.search_text)
        for recno in sorted(recnos, reverse=True):
            yield self._text.get_text(recno)