示例#1
0
文件: mkdict.py 项目: afcarl/nlcrypt
 def write(self, g2wpath, w2gpath):
     """Write the dictionary out as two cdb files.

     g2wpath maps a group key ('pos1+pos2:n') to a space-separated word
     list; w2gpath maps each word back to 'group,index'.  Words in
     self.skip get the empty group ',0'.
     """
     print >> sys.stderr, 'Sorting...'
     grp2words = {}
     # Bucket words by their (joined POS tags, count) signature.
     for (w, (n, poss)) in self._words.iteritems():
         grp = '%s:%d' % ('+'.join(poss), n)
         if grp not in grp2words: grp2words[grp] = []
         grp2words[grp].append(w)
     word2grp = {}
     # Largest groups first; each word records its rank within its group.
     r = sorted(grp2words.iteritems(),
                key=lambda (k, v): len(v),
                reverse=True)
     for (grp, words) in r:
         words.sort()
         for (n, w) in enumerate(words):
             word2grp[w] = (grp, n)
         print >> sys.stderr, ' Group: %r (%d)' % (grp, len(words))
     print >> sys.stderr, 'Writing: %r' % g2wpath
     g2w = cdb.cdbmake(g2wpath, g2wpath + '.tmp')
     for (grp, words) in grp2words.iteritems():
         g2w.add(grp, ' '.join(words))
     g2w.finish()
     print >> sys.stderr, 'Writing: %r' % w2gpath
     w2g = cdb.cdbmake(w2gpath, w2gpath + '.tmp')
     for (word, (grp, n)) in word2grp.iteritems():
         w2g.add(word, '%s,%d' % (grp, n))
     for w in self.skip:
         w2g.add(w, ',0')
     w2g.finish()
     return
示例#2
0
文件: mkdict.py 项目: euske/nlcrypt
 def write(self, g2wpath, w2gpath):
   """Write the dictionary as two cdb files: group->words and word->group.

   Same contract as the other write() variant in this file; only the
   formatting differs.
   """
   print >>sys.stderr, 'Sorting...'
   grp2words = {}
   # Bucket words by their (joined POS tags, count) signature.
   for (w, (n,poss)) in self._words.iteritems():
     grp = '%s:%d' % ('+'.join(poss), n)
     if grp not in grp2words: grp2words[grp] = []
     grp2words[grp].append(w)
   word2grp = {}
   # Largest groups first; each word records its rank within its group.
   r = sorted(grp2words.iteritems(), key=lambda (k,v):len(v), reverse=True)
   for (grp, words) in r:
     words.sort()
     for (n,w) in enumerate(words):
       word2grp[w] = (grp, n)
     print >>sys.stderr, ' Group: %r (%d)' % (grp, len(words))
   print >>sys.stderr, 'Writing: %r' % g2wpath
   g2w = cdb.cdbmake(g2wpath, g2wpath+'.tmp')
   for (grp,words) in grp2words.iteritems():
     g2w.add(grp, ' '.join(words))
   g2w.finish()
   print >>sys.stderr, 'Writing: %r' % w2gpath
   w2g = cdb.cdbmake(w2gpath, w2gpath+'.tmp')
   for (word,(grp,n)) in word2grp.iteritems():
     w2g.add(word, '%s,%d' % (grp,n))
   for w in self.skip:
     w2g.add(w, ',0')
   w2g.finish()
   return
示例#3
0
文件: pycdb.py 项目: sleepy-cat/cdb
def cdbmake_true(f, a):
    """Dump mapping *a* into a cdb file at path *f* (built via f + '.tmp')."""
    import cdb
    maker = cdb.cdbmake(f, f + ".tmp")
    for key, value in a.iteritems():
        maker.add(key, value)
    maker.finish()
    return
示例#4
0
    def realSync(self):
        """Flush pending add/delete lists into a rebuilt cdb file.

        cdb files are immutable, so syncing means: copy every surviving
        record from the old db into a new cdbmake, apply queued deletes
        and overwrites, append brand-new keys, then reopen.  No-op unless
        self.modified is set.
        """
        if self.modified:
            self.modified = False
            newDB = cdb.cdbmake(self.cdbName, self.cdbName + ".tmp")

            # Merge existing records with the pending delete/overwrite lists.
            for key, value in iter(self.cdbObject.each, None):
                if key in self.delList:
                    # Deleted then re-added: keep the new value only.
                    if key in self.addList:
                        newDB.add(key, cPickle.dumps(self.addList[key], cPickle.HIGHEST_PROTOCOL))
                        del self.addList[key]
                elif key in self.addList:
                    newDB.add(key, cPickle.dumps(self.addList[key], cPickle.HIGHEST_PROTOCOL))
                    del self.addList[key]
                else:
                    newDB.add(key, value)


            self.closeCDB()

            # Whatever is left in addList are keys not present in the old db.
            for key, value in self.addList.iteritems():
                newDB.add(key, cPickle.dumps(value, cPickle.HIGHEST_PROTOCOL))

            newDB.finish()
            del newDB

            self.addList = {}
            self.delList = []

            self.openCDB()
示例#5
0
文件: pycdb.py 项目: sleepy-cat/cdb
def cdbmake_true(f, a):
  """Dump mapping *a* into a cdb file at path *f* (built via f + '.tmp')."""
  import cdb
  c = cdb.cdbmake(f, f+".tmp")
  for (k,v) in a.iteritems():
    c.add(k,v)
  c.finish()
  return
示例#6
0
    def realSync(self):
        """Flush pending add/delete lists into a rebuilt cdb file.

        cdb files are immutable, so syncing rebuilds the whole file:
        surviving old records are copied, queued deletes/overwrites
        applied, brand-new keys appended, then the db is reopened.
        No-op unless self.modified is set.
        """
        if self.modified:
            self.modified = False
            newDB = cdb.cdbmake(self.cdbName, self.cdbName + ".tmp")

            for key, value in iter(self.cdbObject.each, None):
                if key in self.delList:
                    # Deleted then re-added: keep the new value only.
                    if key in self.addList:
                        newDB.add(
                            key,
                            cPickle.dumps(self.addList[key],
                                          cPickle.HIGHEST_PROTOCOL))
                        del self.addList[key]
                elif key in self.addList:
                    newDB.add(
                        key,
                        cPickle.dumps(self.addList[key],
                                      cPickle.HIGHEST_PROTOCOL))
                    del self.addList[key]
                else:
                    newDB.add(key, value)

            self.closeCDB()

            # Remaining addList entries are keys not present in the old db.
            for key, value in self.addList.iteritems():
                newDB.add(key, cPickle.dumps(value, cPickle.HIGHEST_PROTOCOL))

            newDB.finish()
            del newDB

            self.addList = {}
            self.delList = []

            self.openCDB()
示例#7
0
    def sync(self, force=False):
        """Rebuild the cdb file, replacing keys queued in self.db.

        Records whose key appears in self.db are dropped from the copy and
        re-added from the queued values (each key may map to a list of
        values).  NOTE(review): *force* is currently unused, and `dk` below
        is computed but never read — presumably leftovers; confirm before
        removing, since decode() might have side effects.
        """
        if not self.db:
            return

        tmp = cdb.cdbmake(self.filename, self.tempfile)

        # Copy original
        r = self.cdb.each()
        while r:
            k, v = r
            dk = decode(k)
            if k not in self.db:
                tmp.add(*r)
            r = self.cdb.each()

        # Add new stuff
        for k, l in self.db.iteritems():
            for v in l:
                try:
                    tmp.add(k, v)
                except:
                    print(k, v)
                    raise

        tmp.finish()
        self.cdb = cdb.init(self.filename)
        self.db = {}
 def writeCdbPages(self, filename, pageFromId):
     """Serialize each page record into a packed little-endian buffer and
     store it in a cdb keyed by the page id.

     Record layout: link-count (4B), project-count (4B),
     class<<8|importance (4B), then link ids, then per-project pairs of
     (project id, class<<8|importance), then the raw page name.
     """
     maker = cdb.cdbmake(filename, filename + ".tmp")
     s = struct.Struct("<l")
     for i in pageFromId:
         name = pageFromId[i]['name']
         linkIds = pageFromId[i]['links']
         projects = pageFromId[i]['projects']
         # Exact size: 2 counts + class/importance word + links + project
         # pairs + name bytes.
         buf = create_string_buffer(8 + (4 + len(linkIds) * 4) + (len(projects) * 4 * 2) + len(name))
         # pack in the lengths of the links and projects sets
         offset = 0
         struct.pack_into("<l", buf, offset, len(linkIds))
         offset += 4
         struct.pack_into("<l", buf, offset, len(projects))
         offset += 4
         # pack in the page class and importance
         struct.pack_into("<l", buf, offset, (pageFromId[i]['class'] << 8) | pageFromId[i]['importance'])
         offset += 4
         # pack in the links
         for j in linkIds:
             struct.pack_into("<l", buf, offset, j)
             offset += 4
         # pack in the projects
         for j in projects:
             struct.pack_into("<l", buf, offset, j)
             offset += 4
             struct.pack_into("<l", buf, offset, (projects[j]['class'] << 8) | projects[j]['importance'])
             offset += 4
         # pack in the name
         buf[offset:] = name
         maker.add(s.pack(i), buf)
     print "Added %d records to CDB %s (fd %d)" % (maker.numentries, maker.fn, maker.fd)
     maker.finish()
     del(maker)
示例#9
0
文件: cdb_dict.py 项目: amuralle/pygr
 def __setitem__(self, key, value):
     """Store key/value, switching the underlying cdb into write mode if
     needed.

     The bare except is deliberate: a read-mode cdb object has no add(),
     so any failure triggers reopening as a cdbmake writer.
     """
     try:
         self.db.add( key, value)
     except:
         # cdb has two modes and if we're in the wrong mode, switch
         self.db = cdb.cdbmake(self.fn, self.fn + ".tmp")
         self.db.add( key, value )
示例#10
0
 def _create_new_cdb(self, arg):
     """
     Create new name-mapping if it doesn't exist yet,
     call this under the name-mapping.lock.

     Writes an empty cdb at self._name_db.  *arg* is unused; it exists to
     satisfy the locked-callback signature.
     """
     if not os.path.exists(self._name_db):
         maker = cdb.cdbmake(self._name_db, self._name_db + '.tmp')
         maker.finish()
 def writeCdbNameFromId(self, filename, dictionary):
     """Write an id -> name cdb; keys are ids packed as little-endian
     32-bit ints, values are the raw name strings."""
     maker = cdb.cdbmake(filename, filename + ".tmp")
     s = struct.Struct("<l")
     for i in dictionary:
         maker.add(s.pack(i), dictionary[i]['name'])
     print "Added %d records to CDB %s (fd %d)" % (maker.numentries, maker.fn, maker.fd)
     maker.finish()
     del(maker)
示例#12
0
文件: tests.py 项目: abael/python-cdb
    def test_reuse_cdb_make(self):
        # A finished cdbmake object must refuse any further writes.
        maker = cdb.cdbmake('data', 'tmp')
        maker.add('foo', 'bar')
        maker.finish()

        self.assertRaises(cdb.error, maker.add, 'spam', 'eggs')
        self.assertRaises(cdb.error, maker.addmany, [('spam', 'eggs')])
        self.assertRaises(cdb.error, maker.finish)
示例#13
0
    def __setitem__(self, key, value):
        """Store key/value, switching the cdb into write mode on failure.

        The bare except is deliberate: a read-mode cdb has no add(), so
        any failure triggers reopening as a cdbmake writer.
        """
        try:
            self.db.add(key, value)
        except:
            # cdb has two modes and if we're in the wrong mode, switch
            self.db = cdb.cdbmake(self.fn, self.fn + ".tmp")
            self.db.add(key, value)
示例#14
0
    def test_reuse_cdb_make(self):
        """A finished cdbmake object must refuse further add/addmany/finish."""
        cm = cdb.cdbmake('data', 'tmp')
        cm.add('foo', 'bar')
        cm.finish()

        self.assertRaises(cdb.error, cm.add, 'spam', 'eggs')
        self.assertRaises(cdb.error, cm.addmany, [('spam', 'eggs')])
        self.assertRaises(cdb.error, cm.finish)
示例#15
0
 def clear(self):
     """Remove all entries from the dictionary."""
     # Delete and re-touch the file, then write a fresh empty cdb over it
     # and reopen the reader handle.
     os.remove(self.filename)
     open(self.filename, "w").close()
     maker = cdb.cdbmake(self.filename, self.filename + ".tmp")
     maker.finish()
     del(maker)
     self.cdb = cdb.init(self.filename)
 def writeCdbIdFromName(self, filename, dictionary):
     """Write a name -> id cdb; keys are raw name strings, values are ids
     packed as little-endian 32-bit ints (inverse of writeCdbNameFromId)."""
     maker = cdb.cdbmake(filename, filename + ".tmp")
     s = struct.Struct("<l")
     for i in dictionary:
         # add key, value
         #print "added:", i, dictionary[i]['id']
         maker.add(i, s.pack(dictionary[i]['id']))
     print "Added %d records to CDB %s (fd %d)" % (maker.numentries, maker.fn, maker.fd)
     maker.finish()
     del(maker)
示例#17
0
 def generate_cdb_file(self, dstdir, filename, key, hosts=None, groups=None, users=None, host=None, guard=True):
     """Write uid -> getattr(user, key) into dstdir/filename as a cdb.

     Only non-retired users with a truthy attribute value are written.
     hosts/groups/host are accepted for signature compatibility but unused
     here; nothing happens when guard is false.
     """
     if guard:
         fn = os.path.join(dstdir, filename).encode('ascii', 'ignore')
         maker = cdb.cdbmake(fn, fn + '.tmp')
         for user in users:
             if user.is_not_retired():                               # FIXME really?
                 val = getattr(user, key)
                 if val:
                     maker.add(user.uid, val)
         maker.finish()
示例#18
0
文件: generate.py 项目: zobelhelas/ud
 def generate_cdb_file(self, dstdir, filename, key, hosts=None, groups=None, users=None, host=None, guard=True):
     """Write uid -> getattr(user, key) into dstdir/filename as a cdb.

     Only non-retired users with a truthy attribute value are written;
     nothing happens when guard is false.
     """
     if guard:
         fn = os.path.join(dstdir, filename).encode('ascii', 'ignore')
         maker = cdb.cdbmake(fn, fn + '.tmp')
         for user in users: # TODO latest version of python-cdb can do bulk add
             if user.is_not_retired():
                 val = getattr(user, key)
                 if val:
                     maker.add(user.uid, val)
         maker.finish()
def cdb_write_proc (file_cdb,dict_aa):
	# Serialize each value of dict_aa as JSON and store it under its key
	# in a cdb at file_cdb; the file is made world-writable afterwards.
	maker = cdb.cdbmake(file_cdb, file_cdb + ".tmp")
#
	for key in dict_aa.keys():
		unit = dict_aa[key]
		json_str = json.dumps (unit)
		maker.add(key,json_str)
	maker.finish()
	del(maker)
#
	# NOTE(review): 0777 leaves the db writable by everyone — confirm intended.
	os.chmod (file_cdb,0777)
示例#20
0
    def setUp(self):
        """Create a scratch cdb with mixed add styles for the test cases.

        The repeated 'a' adds and the bytes value are deliberate fixture
        data (cdb permits duplicate keys).
        """
        self.temp_dir = mkdtemp()
        self.cdb_path = join(self.temp_dir, 'database.cdb')
        self.tmp_path = join(self.temp_dir, 'database.tmp')

        self.db = cdb.cdbmake(self.cdb_path.encode('utf-8'),
                              self.tmp_path.encode('utf-8'))
        self.db.add('a', '1')
        self.db.add('a', '2')
        self.db.addmany([('b', '1'), ('c', '1')])
        self.db.add('a', b'\x80')
示例#21
0
 def create_db(f, db_fname):
    '''Write out db of headers'''

    # FASTQ: every 4th line (i % 4 == 0) is a header; strip '@' and the
    # trailing pair suffix, then record each header with a dummy 'T' value.
    fh = open(f, 'r')
    fh_headers = (x.strip()[1:-2] for i, x in enumerate(fh) if not (i % 4))

    db = cdb.cdbmake(db_fname, db_fname + '.tmp')
    for h in fh_headers:
       db.add(h, 'T')
    db.finish()
    del(db)
示例#22
0
 def update(self, values):
     """Add values to the dictionary."""
     # NOTE(review): cdbmake rewrites the whole file, so any entries not
     # present in *values* appear to be dropped — confirm against callers.
     maker = cdb.cdbmake(self.filename, self.filename + ".tmp")
     for i in values:
         # add key,value
         maker.add(self._pack_key(i), self._pack_value(values[i]))
     print "Added %d records to CDB %s (fd %d)" \
         % (maker.numentries, maker.fn, maker.fd)
     maker.finish()
     del(maker)
     self.cdb = cdb.init(self.filename)
示例#23
0
def update_database():
    """Rebuild the MAC-vendor lookup cdb from ../lib/mac.txt.

    Each line of mac.txt is expected to look like '<mac-prefix> <vendor>';
    short or blank lines are skipped instead of raising IndexError as the
    old code did.  The db is written to ../lib/mac_address_db.
    """
    fn = "mac_address_db"
    db = cdb.cdbmake("../lib/" + fn, "../lib/" + fn + ".tmp")

    # 'fh' avoids shadowing the builtin 'file'.
    with open("../lib/mac.txt", "r") as fh:
        for line in fh:
            fields = line.split()
            if len(fields) < 2:
                continue  # skip blank/malformed lines
            db.add(fields[0], fields[1])
    db.finish()
示例#24
0
文件: cmap.py 项目: frid/PythonPool
def dumpcdb(cmap, cdbfile, verbose=1):
    """Dump a CMap into a cdb file.

    Key prefixes distinguish record kinds: '/' for attributes (repr'd),
    'c' for code->CID (CID packed as big-endian uint32), 'i' for
    CID->code (CID packed into the key).
    """
    m = cdb.cdbmake(cdbfile, cdbfile + '.tmp')
    if verbose:
        print >> stderr, 'Writing: %r...' % cdbfile
    for (k, v) in cmap.getall_attrs():
        m.add('/' + k, repr(v))
    for (code, cid) in cmap.getall_code2cid():
        m.add('c' + code, pack('>L', cid))
    for (cid, code) in cmap.getall_cid2code():
        m.add('i' + pack('>L', cid), code)
    m.finish()
    return
示例#25
0
def dodb(ds, dst):
    """Encrypt each record (minus its 'id') with RC6 and store it in a cdb
    keyed by the id.

    NOTE(review): the temp path is dst + "tmp" (no dot) unlike every other
    snippet's '.tmp' — presumably a typo, though behavior-neutral.
    """
    db = cdb.cdbmake(dst, dst + "tmp")
    for d in ds:
        k = d["id"]
        # Temporarily pop 'id' so it isn't serialized, then restore it.
        del d["id"]
        j = json.dumps(d)
        d["id"] = k
        e = RC6.encrypt(bytearray(j, "utf-8"), bytearray(seckey, "ascii"))
        v = array.array("B")
        v.fromlist(e)
        db.add(k, v)
    db.finish()
示例#26
0
def make_indices(path):
    """Build three cdb indices from a Wikipedia-dump index file.

    Reads '<path-with--index.txt.bz2>' lines of the form
    'byteoffset:id:title' and writes id->title, title->id and
    id->byteoffset cdbs next to *path*.  Progress is reported via the
    module's progress() helper.
    """
    f = bz2.BZ2File(path.replace('.xml.bz2', '-index.txt.bz2'))
    id_path = '%s.ids' % path
    title_path = '%s.titles' % path
    offset_path = '%s.offsets' % path
    id_db = cdb.cdbmake(id_path, id_path + '.tmp')
    title_db = cdb.cdbmake(title_path, title_path + '.tmp')
    offset_db = cdb.cdbmake(offset_path, offset_path + '.tmp')

    def build():
        # One yield per line lets progress() track throughput.
        for line in f:
            # 'offset' rather than 'bytes': avoid shadowing the builtin.
            # Split at most twice; titles may themselves contain ':'.
            (offset, id, title) = line[:-1].split(':', 2)
            id_db.add(id, title)
            title_db.add(title, id)
            offset_db.add(id, offset)
            yield

    try:
        progress(build())
    finally:
        f.close()  # the old code leaked the bz2 handle
    id_db.finish()
    title_db.finish()
    offset_db.finish()
示例#27
0
文件: cmap.py 项目: frid/PythonPool
def dumpcdb(cmap, cdbfile, verbose=1):
  """Dump a CMap into a cdb file ('/', 'c', 'i' key prefixes distinguish
  attributes, code->CID and CID->code records)."""
  m = cdb.cdbmake(cdbfile, cdbfile+'.tmp')
  if verbose:
    print >>stderr, 'Writing: %r...' % cdbfile
  for (k,v) in cmap.getall_attrs():
    m.add('/'+k, repr(v))
  for (code,cid) in cmap.getall_code2cid():
    m.add('c'+code, pack('>L',cid))
  for (cid,code) in cmap.getall_cid2code():
    m.add('i'+pack('>L',cid), code)
  m.finish()
  return
示例#28
0
    def __init__(self, filename):
        """Open (or lazily create) the cdb at *filename*.

        If cdb.init fails — typically because the file does not exist or
        is not a valid cdb — an empty cdb is written first, then opened.
        """
        self.filename = filename
        self.tempfile = "%s.tmp" % filename

        # Pending writes buffered until sync().
        self.db = {}
        try:
            self.cdb = cdb.init(self.filename)
        except cdb.error:
            d = cdb.cdbmake(self.filename, self.tempfile)
            d.finish()
            del d
            self.cdb = cdb.init(self.filename)
示例#29
0
        def create_db(f, db_fname):
            '''Write out db of headers'''

            # FASTQ: every 4th line (i % 4 == 0) is a header; strip '@'
            # and the trailing pair suffix, store with dummy value 'T'.
            fh = open(f, 'r')
            fh_headers = (x.strip()[1:-2] for i, x in enumerate(fh)
                          if not (i % 4))

            db = cdb.cdbmake(db_fname, db_fname + '.tmp')
            for h in fh_headers:
                db.add(h, 'T')
            db.finish()
            del (db)
    def open( self ):
        """Open the word BDB read-only and prepare a fresh cdb output dir.

        NOTE(review): the rm -rf/mkdir via os.system is shell-injection
        prone if self.cdb_dir is untrusted, and os.system does not raise
        OSError, so the except clause is dead — consider shutil.rmtree/
        os.makedirs instead.
        """
        self.word_db = tc.BDB()
        self.word_db.open( self._get_db_filepath( 'word' ), tc.BDBOREADER )

        try:
            os.system( 'rm -rf ' + self.cdb_dir )
            os.system( 'mkdir ' + self.cdb_dir )
        except OSError:
            pass

        word_cdb_name = self._get_cdb_filepath( 'word' )
        self.word_cdb = cdb.cdbmake( word_cdb_name, word_cdb_name + ".tmp" )
        self.index_file = open( word_cdb_name.replace( 'word.cdb', 'word.index'), 'w' )
示例#31
0
def generate_cdb_file(data, filename):
    """Generate a CDB file"""
    # Write key/value rows into <cache_dir>/db/<filename>, then restrict
    # it to owner baruwa (rw) / group exim (r) only.
    cache_dir = config.get('cache_dir', '/var/lib/baruwa/data')
    dest = os.path.join(cache_dir, 'db', filename)
    maker = cdbmake(dest, dest + ".tmp")
    for line in data:
        maker.add(line.key, line.value)
    maker.finish()
    del(maker)
    os.chmod(dest, 0640)
    uid = pwd.getpwnam("baruwa").pw_uid
    gid = grp.getgrnam("exim").gr_gid
    os.chown(dest, uid, gid)
示例#32
0
 def make_cdb_db(self):
     """Build one cdb per enabled blacklist category and cache the paths.

     Only categories in self.categories are built; an existing .cdb file
     is reused rather than rebuilt.  Each line of the source list becomes
     a key with the literal value "True".
     """
     lib = []
     for bl in self.blacklist_files:
         bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0]))
         bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0]))
         if bl[0] in self.categories:
             if not os.path.isfile(bl_cdb_file):
                 cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
                 # with-block closes the source list (old code leaked it)
                 with open(bl[1], "r") as f:
                     for line in f:
                         cdb_file.add(line.strip("\n"), "True")
                 cdb_file.finish()
             lib.append(bl_cdb_file)
     self.cache = lib
 def make_cdb_db(self):
     """Build one cdb per enabled blacklist category and cache the paths.

     Only categories in self.categories are built; an existing .cdb file
     is reused rather than rebuilt.  Each line of the source list becomes
     a key with the literal value "True".
     """
     lib = []
     for bl in self.blacklist_files:
         bl_cdb_file = ("%s/%s.cdb" % (self.base_dir, bl[0]))
         bl_cdb_file_tmp = ("%s/%s.tmp" % (self.base_dir, bl[0]))
         if bl[0] in self.categories:
             if not os.path.isfile(bl_cdb_file):
                 cdb_file = cdb.cdbmake(bl_cdb_file, bl_cdb_file_tmp)
                 # with-block closes the source list (old code leaked it)
                 with open(bl[1], "r") as f:
                     for line in f:
                         cdb_file.add(line.strip("\n"), "True")
                 cdb_file.finish()
             lib.append(bl_cdb_file)
     self.cache = lib
示例#34
0
文件: Util.py 项目: pjz/TMDAng
def build_cdb(filename):
    """Build a cdb file from a text file.

    Each non-empty line of *filename* is '<key> [value ...]'; the key is
    lower-cased and only the first value (if any) is stored.  Returns
    True on success, False on any error (the broad except mirrors the
    sibling build_cdb variant's best-effort contract).
    """
    import cdb
    try:
        cdbname = filename + '.cdb'
        tempfile.tempdir = os.path.dirname(filename)
        tmpname = os.path.split(tempfile.mktemp())[1]
        # 'maker' instead of rebinding the 'cdb' module name.
        maker = cdb.cdbmake(cdbname, cdbname + '.' + tmpname)
        for line in file_to_list(filename):
            # Pad so value-less lines unpack cleanly.  Must slice [:2]:
            # the original [:2->1] slice produced a single-element list,
            # so the 2-name unpack raised ValueError on every line and
            # this function always returned False.
            key, value = (line.split() + [''])[:2]
            maker.add(key.lower(), value)
        maker.finish()
    except:
        return False
    return True
 def writeCdbPageProjects(self, filename, pageFromId):
     """Store each page's project list in a cdb keyed by packed page id.

     Value layout: repeated little-endian pairs of (project id,
     class<<8|importance), 8 bytes per project.
     """
     maker = cdb.cdbmake(filename, filename + ".tmp")
     s = struct.Struct("<l")
     for i in pageFromId:
         projects = pageFromId[i]['projects']
         buf = create_string_buffer(len(projects) * 4 * 2)
         offset = 0
         for j in projects:
             struct.pack_into("<l", buf, offset, j)
             offset += 4
             struct.pack_into("<l", buf, offset, (projects[j]['class'] << 8) | projects[j]['importance'])
             offset += 4
         maker.add(s.pack(i), buf)
     print "Added %d records to CDB %s (fd %d)" % (maker.numentries, maker.fn, maker.fd)
     maker.finish()
     del(maker)
 def writeCdbPageLinks(self, filename, pageFromId):
     """Store each page's link list in a cdb keyed by packed page id.

     Value layout: class<<8|importance (4B) followed by one little-endian
     32-bit int per linked page id.
     """
     maker = cdb.cdbmake(filename, filename + ".tmp")
     s = struct.Struct("<l")
     for i in pageFromId:
         linkIds = pageFromId[i]['links']
         buf = create_string_buffer(4 + len(linkIds) * 4)
         offset = 0
         struct.pack_into("<l", buf, offset, (pageFromId[i]['class'] << 8) | pageFromId[i]['importance'])
         offset += 4
         for j in linkIds:
             struct.pack_into("<l", buf, offset, j)
             offset += 4
         maker.add(s.pack(i), buf)
     print "Added %d records to CDB %s (fd %d)" % (maker.numentries, maker.fn, maker.fd)
     maker.finish()
     del(maker)
示例#37
0
def phase_1(pkt):
    """Record the vendor of each new 802.11 client seen in management
    frames (subtypes 0/2/4) and print a running table row.

    NOTE(review): cdb.cdbmake() followed immediately by `del db` without
    finish() appears to truncate/clobber the vendor database before it is
    reopened for reading — the lookup below would then always miss.
    Confirm whether this rebuild step should be removed.
    """
    if pkt.haslayer(Dot11):
        if pkt.type == 0 and pkt.subtype in (0, 2, 4):
            if pkt.addr2 not in clients:
                # First 3 octets of the MAC identify the vendor (OUI).
                vendor_id = pkt.addr2[0:8]
                upper_case = str(vendor_id).upper()

                db_name = "mac_address_db"
                db = cdb.cdbmake("../lib/" + db_name, "../lib/"+ db_name + ".tmp")
                del db
                db = cdb.init("../lib/" + db_name)
                match = db.get(upper_case)

                print("{:<6s}{:>13}{:>12s}".format(str(len(clients) + 1), pkt.addr2, match))
                clients.append(pkt.addr2)
                vendors.append(match)
示例#38
0
    def openCDB(self):
        """Open the cdb for reading, creating directory and an empty db
        (with the expected ownership/permissions) if absent.

        umask is cleared so the explicit 02775/0664 modes apply exactly,
        then restored afterwards.
        """
        prevmask = os.umask(0)

        if not os.path.exists(self.path):
            os.makedirs(self.path, 02775)
            os.chown(self.path, self.uid, self.gid)

        if not os.path.isfile(self.cdbName):
            maker = cdb.cdbmake(self.cdbName, self.cdbName + ".tmp")
            maker.finish()
            del maker
            os.chown(self.cdbName, self.uid, self.gid)
            os.chmod(self.cdbName, 0664)

        os.umask(prevmask)

        self.cdbObject = cdb.init(self.cdbName)
示例#39
0
    def openCDB(self):
        """Open the cdb for reading, creating directory and an empty db
        (with the expected ownership/permissions) if absent.

        umask is cleared so the explicit 02775/0664 modes apply exactly,
        then restored afterwards.
        """
        prevmask = os.umask(0)

        if not os.path.exists(self.path):
            os.makedirs(self.path, 02775)
            os.chown(self.path, self.uid, self.gid)

        if not os.path.isfile(self.cdbName):
            maker = cdb.cdbmake(self.cdbName, self.cdbName + ".tmp")
            maker.finish()
            del maker
            os.chown(self.cdbName, self.uid, self.gid)
            os.chmod(self.cdbName, 0664)

        os.umask(prevmask)

        self.cdbObject = cdb.init(self.cdbName)
示例#40
0
def dumpcdb(cmap, cdbfile, verbose=1):
  """Dump a CMap into a cdb file, falling back to the pure-python pycdb
  when the C cdb extension is unavailable.

  Key prefixes: '/' attributes (repr'd), 'c' code->CID, 'i' CID->code.
  """
  from struct import pack, unpack
  try:
    import cdb
  except ImportError:
    import pycdb as cdb
  m = cdb.cdbmake(cdbfile, cdbfile+'.tmp')
  if verbose:
    print >>stderr, 'Writing: %r...' % cdbfile
  for (k,v) in cmap.getall_attrs():
    m.add('/'+k, repr(v))
  for (code,cid) in cmap.getall_code2cid():
    m.add('c'+code, pack('>L',cid))
  for (cid,code) in cmap.getall_cid2code():
    m.add('i'+pack('>L',cid), code)
  m.finish()
  return
示例#41
0
    def _destroy_item_locked(self, item):
        """Remove *item* from the name-mapping cdb and delete its tree.

        cdb files are immutable, so removal means copying every other
        entry into a new db ('.ndb') and renaming it over the old one.
        Must be called with the name-mapping lock held.
        """
        c = cdb.init(self._name_db)
        maker = cdb.cdbmake(self._name_db + '.ndb', self._name_db + '.tmp')
        r = c.each()
        while r:
            i, v = r
            # Skip the entry pointing at the item being destroyed.
            if v != item._fs_item_id:
                maker.add(i, v)
            r = c.each()
        maker.finish()

        filesys.rename(self._name_db + '.ndb', self._name_db)
        path = os.path.join(self._path, item._fs_item_id)
        try:
            shutil.rmtree(path)
        except OSError, err:
            raise CouldNotDestroyError("Could not destroy item '%r' [errno: %d]" % (
                item.name, err.errno))
示例#42
0
 def generate_cdb_file(self,
                       dstdir,
                       filename,
                       key,
                       hosts=None,
                       groups=None,
                       users=None,
                       host=None,
                       guard=True):
     """Write uid -> getattr(user, key) into dstdir/filename as a cdb.

     Only non-retired users with a truthy attribute value are written;
     nothing happens when guard is false.
     """
     if guard:
         fn = os.path.join(dstdir, filename).encode('ascii', 'ignore')
         maker = cdb.cdbmake(fn, fn + '.tmp')
         for user in users:  # TODO latest version of python-cdb can do bulk add
             if user.is_not_retired():
                 val = getattr(user, key)
                 if val:
                     maker.add(user.uid, val)
         maker.finish()
示例#43
0
 def write(self, outfile=""):
   """Dump self.dict either as a cdb (when outfile ends in '.cdb') or as
   tab-separated plaintext (to outfile, or stdout when empty).

   Each entry is word -> 'pos1:freq1,pos2:freq2,...'.
   """
   if outfile.endswith(".cdb"):
     self.msg("Writing to CDB: %s..." % (outfile))
     out = cdb.cdbmake(outfile, outfile+".tmp")
     for (w, poss) in self.dict.iteritems():
       s = map(lambda pf:"%s:%s" % pf, poss.iteritems())
       out.add(w, ",".join(s))
     out.finish()
   else:
     self.msg("Writing to plaintext: %s..." % (outfile))
     if outfile:
       fp = file(outfile, "w")
     else:
       fp = sys.stdout
     for (w, poss) in self.dict.iteritems():
       s = map(lambda pf:"%s:%s" % pf, poss.iteritems())
       fp.write(w+"\t"+",".join(s)+"\n")
     fp.close()
   return
示例#44
0
    def add(self, key, value):
        if self.record_counter % self.fetch == 0:
            proc = subprocess.Popen(['wc', '-c', self.tmpfile],
                                    stdout=subprocess.PIPE)
            size = proc.stdout.read().strip().split(' ')[0]
            if int(size) > self.limit_file_size:
                self.cdb.finish()
                del self.cdb
                self.num_of_cdbs += 1

                dbnamei = "{}.{}".format(self.dbname, self.num_of_cdbs)
                print "processing {}".format(dbnamei)
                dbnamei_tmp = dbnamei + ".tmp"
                self.tmpfile = dbnamei_tmp
                self.cdb = cdb.cdbmake(dbnamei, dbnamei_tmp)
                self.record_counter = 0
                # save head keys of each splitted cdbs
                filebase = os.path.basename(dbnamei)
                self.keymap.write(u"{} {}\n".format(key, filebase))
        self.record_counter += 1
        self.cdb.add(key.encode(self.encoding), value)
示例#45
0
def build_cdb(filename):
    """Build a cdb file from a text file."""
    # Each line is '<key> [value ...]'; the key is lower-cased and only
    # the first value (if any) is stored.  Returns 1 on success, 0 on any
    # error (broad except is the established best-effort contract here).
    import cdb
    try:
        cdbname = filename + '.cdb'
        tempfile.tempdir = os.path.dirname(filename)
        tmpname = os.path.split(tempfile.mktemp())[1]
        # NOTE(review): rebinding 'cdb' shadows the module after this line.
        cdb = cdb.cdbmake(cdbname, cdbname + '.' + tmpname)
        for line in file_to_list(filename):
            linef = line.split()
            key = linef[0].lower()
            try:
                value = linef[1]
            except IndexError:
                value = ''
            cdb.add(key, value)
        cdb.finish()
    except:
        return 0
    else:
        return 1
示例#46
0
 def create_db(f, db_fname):
    '''Write out db of headers'''

    # Transparently handle gzipped input.
    if f.endswith('.gz'):
       fh = gzip.open(f, 'rb')
    else:
       fh = open(f, 'r')

    # Every 4th line (i % 4 == 0) of a FASTQ file is a header; extract the
    # read id according to the detected encoding.  NOTE(review): this
    # plain function references self.fqtype — presumably it is defined
    # inside a method and closes over self; verify in the enclosing scope.
    # If no branch matches, fh_headers is undefined and the loop below
    # raises NameError after only printing a warning.
    if self.fqtype[0] == 'Illumina1.4':
       fh_headers = (x.strip()[1:-2] for i, x in enumerate(fh) if not (i % 4))
    elif self.fqtype[0] == 'Illumina1.8':
       fh_headers = (x.split(' ')[0][1:] for i, x in enumerate(fh) if not (i % 4))
    elif self.fqtype[0] == 'IlluminaSRA':
       fh_headers = (x.split(' ')[1][:-3] for i, x in enumerate(fh) if not (i % 4))
    else:
       sys.stderr.write('Header encoding not determined: %s\n' % self.fqtype[0])

    db = cdb.cdbmake(db_fname, db_fname + '.tmp')
    for h in fh_headers:
       db.add(h, 'T')
    db.finish()
    del(db)
示例#47
0
    def __init__(self, dbname, keyMapFile, limit_file_size=LFS_DEFAULT,
                 fetch=1000000, encoding='utf-8'):
        """Set up a size-splitting cdb writer.

        dbname: base path; shards are named '<dbname>.<n>'.
        keyMapFile: file (in dbname's directory) recording the first key
        of each shard so readers can pick the right one.
        limit_file_size: rollover threshold in bytes for the temp file.
        fetch: how many records between file-size checks.
        """
        # the options.
        self.dbname = dbname
        # used by CDB_Reader to decide which cdb includes the query key
        self.keyMapFile = keyMapFile
        self.limit_file_size = limit_file_size
        # determines how often to check if current cdb size exceeds the limit
        self.fetch = fetch
        self.record_counter = 0
        self.num_of_cdbs = 0
        self.encoding = encoding

        dbname = "{}.{}".format(self.dbname, self.num_of_cdbs)
        print "processing {}".format(dbname)
        dbname_tmp = dbname + ".tmp"
        self.tmpfile = dbname_tmp
        self.cdb = cdb.cdbmake(dbname, dbname_tmp)

        dbdir = os.path.dirname(self.dbname)
        keyMapPath = "{}/{}".format(dbdir, keyMapFile)
        self.keymap = codecs.open(keyMapPath, 'w', self.encoding)
示例#48
0
    def _rename_item_locked(self, arg):
        """Rename *item* to *newname* in the name-mapping cdb.

        cdb files are immutable, so the whole mapping is copied into a
        new db ('.ndb'): the old entry for this item is re-keyed to the
        new name, everything else is copied verbatim, then the new db is
        renamed over the old one and the item's 'name' file rewritten.
        Raises ItemAlreadyExistsError if the target name is taken.
        Must be called with the name-mapping lock held.
        """
        item, newname = arg
        nn = newname.encode('utf-8')
        npath = os.path.join(self._path, item._fs_item_id, 'name')

        c = cdb.init(self._name_db)
        maker = cdb.cdbmake(self._name_db + '.ndb', self._name_db + '.tmp')
        r = c.each()
        while r:
            i, v = r
            if i == nn:
                raise ItemAlreadyExistsError("Target item '%r' already exists!" % newname)
            elif v == item._fs_item_id:
                maker.add(nn, v)
            else:
                maker.add(i, v)
            r = c.each()
        maker.finish()

        filesys.rename(self._name_db + '.ndb', self._name_db)
        nf = open(npath, mode='wb')
        nf.write(nn)
        nf.close()
示例#49
0
def pack_tree(cdb_file, base_path):
    """Pack every file under *base_path* into *cdb_file*.

    Keys are paths relative to base_path (rooted at '/'); values are the
    raw file contents.  '.svn' directories are skipped.
    """
    exclude_list = ['.svn',]
    cdb_maker = cdb.cdbmake(cdb_file, cdb_file + '.tmp')

    base_path = os.path.abspath(base_path)
    for (path, dir_list, file_list) in os.walk(base_path):
        # Prune excluded dirs in place so os.walk does not descend.
        for dirname in exclude_list:
            if dirname in dir_list:
                dir_list.remove(dirname)

        for filename in file_list:
            relative_dir = path[len(base_path):]
            if not relative_dir:
                relative_dir = '/'
            absolute_path = os.path.join(path, filename)
            relative_path = os.path.join(relative_dir, filename)
            # with-block closes the handle even if add() raises
            # (the old code leaked it, and carried an unused version_map).
            with open(absolute_path) as f:
                data = f.read()
            cdb_maker.add(relative_path, data)

    cdb_maker.finish()
示例#50
0
if __name__ == '__main__':
    # CLI: read '<key> <value>' lines from in_file and write them into a
    # cdb (or mcdb when out_file ends in '.mcdb'), encoding values per
    # --val-type; a progress bar tracks the input-file offset.
    parser = argparse.ArgumentParser()
    parser.add_argument('--val-type', default='int')
    parser.add_argument('in_file')
    parser.add_argument('out_file')

    args = parser.parse_args()

    # Pick the parser (text -> value) / packer (value -> bytes) pair.
    str_to_val, val_to_str = {
        'int': (int, struct.Struct('<Q').pack),
        'float': (float, struct.Struct('<f').pack),
        'prych_hex': (hexstr_to_list, compress_hex_list)
    }[args.val_type]

    if args.out_file.endswith('.mcdb'):
        db = mcdb.make(args.out_file)
    else:
        db = cdb.cdbmake(args.out_file, args.out_file + '.tmp')
    with open(args.in_file, 'r') as f:
        # Seek to the end once to size the progress bar, then rewind.
        f.seek(0, os.SEEK_END)
        pb = progressbar.ProgressBar(maxval=f.tell())
        pb.start()
        f.seek(0)
        for l in f:
            # Split on first whitespace: key, then the rest as the value.
            k, v = l.strip().split(None, 1)
            v = val_to_str(str_to_val(v))
            db.add(k, v)
            pb.update(f.tell())
        pb.finish()
    db.finish()
示例#51
0
    def write_pairs(self, f1, f2):
        '''Parse through two paired files and only write if both pairs are present'''
        # Strategy: build a cdb of headers for each input (in parallel),
        # intersect/symmetric-difference the key sets, then stream each
        # input again writing paired reads to self.o and orphans to
        # '<out>.single'.  cdbs keep the header sets out of memory.
        def intersect(a, b):
            '''Intesection between lists'''
            return list(set(a) & set(b))

        def rm_files(patterns):
            '''Remove files using glob given as list of patterns'''

            import glob
            import os

            for p in patterns:
                files = glob.glob(p)
                if len(files) == 0:
                    pass
                else:
                    map(os.remove, files)

        def write_out(db_common, f, o):
            '''Write out reads'''
            # Copy reads from f to o, keeping only those whose header
            # (minus the /1 or /2 suffix) is present in db_common.

            if self.gz:
                fh = open(f, 'r')
                out = gzip.open(o + '.gz', 'wb')
            else:
                fh = open(f, 'r')
                out = open(o, 'w')

            written_count = 0
            total_count = 0
            for (title, sequence, quality) in FastqGeneralIterator(fh):
                total_count += 1
                if db_common.has_key(title[:-2]):
                    out.write('@%s\n%s\n+\n%s\n' % (title, sequence, quality))
                    written_count += 1
            # NOTE(review): under Python 2, written_count/total_count is
            # integer division, so the reported %% is always 0 or 100;
            # also divides by zero on an empty input file.
            sys.stderr.write('%s: Total %i, Written %i (%.1f%%)\n' %
                             (f, total_count, written_count,
                              written_count / total_count * 100))
            fh.close()
            out.close()

        def create_db(f, db_fname):
            '''Write out db of headers'''
            # Every 4th line of a FASTQ file is a header; strip '@' and
            # the trailing pair suffix, store with dummy value 'T'.

            fh = open(f, 'r')
            fh_headers = (x.strip()[1:-2] for i, x in enumerate(fh)
                          if not (i % 4))

            db = cdb.cdbmake(db_fname, db_fname + '.tmp')
            for h in fh_headers:
                db.add(h, 'T')
            db.finish()
            del (db)

        ## get headers from both trimmed files ##
        # strip the /2 or /1 and grab only the headers
        # write in dbm to minimze memory usage

        # create db's (parallel)
        rand = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for x in range(36))
        db1_fname = 'db1_%s' % rand
        db2_fname = 'db2_%s' % rand

        jobs = []
        p = multiprocessing.Process(target=create_db, args=(
            f1,
            db1_fname,
        ))
        p.start()
        jobs.append(p)

        p = multiprocessing.Process(target=create_db, args=(
            f2,
            db2_fname,
        ))
        p.start()
        jobs.append(p)

        # wait for jobs to finish
        for job in jobs:
            job.join()

        ## get headers that are in both trimmed files ##
        db1 = cdb.init(db1_fname)
        db2 = cdb.init(db2_fname)
        common = intersect(db1.keys(), db2.keys())

        dbcommon_fname = 'dbcommon_%s' % rand
        db_common = cdb.cdbmake(dbcommon_fname, dbcommon_fname + '.tmp')
        for h in common:
            db_common.add(h, 'T')
        db_common.finish()
        del (db_common)

        ## get headers that are in only one trimmed file ##
        symdiff = set(db1.keys()).symmetric_difference(set(db2.keys()))

        dbdiff_fname = 'dbdiff_%s' % rand
        db_diff = cdb.cdbmake(dbdiff_fname, dbdiff_fname + '.tmp')
        for h in symdiff:
            db_diff.add(h, 'T')
        db_diff.finish()
        del (db_diff)

        ## open common db ##
        db_common = cdb.init(dbcommon_fname)
        jobs = []
        p = multiprocessing.Process(target=write_out,
                                    args=(db_common, f1, self.o[0]))
        p.start()
        jobs.append(p)

        p = multiprocessing.Process(target=write_out,
                                    args=(db_common, f2, self.o[1]))
        p.start()
        jobs.append(p)

        ## open single db ##
        self.single = [self.o[0] + '.single', self.o[1] + '.single']

        db_diff = cdb.init(dbdiff_fname)
        p = multiprocessing.Process(target=write_out,
                                    args=(db_diff, f1, self.single[0]))
        p.start()
        jobs.append(p)

        p = multiprocessing.Process(target=write_out,
                                    args=(db_diff, f2, self.single[1]))
        p.start()
        jobs.append(p)

        # wait for jobs to finish
        for job in jobs:
            job.join()

        # Clean up the temporary dbs and the (now split) inputs.
        rm_files([db1_fname, db2_fname, dbcommon_fname, dbdiff_fname, f1, f2])
示例#52
0
文件: pbindex.py 项目: yll1325/lwpb
def main():
    """Index a record stream by a key field, writing a cdb offset index.

    Command-line flags (Python 2 script):
      -R FORMAT   reader format: 'pb' (protobuf stream) or 'txt' (percent codec)
      -F FIELDS   comma-separated field names for the 'txt' reader
      -d DELIM    field delimiter for the 'txt' reader (default: tab)
      -p FILE     compiled .pb2 descriptor file for the 'pb' reader
      -m NAME     protobuf message type name
      -k KEY      record field to index on (required)
      -i TYPE     index type; only 'cdb' is supported (required)
      -o FILE     output index file (default derived from input name)
      -t FILE     temp file used while building the index
      -v          increase verbosity

    Returns 0 on success; raises Exception on missing/invalid parameters.
    """
    reader_format = 'pb'
    delim = '\t'
    fields = []
    key = None
    typename = ""
    pb2file = None
    pb2codec = None
    indextype = None
    indexer = None
    outfile = None
    tempfile = None
    fin = sys.stdin
    infile = '-'
    verbose = 0

    opts, args = getopt.getopt(sys.argv[1:], 'R:F:d:p:k:i:o:t:m:v')

    for o, a in opts:
        if o == '-R':
            reader_format = a
        elif o == '-F':
            fields = a.split(',')
        elif o == '-d':
            delim = a
        elif o == '-p':
            pb2file = a
        elif o == '-k':
            key = a
        elif o == '-m':
            typename = a
        elif o == '-o':
            outfile = a
        elif o == '-t':
            tempfile = a
        elif o == '-i':
            indextype = a
        elif o == '-v':
            verbose += 1

    if args:
        # NOTE(review): shift() is a helper from the original module
        # (presumably pops the first element); not visible in this chunk.
        infile = shift(args)
        fin = open(infile)  # open() instead of the deprecated file() builtin

    if key is None:
        raise Exception("missing key parameter, specify with -k")

    # create the indexer object

    if indextype == 'cdb':

        import cdb
        if not outfile: outfile = "%s-%s-%s.idx" % (infile, key, indextype)
        if not tempfile: tempfile = "%s.tmp" % outfile
        # cdbmake stages output in tempfile and produces outfile on finish()
        indexer = cdb.cdbmake(outfile, tempfile)

    elif indextype is None:

        raise Exception("missing index type parameter, specify with -i")

    # create the stream reader

    if reader_format == 'pb':
        import lwpb.stream
        import lwpb.codec
        pb2codec = lwpb.codec.MessageCodec(pb2file=pb2file, typename=typename)
        reader = lwpb.stream.StreamReader(fin, codec=pb2codec)
    elif reader_format == 'txt':
        import percent.stream
        import percent.codec
        txtcodec = percent.codec.PercentCodec(fields, delim)
        reader = percent.stream.PercentCodecReader(fin, txtcodec)
    else:
        raise Exception("bad reader format")

    # index all the records: map each record's key to its byte offset
    # in the input stream so records can be seeked to later

    for record in reader:
        indexkey = str(record[key])
        indexval = str(reader.current_offset)
        if verbose: print >> sys.stderr, indexkey
        indexer.add(indexkey, indexval)

    indexer.finish()

    return 0
示例#53
0
 def __init__(self):
     """Create a cdb maker backed by a fresh temp file in the working dir."""
     handle, self.fn = mkstemp('.cdb', dir=os.getcwd())
     # Only the path is needed; cdbmake manages the file itself.
     os.close(handle)
     self.maker = cdb.cdbmake(self.fn, self.fn + '.tmp')
示例#54
0
import cdb

TRIPLES_FILE_PATH = 'top_100000_triples.txt'
OUTPUT_PROPERTIES_DB = 'top_100000_properties.cdb'
OUTPUT_VALUES_DB = 'top_100000_values.cdb'

properties_db = cdb.cdbmake(OUTPUT_PROPERTIES_DB,
                            OUTPUT_PROPERTIES_DB + '.tmp')
values_db = cdb.cdbmake(OUTPUT_VALUES_DB, OUTPUT_VALUES_DB + '.tmp')

print "Loading DBPedia triples..."
for triple in open(TRIPLES_FILE_PATH, 'r'):
    subject, prop, value = triple.split('|$|')
    properties_db.add(subject, prop)
    values_db.add(subject + '|$|' + prop, value.rstrip('\n'))
print "Done"

properties_db.finish()
print "Properties cdb created"

values_db.finish()
print "Values cdb created"
示例#55
0
#!/usr/bin/env python

import zipfile
import csv
import cdb
import sys

csv.field_size_limit(sys.maxsize)

if __name__ == '__main__':

    latlon = cdb.cdbmake('latlon.cdb', 'latlon.cdb.tmp')
    geonames = cdb.cdbmake('geonames.cdb', 'geonames.cdb.tmp')
    for fn in sys.argv[1:]:
        zf = zipfile.ZipFile(fn, 'r')
        for name in zf.namelist():
            if name == 'readme.txt':
                continue
            info = zf.getinfo(name)
            print "Reading ", name, " ... ", info.file_size
            f = zf.open(name, 'r')

            try:
                reader = csv.reader(f, delimiter="\t")
                for row in reader:
                    geonameid = row[0]
                    name = row[1]
                    lat = row[4]
                    lon = row[5]
                    cl = row[6]
                    cc = row[8]
示例#56
0
                os.mkdir(ipath)
                done = True
            except OSError, err:
                if err.errno != errno.EEXIST:
                    raise
            if cntr > 2 and not done and self._itemspace <= 2 ** 31:
                self._itemspace *= 2
                cntr = 0
            elif cntr > 20:
                # XXX: UnexpectedBackendError() that propagates to user?
                raise Exception('Item space full!')

        nn = item.name.encode('utf-8')

        c = cdb.init(self._name_db)
        maker = cdb.cdbmake(self._name_db + '.ndb', self._name_db + '.tmp')
        r = c.each()
        while r:
            i, v = r
            if i == nn:
                # Oops. This item already exists! Clean up and error out.
                maker.finish()
                os.unlink(self._name_db + '.ndb')
                os.rmdir(ipath)
                if newrev is not None:
                    os.unlink(newrev)
                raise ItemAlreadyExistsError("Item '%r' already exists!" % item.name)
            else:
                maker.add(i, v)
            r = c.each()
        maker.add(nn, itemid)
示例#57
0
 def __init__(self, filename):
     """Open a cdb writer for *filename*, staging output in a '.tmp' file."""
     temp_path = filename + '.tmp'
     self.db = cdb.cdbmake(filename, temp_path)