Python choplist示例，utils.choplist Python示例

示例#1

0

显示文件

文件： psparser.py 项目： AshutoshUpadhya/pdfminer.six

    def nextobject(self):
        """Yields a list of objects.

        Returns keywords, literals, strings, numbers, arrays and dictionaries.
        Arrays and dictionaries are represented as Python lists and dictionaries.
        """
        while not self.results:
            (pos, token) = self.nexttoken()
            #print (pos,token), (self.curtype, self.curstack)
            if isinstance(token, (int, long, float, bool, str, PSLiteral)):
                # normal token
                self.push((pos, token))
            elif token == KEYWORD_ARRAY_BEGIN:
                # begin array
                self.start_type(pos, 'a')
            elif token == KEYWORD_ARRAY_END:
                # end array
                try:
                    self.push(self.end_type('a'))
                except PSTypeError:
                    if STRICT:
                        raise
            elif token == KEYWORD_DICT_BEGIN:
                # begin dictionary
                self.start_type(pos, 'd')
            elif token == KEYWORD_DICT_END:
                # end dictionary
                try:
                    (pos, objs) = self.end_type('d')
                    if len(objs) % 2 != 0:
                        raise PSSyntaxError('Invalid dictionary construct: %r' % objs)
                    # construct a Python dictionary.
                    d = dict((literal_name(k), v) for (k, v) in choplist(2, objs) if v is not None)
                    self.push((pos, d))
                except PSTypeError:
                    if STRICT:
                        raise
            elif token == KEYWORD_PROC_BEGIN:
                # begin proc
                self.start_type(pos, 'p')
            elif token == KEYWORD_PROC_END:
                # end proc
                try:
                    self.push(self.end_type('p'))
                except PSTypeError:
                    if STRICT:
                        raise
            else:
                if 2 <= self.debug:
                    print >>sys.stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
                          (pos, token, self.curstack)
                self.do_keyword(pos, token)
            if self.context:
                continue
            else:
                self.flush()
        obj = self.results.pop(0)
        if 2 <= self.debug:
            print >>sys.stderr, 'nextobject: %r' % (obj,)
        return obj

示例#2

0

显示文件

文件： fwalk.py 项目： verolero86/pcircle

def main():
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format("< ",
                    utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

    if args.stats:
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(utils.bytes_fmt(globaltops[i].st_size),
                      globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()

示例#3

0

显示文件

文件： pdfparser.py 项目： AmyRFinnegan/pdfminer

 def lookup(d):
     if 'Limits' in d:
         (k1,k2) = list_value(d['Limits'])
         if key < k1 or k2 < key: return None
         if 'Names' in d:
             objs = list_value(d['Names'])
             names = dict(choplist(2, objs))
             return names[key]
     if 'Kids' in d:
         for c in list_value(d['Kids']):
             v = lookup(dict_value(c))
             if v: return v
     raise KeyError((cat,key))

示例#4

0

显示文件

文件： pdfdocument.py 项目： upjohnc/pdfminer

 def lookup(d):
     if 'Limits' in d:
         (k1, k2) = list_value(d['Limits'])
         if key < k1 or k2 < key: return None
     if 'Names' in d:
         objs = list_value(d['Names'])
         names = dict(choplist(2, objs))
         return names[key]
     if 'Kids' in d:
         for c in list_value(d['Kids']):
             v = lookup(dict_value(c))
             if v: return v
     raise KeyError((cat, key))

示例#5

0

显示文件

文件： psparser.py 项目： abduld/pdftoref

 def nextobject(self):
   '''
   Yields a list of objects: keywords, literals, strings, 
   numbers, arrays and dictionaries. Arrays and dictionaries
   are represented as Python sequence and dictionaries.
   '''
   while not self.results:
     (pos, token) = self.nexttoken()
     #print (pos,token), (self.curtype, self.curstack)
     if (isinstance(token, int) or
         isinstance(token, float) or
         isinstance(token, bool) or
         isinstance(token, str) or
         isinstance(token, PSLiteral)):
       # normal token
       self.push((pos, token))
     elif token == KEYWORD_ARRAY_BEGIN:
       # begin array
       self.start_type(pos, 'a')
     elif token == KEYWORD_ARRAY_END:
       # end array
       try:
         self.push(self.end_type('a'))
       except PSTypeError:
         if STRICT: raise
     elif token == KEYWORD_DICT_BEGIN:
       # begin dictionary
       self.start_type(pos, 'd')
     elif token == KEYWORD_DICT_END:
       # end dictionary
       try:
         (pos, objs) = self.end_type('d')
         if len(objs) % 2 != 0:
           raise PSSyntaxError('invalid dictionary construct: %r' % objs)
         d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs))
         self.push((pos, d))
       except PSTypeError:
         if STRICT: raise
     else:
       if 2 <= self.debug:
         print >>stderr, 'do_keyword: pos=%r, token=%r, stack=%r' % \
               (pos, token, self.curstack)
       self.do_keyword(pos, token)
     if self.context:
       continue
     else:
       self.flush()
   obj = self.results.pop(0)
   if 2 <= self.debug:
     print >>stderr, 'nextobject: %r' % (obj,)
   return obj

示例#6

0

显示文件

文件： pdfinterp.py 项目： mcs07/pdfminer

 def do_keyword(self, pos, token):
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, 'inline')
     elif token is self.KEYWORD_ID:
         try:
             (_, objs) = self.end_type('inline')
             if len(objs) % 2 != 0:
                 raise PSTypeError('Invalid dictionary construct: %r' % objs)
             d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
             (pos, data) = self.get_inline_data(pos + len('ID '))
             obj = PDFStream(d, data)
             self.push((pos, obj))
             self.push((pos, self.KEYWORD_EI))
         except PSTypeError, e:
             handle_error(type(e), str(e))

示例#7

0

显示文件

def get_widths2(seq):
    widths = {}
    r = []
    for v in seq:
        if isinstance(v, list):
            if r:
                char1 = r[-1]
                for (i,(w,vx,vy)) in enumerate(choplist(3,v)):
                    widths[char1+i] = (w,(vx,vy))
                r = []
        elif isinstance(v, int):
            r.append(v)
            if len(r) == 5:
                (char1,char2,w,vx,vy) = r
                for i in xrange(char1, char2+1):
                    widths[i] = (w,(vx,vy))
                r = []
    return widths

示例#8

0

显示文件

文件： pdfparser.py 项目： mcs07/pdfminer

 def load(self, parser):
     (_, objid) = parser.nexttoken()  # ignored
     (_, genno) = parser.nexttoken()  # ignored
     (_, kwd) = parser.nexttoken()
     (_, stream) = parser.nextobject()
     if not isinstance(stream, PDFStream) or stream['Type'] is not LITERAL_XREF:
         raise PDFNoValidXRef('Invalid PDF stream spec.')
     size = stream['Size']
     index_array = stream.get('Index', (0, size))
     if len(index_array) % 2 != 0:
         raise PDFSyntaxError('Invalid index number')
     self.objid_ranges.extend(ObjIdRange(start, nobjs) for (start, nobjs) in choplist(2, index_array))
     (self.fl1, self.fl2, self.fl3) = stream['W']
     self.data = stream.get_data()
     self.entlen = self.fl1 + self.fl2 + self.fl3
     self.trailer = stream.attrs
     log.info('xref stream: objid=%s, fields=%d,%d,%d',
                  ', '.join(map(repr, self.objid_ranges)), self.fl1, self.fl2, self.fl3)

示例#9

0

显示文件

文件： pdfinterp.py 项目： upjohnc/pdfminer

 def do_keyword(self, pos, token):
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, 'inline')
     elif token is self.KEYWORD_ID:
         try:
             (_, objs) = self.end_type('inline')
             if len(objs) % 2 != 0:
                 raise PSTypeError('Invalid dictionary construct: %r' % objs)
             d = dict( (literal_name(k), v) for (k,v) in choplist(2, objs) )
             (pos, data) = self.get_inline_data(pos+len('ID '))
             obj = PDFStream(d, data)
             self.push((pos, obj))
             self.push((pos, self.KEYWORD_EI))
         except PSTypeError:
             if STRICT: raise
     else:
         self.push((pos, token))
     return

示例#10

0

显示文件

文件： pdfinterp.py 项目： upjohnc/pdfminer

 def do_keyword(self, pos, token):
     if token is self.KEYWORD_BI:
         # inline image within a content stream
         self.start_type(pos, "inline")
     elif token is self.KEYWORD_ID:
         try:
             (_, objs) = self.end_type("inline")
             if len(objs) % 2 != 0:
                 raise PSTypeError("Invalid dictionary construct: %r" % objs)
             d = dict((literal_name(k), v) for (k, v) in choplist(2, objs))
             (pos, data) = self.get_inline_data(pos + len("ID "))
             obj = PDFStream(d, data)
             self.push((pos, obj))
             self.push((pos, self.KEYWORD_EI))
         except PSTypeError:
             if STRICT:
                 raise
     else:
         self.push((pos, token))
     return

示例#11

0

显示文件

文件： pdfdocument.py 项目： webpet/pdfminer

 def load(self, parser, debug=0):
     (_, objid) = parser.nexttoken()  # ignored
     (_, genno) = parser.nexttoken()  # ignored
     (_, kwd) = parser.nexttoken()
     (_, stream) = parser.nextobject()
     if not isinstance(stream, PDFStream) or stream['Type'] is not LITERAL_XREF:
         raise PDFNoValidXRef('Invalid PDF stream spec.')
     size = stream['Size']
     index_array = stream.get('Index', (0, size))
     if len(index_array) % 2 != 0:
         raise PDFSyntaxError('Invalid index number')
     self.ranges.extend(choplist(2, index_array))
     (self.fl1, self.fl2, self.fl3) = stream['W']
     self.data = stream.get_data()
     self.entlen = self.fl1+self.fl2+self.fl3
     self.trailer = stream.attrs
     if 1 <= debug:
         print >>sys.stderr, ('xref stream: objid=%s, fields=%d,%d,%d' %
                              (', '.join(map(repr, self.ranges)),
                              self.fl1, self.fl2, self.fl3))
     return

示例#12

0

显示文件

def main():
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False
    G.memitem_threshold = args.item

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    if args.rid:
        G.resume = True
        args.force = True
        G.rid = args.rid
        args.signature = False # when recovery, no signature

    if not args.cpid:
        ts = utils.timestamp()
        args.cpid = MPI.COMM_WORLD.bcast(ts)

    G.tempdir = os.path.join(os.getcwd(),(".pcircle" + args.cpid))
    if not os.path.exists(G.tempdir):
        try:
            os.mkdir(G.tempdir)
        except OSError:
            pass

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()

    circle = Circle(dbname="fwalk")
    #circle.dbname = dbname

    global oflimit

    if num_of_hosts != 0:
        max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
        procs_per_host = circle.size // num_of_hosts
        oflimit = ((max_ofile - 64) // procs_per_host) // 2
    if oflimit < 8:
            oflimit = 8


    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
            "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
            "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
            "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
            "Checkpoint ID:", "%s" % args.cpid))

        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Items in memory: ",
            " % r" % G.memitem_threshold, "|", "O file limit", "%s" % oflimit))
        #
        if args.verbosity > 0:
            print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))

    fcp_start()

    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        circle = Circle(dbname="verify")
        pcheck = PVerify(circle, fcp, G.total_chunks, T.total_filesize, args.signature)
        circle.begin(pcheck)
        circle.finalize()
        tally = pcheck.fail_tally()
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Verify result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Verify result:", "FAILED"))

        comm.Barrier()

        if args.signature and tally == 0:
            gen_signature(pcheck.bfsign, T.total_filesize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()
    if fcp:
        fcp.cleanup()
    #if circle:
    #    circle.finalize(cleanup=True)
    comm.Barrier()
    if comm.rank == 0:
        try:
            os.rmdir(G.tempdir)
        except:
            pass

示例#13

0

显示文件

文件： fcp.py 项目： verolero86/pcircle

def main():
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()

    circle = Circle()
    circle.dbname = dbname

    if args.rid:
        circle.resume = True
        args.signature = False # when recovery, no signature

    if not args.cpid:
        ts = utils.timestamp()
        args.cpid = circle.comm.bcast(ts)

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
            "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
            "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
            "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
            "Checkpoint ID:", "%s" % args.cpid))

        #
        if args.verbosity > 0:
            print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))


    fcp_start()

    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        circle = Circle()
        pcheck = PVerify(circle, fcp, G.totalsize)
        circle.begin(pcheck)
        tally = pcheck.fail_tally()
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Result:", "FAILED"))

        comm.Barrier()

        if args.signature and tally == 0:
            gen_signature(fcp, G.totalsize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()

    if fcp:
        fcp.epilogue()
        fcp.cleanup()

    # if circle:
    #     circle.finalize(cleanup=True)
    # TODO: a close file error can happen when circle.finalize()
    #
    if isinstance(circle.workq, DbStore):
        circle.workq.cleanup()

示例#14

0

显示文件

文件： cmapdb.py 项目： dpkm95/PDF-Content-Indexing-System

    def do_keyword(self, pos, token):
        name = token.name
        if name == 'begincmap':
            self._in_cmap = True
            self.popall()
            return
        elif name == 'endcmap':
            self._in_cmap = False
            return
        if not self._in_cmap:
            return
        #
        if name == 'def':
            try:
                ((_, k), (_, v)) = self.pop(2)
                self.cmap.set_attr(literal_name(k), v)
            except PSSyntaxError:
                pass
            return

        if name == 'usecmap':
            try:
                ((_, cmapname), ) = self.pop(1)
                self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
            except PSSyntaxError:
                pass
            except CMapDB.CMapNotFound:
                pass
            return

        if name == 'begincodespacerange':
            self.popall()
            return
        if name == 'endcodespacerange':
            self.popall()
            return

        if name == 'begincidrange':
            self.popall()
            return
        if name == 'endcidrange':
            objs = [obj for (__, obj) in self.popall()]
            for (s, e, cid) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str)
                        or not isinstance(cid, int) or len(s) != len(e)):
                    continue
                sprefix = s[:-4]
                eprefix = e[:-4]
                if sprefix != eprefix:
                    continue
                svar = s[-4:]
                evar = e[-4:]
                s1 = nunpack(svar)
                e1 = nunpack(evar)
                vlen = len(svar)
                #assert s1 <= e1
                for i in xrange(e1 - s1 + 1):
                    x = sprefix + struct.pack('>L', s1 + i)[-vlen:]
                    self.cmap.add_code2cid(x, cid + i)
            return

        if name == 'begincidchar':
            self.popall()
            return
        if name == 'endcidchar':
            objs = [obj for (__, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.add_code2cid(code, nunpack(cid))
            return

        if name == 'beginbfrange':
            self.popall()
            return
        if name == 'endbfrange':
            objs = [obj for (__, obj) in self.popall()]
            for (s, e, code) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str)
                        or len(s) != len(e)):
                    continue
                s1 = nunpack(s)
                e1 = nunpack(e)
                #assert s1 <= e1
                if isinstance(code, list):
                    for i in xrange(e1 - s1 + 1):
                        self.cmap.add_cid2unichr(s1 + i, code[i])
                else:
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
                    vlen = len(var)
                    for i in xrange(e1 - s1 + 1):
                        x = prefix + struct.pack('>L', base + i)[-vlen:]
                        self.cmap.add_cid2unichr(s1 + i, x)
            return

        if name == 'beginbfchar':
            self.popall()
            return
        if name == 'endbfchar':
            objs = [obj for (__, obj) in self.popall()]
            for (cid, code) in choplist(2, objs):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.add_cid2unichr(nunpack(cid), code)
            return

        if name == 'beginnotdefrange':
            self.popall()
            return
        if name == 'endnotdefrange':
            self.popall()
            return

        self.push((pos, token))
        return

示例#15

0

显示文件

文件： cmapdb.py 项目： 131db/emotweets

    def do_keyword(self, pos, token):
        name = token.name
        if name == 'begincmap':
            self._in_cmap = True
            self.popall()
            return
        elif name == 'endcmap':
            self._in_cmap = False
            return
        if not self._in_cmap: return
        #
        if name == 'def':
            try:
                ((_,k),(_,v)) = self.pop(2)
                self.cmap.set_attr(literal_name(k), v)
            except PSSyntaxError:
                pass
            return

        if name == 'usecmap':
            try:
                ((_,cmapname),) = self.pop(1)
                self.cmap.use_cmap(CMapDB.get_cmap(literal_name(cmapname)))
            except PSSyntaxError:
                pass
            except CMapDB.CMapNotFound:
                pass
            return

        if name == 'begincodespacerange':
            self.popall()
            return
        if name == 'endcodespacerange':
            self.popall()
            return

        if name == 'begincidrange':
            self.popall()
            return
        if name == 'endcidrange':
            objs = [ obj for (_,obj) in self.popall() ]
            for (s,e,cid) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str) or
                    not isinstance(cid, int) or len(s) != len(e)): continue
                sprefix = s[:-4]
                eprefix = e[:-4]
                if sprefix != eprefix: continue
                svar = s[-4:]
                evar = e[-4:]
                s1 = nunpack(svar)
                e1 = nunpack(evar)
                vlen = len(svar)
                #assert s1 <= e1
                for i in xrange(e1-s1+1):
                    x = sprefix+struct.pack('>L',s1+i)[-vlen:]
                    self.cmap.add_code2cid(x, cid+i)
            return

        if name == 'begincidchar':
            self.popall()
            return
        if name == 'endcidchar':
            objs = [ obj for (_,obj) in self.popall() ]
            for (cid,code) in choplist(2, objs):
                if isinstance(code, str) and isinstance(cid, str):
                    self.cmap.add_code2cid(code, nunpack(cid))
            return

        if name == 'beginbfrange':
            self.popall()
            return
        if name == 'endbfrange':
            objs = [ obj for (_,obj) in self.popall() ]
            for (s,e,code) in choplist(3, objs):
                if (not isinstance(s, str) or not isinstance(e, str) or
                    len(s) != len(e)): continue
                s1 = nunpack(s)
                e1 = nunpack(e)
                #assert s1 <= e1
                if isinstance(code, list):
                    for i in xrange(e1-s1+1):
                        self.cmap.add_cid2unichr(s1+i, code[i])
                else:
                    var = code[-4:]
                    base = nunpack(var)
                    prefix = code[:-4]
                    vlen = len(var)
                    for i in xrange(e1-s1+1):
                        x = prefix+struct.pack('>L',base+i)[-vlen:]
                        self.cmap.add_cid2unichr(s1+i, x)
            return

        if name == 'beginbfchar':
            self.popall()
            return
        if name == 'endbfchar':
            objs = [ obj for (_,obj) in self.popall() ]
            for (cid,code) in choplist(2, objs):
                if isinstance(cid, str) and isinstance(code, str):
                    self.cmap.add_cid2unichr(nunpack(cid), code)
            return

        if name == 'beginnotdefrange':
            self.popall()
            return
        if name == 'endnotdefrange':
            self.popall()
            return

        self.push((pos, token))
        return

示例#16

0

显示文件

文件： fsum.py 项目： fwang2/pcircle

def main():
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    #G.use_store = args.use_store
    G.reduce_interval = args.interval
    G.memitem_threshold = args.item

    hosts_cnt = tally_hosts()
    circle = Circle()

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
        print("\t{:<20}{:<20}".format("Items in memory:", G.memitem_threshold))

    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()

    fwalk.epilogue()
    circle.finalize()


    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(T.total_filesize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)

    if circle.rank == 0:
        print("Chunksize = ", chunksize)

    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, T.total_filesize, T.total_files)

    circle.begin(fcheck)
    circle.finalize()

    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")

    """
    chunkl = circle.comm.gather(fcheck.chunkq)

    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")
    """
    if circle.rank > 0:
        circle.comm.send(fcheck.bfsign.bitarray, dest=0)
    else:
        for p in xrange(1, circle.comm.size):
            other_bitarray = circle.comm.recv(source=p)
            fcheck.bfsign.or_bf(other_bitarray)
    circle.comm.Barrier()

    if circle.comm.rank == 0:
        sha1val = fcheck.bfsign.gen_signature()
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)

    fcheck.epilogue()

    if circle.comm.rank == 0:
        if os.path.exists(G.tempdir):
            shutil.rmtree(G.tempdir, ignore_errors=True)

示例#17

0

显示文件

文件： fwalk.py 项目： vikramkhatri/pcircle

def main():
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:",
                                      MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format(
                    "< ", utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

    if args.stats:
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size),
                            reverse=True)
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size),
                            reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(
                    utils.bytes_fmt(globaltops[i].st_size),
                    globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()

示例#18

0

显示文件

文件： fsum.py 项目： vikramkhatri/pcircle

def main():
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    #G.use_store = args.use_store
    G.reduce_interval = args.interval
    G.memitem_threshold = args.item

    hosts_cnt = tally_hosts()
    circle = Circle()

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:",
                                      MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
        print("\t{:<20}{:<20}".format("Items in memory:", G.memitem_threshold))

    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()

    fwalk.epilogue()
    circle.finalize()

    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(T.total_filesize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)

    if circle.rank == 0:
        print("Chunksize = ", chunksize)

    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, T.total_filesize,
                      T.total_files)

    circle.begin(fcheck)
    circle.finalize()

    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")
    """
    chunkl = circle.comm.gather(fcheck.chunkq)

    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")
    """
    if circle.rank > 0:
        circle.comm.send(fcheck.bfsign.bitarray, dest=0)
    else:
        for p in xrange(1, circle.comm.size):
            other_bitarray = circle.comm.recv(source=p)
            fcheck.bfsign.or_bf(other_bitarray)
    circle.comm.Barrier()

    if circle.comm.rank == 0:
        sha1val = fcheck.bfsign.gen_signature()
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)

    fcheck.epilogue()

    if circle.comm.rank == 0:
        if os.path.exists(G.tempdir):
            shutil.rmtree(G.tempdir, ignore_errors=True)

示例#19

0

显示文件

文件： fsum.py 项目： verolero86/pcircle

def main():
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    G.use_store = args.use_store
    G.reduce_interval = args.interval



    hosts_cnt = tally_hosts()
    circle = Circle()

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()
    totalsize = fwalk.epilogue()
    circle.finalize()

    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(totalsize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)

    if circle.rank == 0:
        print("Chunksize = ", chunksize)

    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, totalsize)

    circle.begin(fcheck)
    circle.finalize()

    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")

    chunkl = circle.comm.gather(fcheck.chunkq)

    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % totalsize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")

    fcheck.epilogue()