Example #1
    def _autocomplete_request(self, view, cache, request, 
            text, offset, included=lambda item: True):
        """
        """
        # this should not happen, but just in case, do not
        # overload the system with too many requests
        if len(self.current_requests) > self.get_settings(view, "concurrent_request_limit", 4):
            raise AutocompleteRequestError("Request denied: too many concurrent requests.")

        # prevent duplicate requests
        if request in self.current_requests:
            raise AutocompleteRequestError(
                "Request denied: completion for \"{request}\" "
                "already in progress.".format(request=request)
            )

        # start request
        self.current_requests.add(request)

        # get completion command
        cmd = self.get_completion_cmd(view, text, offset)

        # run completion command
        p = Popen(cmd, shell=True, stdout=PIPE, stderr=STDOUT)
        parser = ijson.parse(p.stdout)
        completions = list(self._parse_completions(parser, included=included))

        # finish request
        self.current_requests.discard(request)

        return completions
Example #2
def gen_entropy_data(json):
    entropy = []
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.entropy'):
            entropy.append(float(value))

    return entropy
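
Most of the examples on this page share one pattern: ijson.parse yields (prefix, event, value) triples, and the code dispatches on prefix suffixes. A minimal sketch of what that event stream looks like, on a made-up document:

import io

import ijson

doc = io.BytesIO(b'{"entropy": 7.2, "dylibs": ["libssl"]}')
for prefix, event, value in ijson.parse(doc):
    print(prefix, event, value)
# Events emitted (numbers arrive as Decimal; array elements appear under '.item'):
#   ''             start_map    None
#   ''             map_key      entropy
#   'entropy'      number       7.2
#   ''             map_key      dylibs
#   'dylibs'       start_array  None
#   'dylibs.item'  string       libssl
#   'dylibs'       end_array    None
#   ''             end_map      None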
Example #3
def gen_dylibs_data(json):
    dylibs = Counter()
    total = 0
    # execute = None
    # temp_dylibs = []
    for prefix, event, value in ijson.parse(json):
        #if prefix.endswith('.macho') or prefix.endswith('.machos.item'):
            #if execute:
            #    for d in set(temp_dylibs):
            #        dylibs[d] += 1
            #    del temp_dylibs[:]
            #execute = None
        if prefix.endswith('.filetype'):
            #if value == 'EXECUTE':
            #    execute = True
            total += 1
            #else:
            #    execute = False
        if prefix.endswith('.dylibs.item'):
            #if execute or execute is None:
            #    temp_dylibs.append(value)
            dylibs[value] += 1

    #if execute:
    #    for d in set(temp_dylibs):
    #        dylibs[d] += 1

    for i in dylibs:
        dylibs[i] = dylibs[i] * (1.0 / total)

    return dylibs
Example #4
def gen_imports_data(json):
    imports = Counter()
    total = 0
    function = False
    # execute = None
    # temp_imports = []
    for prefix, event, value in ijson.parse(json):
        #if prefix.endswith('.macho') or prefix.endswith('.machos.item'):
        #    if execute:
        #        for t in set(temp_imports):
        #            imports[t] += 1
        #        del temp_imports[:]
        #    execute = None
        if prefix.endswith('.filetype'):
            #if value == 'EXECUTE':
            #    execute = True
            total += 1
            #else:
            #    execute = False
        if prefix.endswith('.imports.item'):
            function = True
        if prefix.endswith('.imports.item.item') and function:
            #if execute or execute is None:
            #    temp_imports.append(value)
            imports[value] += 1
            function = False

    for i in imports:
        imports[i] = imports[i] * (1.0 / total)

    return imports
Example #5
def gen_lcs_data(json):
    lcs = Counter()
    total = 0
    # execute = None
    temp = None
    segment = False
    segment_64 = False
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.filetype'):
            total += 1
        if prefix.endswith('.lcs.item.cmd'):
            if value == 'LOAD_DYLIB':
                continue
            elif value == 'SEGMENT':
                segment = True
            elif value == 'SEGMENT_64':
                segment_64 = True
            else:
                lcs[value] += 1
        if prefix.endswith('.lcs.item.name'):
            if segment:
                lcs['SEGMENT (' + value + ')'] += 1
                segment = False
            elif segment_64:
                lcs['SEGMENT_64 (' + value + ')'] += 1
                segment_64 = False

    print('Samples parsed:', total)

    for l in lcs.keys():
        lcs[l] = lcs[l] * (1.0 / total)

    return lcs
Example #7
def gen_ndylibs_data(json):
    data = []
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.ndylibs'):
            data.append(value)

    return data
Example #9
def gen_slcs_data(json):
    slcs = []
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.slcs'):
            slcs.append(value)

    return slcs
Example #10
def gen_sects_data(json):
    sects = Counter()
    total = 0
    segname = None
    name = None
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.filetype'):
            total += 1
        elif prefix.endswith('.sects.item.segname'):
            if name is None:
                segname = value
            else:
                sects[value + ', ' + name] += 1
                name = None
        elif prefix.endswith('.sects.item.name'):
            if segname is None:
                name = value
            else:
                sects[segname + ', ' + value] += 1
                segname = None

    for s in sects.keys():
        sects[s] = sects[s] * (1.0 / total)

    return sects
Example #11
def json2generator(data, arrayKey=None):
   """
   Функция конвертирует переданный json в генератор. Это позволяет избежать утечки памяти на огромных обьемах данных. Может выдать генератор только для массива (неважно какой вложенности и сложности). arrayKey должен указывать на массив, может быть цепочкой (key1.key2)
   """
   from ijson import common
   # from io import BytesIO
   from cStringIO import StringIO
   import math                   # used by _fixJSON below
   from itertools import imap    # this is Python 2 code
   #! The yajl2 backend is considerably faster, but it could never be installed on the first server; it reports "Yajl shared object cannot be found"
   try: import ijson.backends.yajl2_cffi as ijson
   except:
      try: from ijson.backends import yajl2 as ijson
      except:
         try: from ijson.backends import yajl as ijson
         except: from ijson.backends import python as ijson
   try: f=StringIO(data)
   except: f=StringIO(data.encode('utf-8'))
   def _fixJSON(event):
      # works around a decoding "feature" that tries to coerce every numeric type to Decimal()
      if event[1]=='number':
         return (event[0], event[1], float(event[2]) if math.modf(event[2])[0] else int(event[2]))
      else: return event
   events=imap(_fixJSON, ijson.parse(f))
   g=common.items(events, (arrayKey+'.item' if arrayKey else 'item'))
   # g=ijson.items(f, (arrayKey+'.item' if arrayKey else 'item'))
   return g
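
A hypothetical call, assuming a Python 2 runtime (the function relies on cStringIO and imap) and a payload whose records live under data.rows:

for row in json2generator('{"data": {"rows": [{"x": 1}, {"x": 2}]}}', arrayKey='data.rows'):
    print(row)  # {'x': 1}, then {'x': 2}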
Example #12
def gen_dylibs_data(json):
    dylibs = Counter()
    total = 0
    # execute = None
    # temp_dylibs = []
    for prefix, event, value in ijson.parse(json):
        #if prefix.endswith('.macho') or prefix.endswith('.machos.item'):
            #if execute:
            #    for d in set(temp_dylibs):
            #        dylibs[d] += 1
            #    del temp_dylibs[:]
            #execute = None
        if prefix.endswith('.filetype'):
            #if value == 'EXECUTE':
            #    execute = True
            total += 1
            #else:
            #    execute = False
        if prefix.endswith('.dylibs.item'):
            #if execute or execute is None:
            #    temp_dylibs.append(value)
            dylibs[value] += 1

    #if execute:
    #    for d in set(temp_dylibs):
    #        dylibs[d] += 1

    for i in dylibs:
        dylibs[i] = dylibs[i] * (1.0 / total)

    return dylibs
Example #13
    def validate(self, filepath):
        with open(filepath, 'rb') as vfile:
            parser = ijson.parse(vfile)
            model = list(self._model_prefixes)
            for prefix, event, value in parser:
                pair = (prefix, event)
                if pair in model:
                    model.remove(pair)
            return len(model) == 0
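
validate() passes only if every (prefix, event) pair in self._model_prefixes occurs somewhere in the file. _model_prefixes itself is not shown in the source; a hypothetical model requiring an object with a string "name" and an "items" array might look like:

_model_prefixes = [
    ('', 'start_map'),
    ('name', 'string'),
    ('items', 'start_array'),
    ('', 'end_map'),
]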
Example #14
def get_proportions(account_stats, conf, silent=True):
    """
    We have a fixed amount of CREA to give out, specified by total_port_balance
    This needs to be given out subject to the following constraints:
    - The ratio of vesting : liquid CREA is the same on testnet,
    - Everyone's testnet balance is proportional to their mainnet balance
    - Everyone has at least min_vesting_per_account
    """

    total_vests = account_stats["total_vests"]
    total_crea = account_stats["total_crea"]
    account_names = account_stats["account_names"]
    num_accounts = len(account_names)

    with open(conf["snapshot_file"], "rb") as f:
        for prefix, event, value in ijson.parse(f):
            if prefix == "dynamic_global_properties.total_vesting_fund_crea.amount":
                total_vesting_crea = int(value)
                break

    min_vesting_per_account = satoshis(conf["min_vesting_per_account"])
    total_port_balance = satoshis(conf["total_port_balance"])
    avail_port_balance = total_port_balance - min_vesting_per_account * num_accounts
    if avail_port_balance < 0:
        raise RuntimeError(
            "Increase total_port_balance or decrease min_vesting_per_account")
    total_port_vesting = (avail_port_balance * total_vesting_crea) // (
        total_crea + total_vesting_crea)
    total_port_liquid = (avail_port_balance *
                         total_crea) // (total_crea + total_vesting_crea)

    if total_vests == 0:
        vest_conversion_factor = 1
    else:
        vest_conversion_factor = (DENOM * total_port_vesting) // total_vests

    if total_crea == 0:
        crea_conversion_factor = 1
    else:
        crea_conversion_factor = (DENOM * total_port_liquid) // total_crea

    if not silent:
        print("total_vests:", total_vests)
        print("total_crea:", total_crea)
        print("total_vesting_crea:", total_vesting_crea)
        print("total_port_balance:", total_port_balance)
        print("total_port_vesting:", total_port_vesting)
        print("total_port_liquid:", total_port_liquid)
        print("vest_conversion_factor:", vest_conversion_factor)
        print("crea_conversion_factor:", crea_conversion_factor)

    return {
        "min_vesting_per_account": min_vesting_per_account,
        "vest_conversion_factor": vest_conversion_factor,
        "crea_conversion_factor": crea_conversion_factor
    }
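
The split above preserves the mainnet vesting : liquid ratio because both integer divisions share the denominator total_crea + total_vesting_crea. A quick check with made-up numbers:

# Assumed toy values, purely for illustration.
avail_port_balance = 1000
total_crea = 300
total_vesting_crea = 700
denom = total_crea + total_vesting_crea
total_port_vesting = (avail_port_balance * total_vesting_crea) // denom  # 700
total_port_liquid = (avail_port_balance * total_crea) // denom           # 300
# 700 : 300 is the same 7:3 vesting:liquid ratio as on mainnet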
Example #15
def do_filter(input_file, output_file, filters, verbose):
    logging.basicConfig(level=logging.INFO if verbose else logging.WARNING)
    logger = logging.getLogger('JSON-FILTER')
    json_filter = JsonEventFilter(ijson.parse(input_file), filters)
    writer = ObjectWriter(output_file)
    start_time = time.time()
    for prefix, event, value in json_filter:
        logger.info('%s(%s): %s' % (prefix, event.upper(), value))
        writer.event(event, value)
    logger.info('Finished in: %s seconds.' % (time.time() - start_time))
Example #16
def gprocess(i, fns):
    """Iteratively parse the file object and generate the output

	Arguments:
	i: the index of the file to process
	fns: a list of filenames
	"""
    dbs = list()
    for fn in fns:
        try:
            db = rocksdb.DB(fn + '.db',
                            rocksdb.Options(create_if_missing=False),
                            read_only=True)
            dbs.append(db)
        except:
            raise ValueError(
                "Given DB: {}.db does not exist. Are you sure the name is correct?"
                .format(fn))

    fileobj = open(os.path.join(args.input, fns[i]), 'r')

    ofilename = args.trace + '-out.' + str(i) + '.txt'
    if args.verbose:
        print("\x1b[6;30;43m[i]\x1b[0m opening output file {} for writing...".
              format(ofilename))
    ofile = open(ofilename, 'a+')

    parser = ijson.common.items(ijson.parse(fileobj, multiple_values=True), '')

    if args.trace == 'camflow':
        if args.verbose:
            print("\x1b[6;30;42m[+]\x1b[0m parsing file {} in CAMFLOW mode...".
                  format(i))
        ptj.gencf(parser, i, dbs, ofile)

    elif args.trace == 'darpa':
        if args.verbose:
            print("\x1b[6;30;42m[+]\x1b[0m parsing file {} in DARPA mode...".
                  format(i))
        ptj.gendp(parser, i, dbs, ofile)

    elif args.trace == 'cadets2' or args.trace == 'fivedirections':
        if args.verbose:
            print(
                "\x1b[6;30;42m[+]\x1b[0m parsing file {} in CADETS2/FIVEDIRECTIONS mode..."
                .format(i))
        ptj.gencd(parser, i, dbs, ofile)

    else:
        raise NotImplementedError("cannot run traces from an unknown system")

    fileobj.close()
    ofile.close()
    return
Example #17
def process(fn):
    """Iteratively process an file object.

	Arguments:
	fn - file name
	"""
    if args.profile:
        if args.verbose:
            print("\x1b[6;30;43m[i]\x1b[0m profiling is on...")
        yappi.clear_stats()
        yappi.set_clock_type('cpu')
        yappi.start(builtins=True)

    db = initdb(fn)

    with open(os.path.join(args.input, fn), 'r') as fileobj:
        parser = ijson.common.items(ijson.parse(fileobj, multiple_values=True),
                                    '')

        if args.trace == 'camflow':
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m parsing file {} in CAMFLOW mode..."
                    .format(fn))
            ptj.parsecf(parser, db, fn)

        elif args.trace == 'darpa':
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m parsing file {} in DARPA mode...".
                    format(fn))
            ptj.parsedp(parser, db, fn)

        elif args.trace == 'cadets2' or args.trace == 'fivedirections':
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m parsing file {} in CADETS2/FIVEDIRECTIONS mode..."
                    .format(fn))
            ptj.parsecd(parser, db, fn)

        else:
            raise NotImplementedError(
                "cannot run traces from an unknown system")

    if args.profile:
        yappi.stop()
        if args.verbose:
            print("\x1b[6;30;43m[i]\x1b[0m profiling is done...")
        stat = yappi.get_func_stats()
        stat.save(fn + '.prof', type='callgrind')

    fileobj.close()
    return
Example #18
def yield_obj(path, basepath):
    with gzip.open(path, "r") as fin:
        builder = ijson.common.ObjectBuilder()
        for prefix, event, val in ijson.parse(fin):
            try:
                builder.event(event, val)
            except:
                if hasattr(builder, "value"):
                    print(builder.value)
            if prefix == basepath and event == "end_map":
                if hasattr(builder, "value"):
                    yield builder.value
                builder = ijson.common.ObjectBuilder()
Example #19
def serialize(args):
    """Consume raw JSON from the npm registry and spit out CSV for Postgres.
    """
    import ijson.backends.yajl2_cffi as ijson

    path = args.path
    parser = ijson.parse(open(path))
    start = time.time()
    package = None
    nprocessed = 0
    out = csv.writer(sys.stdout)

    def log_stats():
        log("processed {} packages in {:3.0f} seconds".format(
            nprocessed,
            time.time() - start))

    for prefix, event, value in parser:

        if not prefix and event == b'map_key':

            # Flush the current package. We count on the first package being garbage.
            processed = serialize_one(out, package)
            nprocessed += processed
            if processed and not (nprocessed % 1000):
                log_stats()

            # Start a new package.
            package = {
                'package_manager': b'npm',
                'name': value,
                'description': b'',
                'emails': []
            }

        key = lambda k: package['name'] + b'.' + k

        if event == b'string':
            assert type(
                value
            ) is unicode  # Who knew? Seems to decode only for `string`.
            value = value.encode('utf8')
            if prefix == key(b'description'):
                package['description'] = value
            elif prefix in (key(b'author.email'),
                            key(b'maintainers.item.email')):
                package['emails'].append(value)

    nprocessed += serialize_one(out, package)  # Don't forget the last one!
    log_stats()
Example #20
def load_geojson(file_name):
    with open(file_name, 'r') as fd:
        parser = ijson.parse(fd)
        for prefix, event, value in parser:
            if (prefix, event) == ('features.item', 'start_map'):
                feature = Feature()
            elif (prefix, event) == ('features.item', 'end_map'):
                yield feature
            if (prefix, event) == ('features.item.properties', 'start_map'):
                properties = JpCityProperties()
            elif (prefix, event) == ('features.item.properties', 'end_map'):
                feature.properties = properties.__dict__
            elif (prefix, event, value) == ('features.item.properties', 'map_key', 'A27_005'):
                properties.a27_005 = parser.next()[2]
            elif (prefix, event, value) == ('features.item.properties', 'map_key', 'A27_006'):
                properties.a27_006 = parser.next()[2]
            elif (prefix, event, value) == ('features.item.properties', 'map_key', 'A27_007'):
                properties.a27_007 = parser.next()[2]
            elif (prefix, event, value) == ('features.item.properties', 'map_key', 'A27_008'):
                properties.a27_008 = parser.next()[2]
            elif (prefix, event) == ('features.item.geometry', 'start_map'):
                geometry = MultiPolygon()
            elif (prefix, event) == ('features.item.geometry.type', 'string'):
                if value == "MultiPolygon":
                    geometry = MultiPolygon()
                elif value == "Polygon":
                    geometry = Polygon()
                else:
                    raise Exception
            elif (prefix, event) == ('features.item.geometry', 'end_map'):
                feature.geometry = geometry
            elif (prefix, event) == ('features.item.geometry.coordinates', 'start_array'):
                coordinates = []
            elif (prefix, event) == ('features.item.geometry.coordinates', 'end_array'):
                geometry.coordinates = coordinates
            elif (prefix, event) == ('features.item.geometry.coordinates.item', 'start_array'):
                coordinates_item = []
            elif (prefix, event) == ('features.item.geometry.coordinates.item', 'end_array'):
                coordinates.append(coordinates_item)
            elif (prefix, event) == ('features.item.geometry.coordinates.item.item', 'start_array'):
                if isinstance(geometry, MultiPolygon):
                    coordinates_item_item = []
                else:
                    coordinates_item.append((parser.next()[2], parser.next()[2]))
            elif (prefix, event) == ('features.item.geometry.coordinates.item.item', 'end_array'):
                if isinstance(geometry, MultiPolygon):
                    coordinates_item.append(coordinates_item_item)
            elif (prefix, event) == ('features.item.geometry.coordinates.item.item.item', 'start_array'):
                if isinstance(geometry, MultiPolygon):
                    coordinates_item_item.append((parser.next()[2], parser.next()[2]))
Example #21
def objects(file):
    key = '-'
    # for prefix, event, value in islice(ijson.parse(file), 10000):
    for prefix, event, value in ijson.parse(file):
        if prefix == '' and event == 'map_key':  # found new object at the root
            key = value  # mark the key value
            builder = ObjectBuilder()
        elif prefix.startswith(key):  # while at this key, build the object
            # if value == 'p' or value == 'pct':
            builder.event(event, value)
            if event == 'end_map':  # found the end of an object at the current key, yield
                value_dict = builder.value
                builder.value = {key: value_dict[key] for key in ['p', 'pct']}
                yield {key: builder.value}
Example #22
def bracket_objects(file):
    """Parse the saved bracket json to be able to create an html bracket."""
    key = '-'
    # for prefix, event, value in islice(ijson.parse(file), 10000):
    for prefix, event, value in ijson.parse(file):
        if prefix == '' and event == 'map_key':  # found new object at the root
            key = value  # mark the key value
            builder = ObjectBuilder()
        elif prefix.startswith(key):  # while at this key, build the object
            # if value == 'p' or value == 'pct':
            builder.event(event, value)
            if event == 'end_array':  # found the end of an object at the current key, yield
                # value_dict = builder.value
                # builder.value = {key: value_dict[key] for key in ['p', 'pct']}
                yield {key: builder.value}
Example #23
def gen_dylibs_count_data(json):
    dylibs = Counter()
    total = 0
    dylib = False
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.filetype'):
            total += 1
        elif prefix.endswith('.imports.item.item'):
            if dylib:
                dylibs[value] += 1
                dylib = False
            else:
                dylib = True

    for d in dylibs:
        dylibs[d] = dylibs[d] * (1.0 / total)

    return dylibs
Example #24
def gen_abnormalities_data(json):
    abnormalities = Counter()
    total = 0
    temp = []
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.filetype'):
            total += 1
        elif prefix.endswith('.abnormalities'):
            del temp[:]
        elif prefix.endswith('.abnormalities.item.title'):
            if value not in temp:
                abnormalities[value] += 1
                temp.append(value)

    for a in abnormalities.keys():
        abnormalities[a] = abnormalities[a] * (1.0 / total)

    return abnormalities
Example #27
def serialize(env, args, db):
    ijson = import_ijson(env)

    path = args.path
    parser = ijson.parse(open(path))
    start = time.time()
    package = None
    nprocessed = 0
    out = csv.writer(sys.stdout)

    def log_stats():
        log("processed {} packages in {:3.0f} seconds"
            .format(nprocessed, time.time() - start))

    for prefix, event, value in parser:

        if not prefix and event == b'map_key':

            # Flush the current package. We count on the first package being garbage.
            processed = serialize_one(out, package)
            nprocessed += processed
            if processed and not(nprocessed % 1000):
                log_stats()

            # Start a new package.
            package = { 'package_manager': b'npm'
                      , 'name': value
                      , 'description': b''
                      , 'emails': []
                       }

        key = lambda k: package['name'] + b'.' + k

        if event == b'string':
            assert type(value) is unicode  # Who knew? Seems to decode only for `string`.
            value = value.encode('utf8')
            if prefix == key(b'description'):
                package['description'] = value
            elif prefix in (key(b'author.email'), key(b'maintainers.item.email')):
                package['emails'].append(value)

    nprocessed += serialize_one(out, package)  # Don't forget the last one!
    log_stats()
Example #28
def gen_nimports_data(json):
    data = []
    execute = None
    temp = None
    for prefix, event, value in ijson.parse(json):
        if prefix.endswith('.macho') or prefix.endswith('.machos.item'):
            if execute and temp is not None:
                data.append(temp)
                temp = None
            execute = None
        if prefix.endswith('.filetype'):
            if value == 'EXECUTE':
                execute = True
            else:
                execute = False
        if prefix.endswith('.nimps'):
            temp = value

    if temp is not None and execute:
        data.append(temp)

    return data
Example #30
def main():
    users_comments_dict = collections.defaultdict(list)

    with tqdm(desc="Grouping comments by user",
              total=12704751) as progress_bar:
        inside_comment = False
        comment_text = None
        comment_username = None

        with open(COMMENTS_DATASET_FILE_PATH, 'rb') as file_:
            # As the JSON file is large (2.5 GB) and everything is on one line, it is better to read
            # it as a stream, using a SAX-like approach.
            for prefix, type_, value in ijson.parse(file_):
                if inside_comment:
                    if prefix.endswith('.text'):
                        comment_text = value
                    elif prefix.endswith('.author'):
                        comment_username = value
                    elif type_ == 'end_map':  # This assumes there are no nested maps inside the comment maps.
                        if comment_text and comment_username and comment_text != 'nan' \
                                and comment_username != '[deleted]':
                            users_comments_dict[comment_username].append(
                                comment_text)

                        inside_comment = False
                        comment_text = None
                        comment_username = None

                        progress_bar.update()
                elif type_ == 'start_map' and prefix:
                    inside_comment = True

    with open(USER_COMMENTS_FILE_PATH, 'w') as output_file:
        writer = csv.writer(output_file, quoting=csv.QUOTE_ALL)
        writer.writerows(
            (user, " <END> ".join(comments_texts))
            for user, comments_texts in iteritems(users_comments_dict))
Example #31
files = []

print("Actions:\n1. Process Logs\n2. Analyze Proccesed Logs")
action = input("Enter number of action to perform: ")
if action == "1":
    print("=" * 50 + "\nLog Files: ")
    i = 1
    for fn in os.listdir("."):
        if fn[-5:] == ".json" and fn[-len("_conversations.json"
                                          ):] != "_conversations.json":
            files.append(fn)
            print(str(i) + ". " + fn)
            i += 1
    fn = files[int(input("Enter number of file to read logs from: ")) - 1]
    data = ijson.parse(open(fn, "rb"))
    conversations = []
    conversation = False
    message = False
    member = False
    for prefix, event, value in data:
        if prefix == "conversations.item.conversation.conversation_id.id":
            print("Processing chat...")
            if conversation:
                if len(conversation["members"]
                       ) == 2 and conversation["name"] == None:
                    conversation["name"] = (
                        lambda a, b: a[0]["name"] if len(a) > 0 else b)(list(
                            filter(lambda m: m["id"] != conversation["self"],
                                   conversation["members"])), "")
                conversations.append(conversation)
Example #32
def port_snapshot(conf, keydb, silent=True):
    total_vests = 0
    total_steem = 0

    system_account_names = set(get_system_account_names(conf))

    if not silent and not YAJL2_CFFI_AVAILABLE:
        print(
            "Warning: could not load yajl, falling back to default backend for ijson."
        )

    snapshot_file = open(conf["snapshot_file"], "rb")

    account_names = set()
    num_accounts = 0
    for acc in ijson.items(snapshot_file, "accounts.item"):
        if acc["name"] in system_account_names:
            continue

        account_names.add(acc["name"])
        total_vests += satoshis(acc["vesting_shares"])
        total_steem += satoshis(acc["balance"])
        num_accounts += 1

        if not silent:
            if num_accounts % 100000 == 0:
                print("Accounts read:", num_accounts)

    # We have a fixed amount of STEEM to give out, specified by total_port_balance
    # This needs to be given out subject to the following constraints:
    # - The ratio of vesting : liquid STEEM is the same on testnet,
    # - Everyone's testnet balance is proportional to their mainnet balance
    # - Everyone has at least min_vesting_per_account

    snapshot_file.seek(0)
    for prefix, event, value in ijson.parse(snapshot_file):
        if prefix == "dynamic_global_properties.total_vesting_fund_steem.amount":
            total_vesting_steem = int(value)
            break

    denom = 10**12  # we need stupidly high precision because VESTS
    min_vesting_per_account = satoshis(conf["min_vesting_per_account"])
    total_port_balance = satoshis(conf["total_port_balance"])
    avail_port_balance = total_port_balance - min_vesting_per_account * num_accounts
    if avail_port_balance < 0:
        raise RuntimeError(
            "Increase total_port_balance or decrease min_vesting_per_account")
    total_port_vesting = (avail_port_balance * total_vesting_steem) // (
        total_steem + total_vesting_steem)
    total_port_liquid = (avail_port_balance *
                         total_steem) // (total_steem + total_vesting_steem)
    vest_conversion_factor = (denom * total_port_vesting) // total_vests
    steem_conversion_factor = (denom * total_port_liquid) // total_steem

    if not silent:
        print("total_vests:", total_vests)
        print("total_steem:", total_steem)
        print("total_vesting_steem:", total_vesting_steem)
        print("total_port_balance:", total_port_balance)
        print("total_port_vesting:", total_port_vesting)
        print("total_port_liquid:", total_port_liquid)
        print("vest_conversion_factor:", vest_conversion_factor)
        print("steem_conversion_factor:", steem_conversion_factor)

    porter = conf["accounts"]["porter"]["name"]
    tnman = conf["accounts"]["manager"]["name"]

    yield {
        "operations": [{
            "type": "transfer_operation",
            "value": {
                "from": "initminer",
                "to": porter,
                "amount": conf["total_port_balance"],
                "memo": "Fund porting balances",
            }
        }],
        "wif_sigs": [keydb.get_privkey("initminer")]
    }

    porter_wif = keydb.get_privkey("porter")

    create_auth = {
        "account_auths": [["porter", 1]],
        "key_auths": [],
        "weight_threshold": 1
    }

    snapshot_file.seek(0)
    accounts_created = 0
    for a in ijson.items(snapshot_file, "accounts.item"):
        if a["name"] in system_account_names:
            continue

        vesting_amount = (satoshis(a["vesting_shares"]) *
                          vest_conversion_factor) // denom
        transfer_amount = (satoshis(a["balance"]) *
                           steem_conversion_factor) // denom
        name = a["name"]

        ops = [{
            "type": "account_create_operation",
            "value": {
                "fee": amount(max(vesting_amount, min_vesting_per_account)),
                "creator": porter,
                "new_account_name": name,
                "owner": create_auth,
                "active": create_auth,
                "posting": create_auth,
                "memo_key": "TST" + a["memo_key"][3:],
                "json_metadata": "",
            }
        }]
        if transfer_amount > 0:
            ops.append({
                "type": "transfer_operation",
                "value": {
                    "from": porter,
                    "to": name,
                    "amount": amount(transfer_amount),
                    "memo": "Ported balance",
                }
            })

        accounts_created += 1
        if not silent:
            if accounts_created % 100000 == 0:
                print("Accounts created:", accounts_created)
                print(
                    "\t", '%.2f%% complete' %
                    (accounts_created / num_accounts * 100.0))

        yield {"operations": ops, "wif_sigs": [porter_wif]}

    if not silent:
        print("Accounts created:", accounts_created)
        print("\t100.00%% complete")

    snapshot_file.seek(0)
    accounts_updated = 0
    for a in ijson.items(snapshot_file, "accounts.item"):
        if a["name"] in system_account_names:
            continue

        cur_owner_auth = a["owner"]
        new_owner_auth = cur_owner_auth.copy()
        cur_active_auth = a["active"]
        new_active_auth = cur_active_auth.copy()
        cur_posting_auth = a["posting"]
        new_posting_auth = cur_posting_auth.copy()

        # filter to only include existing accounts
        for aw in cur_owner_auth["account_auths"]:
            if (aw[0] not in account_names) or (aw[0] in system_account_names):
                new_owner_auth["account_auths"].remove(aw)
        for aw in cur_active_auth["account_auths"]:
            if (aw[0] not in account_names) or (aw[0] in system_account_names):
                new_active_auth["account_auths"].remove(aw)
        for aw in cur_posting_auth["account_auths"]:
            if (aw[0] not in account_names) or (aw[0] in system_account_names):
                new_posting_auth["account_auths"].remove(aw)

        # add tnman to account_auths
        new_owner_auth["account_auths"].append(
            [tnman, cur_owner_auth["weight_threshold"]])
        new_active_auth["account_auths"].append(
            [tnman, cur_active_auth["weight_threshold"]])
        new_posting_auth["account_auths"].append(
            [tnman, cur_posting_auth["weight_threshold"]])

        # substitute prefix for key_auths
        new_owner_auth["key_auths"] = [["TST" + k[3:], w]
                                       for k, w in new_owner_auth["key_auths"]]
        new_active_auth["key_auths"] = [
            ["TST" + k[3:], w] for k, w in new_active_auth["key_auths"]
        ]
        new_posting_auth["key_auths"] = [
            ["TST" + k[3:], w] for k, w in new_posting_auth["key_auths"]
        ]

        ops = [{
            "type": "account_update_operation",
            "value": {
                "account": a["name"],
                "owner": new_owner_auth,
                "active": new_active_auth,
                "posting": new_posting_auth,
                "memo_key": "TST" + a["memo_key"][3:],
                "json_metadata": a["json_metadata"],
            }
        }]

        accounts_updated += 1
        if not silent:
            if accounts_updated % 100000 == 0:
                print("Accounts updated:", accounts_updated)
                print(
                    "\t", '%.2f%% complete' %
                    (accounts_updated / num_accounts * 100.0))

        yield {"operations": ops, "wif_sigs": [porter_wif]}

    if not silent:
        print("Accounts updated:", accounts_updated)
        print("\t100.00%% complete")

    snapshot_file.close()
    return
Example #33
# edgeUUID = set()

if input_format == 'avro':
	raise NotImplementedError('CDM avro format is not supported as of 01-04-09.')
elif input_format == 'json':
	files = os.listdir(input_source)	# all the dataset files needed to be parsed together

# Start processing CDM records
for data_file in files:
	with tf.open(os.path.join(input_source, data_file), 'r:gz') as f:
		names = f.getnames()
		sorted_files = sorted(names, key=lambda item: (int(item.split('.')[-1]) if item[-1].isdigit() else int(0), item))

		for sorted_file in sorted_files:
			file_obj = f.extractfile(f.getmember(sorted_file))
			parser = ijson.common.items(ijson.parse(file_obj, multiple_values=True), '')
			for cdm_record in parser:
				if input_format == 'avro':
					raise ValueError('This is a streaming JSON parser implementation.')
				elif input_format == 'json':
					# cdm_record = json.loads(line.strip())
					cdm_record_type = cdm_record['datum'].keys()[0]
					cdm_record_value = cdm_record['datum'][cdm_record_type]
				
				if cdm_record_type == CDM_TYPE_SRCSINK:
					uuid = cdm_record_value['uuid']
					values = process_cdm_srcsink(cdm_record_value, input_format, next_id)

					if uuid in nodes:
						logging.debug('CDM_TYPE_SRCSINK: UUID is not unique. UUID: ' + repr(uuid))
					nodes[uuid] = values
Example #34
def build_actions(conf, silent=True):
    keydb = prockey.ProceduralKeyDatabase()
    account_stats_start = datetime.datetime.utcnow()
    account_stats = get_account_stats(conf, silent)
    account_stats_elapsed = datetime.datetime.utcnow() - account_stats_start
    account_names = account_stats["account_names"]
    num_accounts = len(account_names)
    transactions_per_block = conf["transactions_per_block"]
    crea_block_interval = conf.get("crea_block_interval", CREA_BLOCK_INTERVAL)
    transaction_witness_setup_pad = conf.get("transaction_witness_setup_pad",
                                             TRANSACTION_WITNESS_SETUP_PAD)

    genesis_time = datetime.datetime.utcfromtimestamp(CREA_GENESIS_TIMESTAMP)

    # Three transactions per account (create, transfer_to_vesting, and update).
    predicted_transaction_count = num_accounts * 3

    # The predicted number of blocks for accounts.
    predicted_block_count = predicted_transaction_count // transactions_per_block

    # The number of seconds required to setup transactions is a multiple of
    # the initial time it takes to do the get_account_stats() call.
    predicted_transaction_setup_seconds = (account_stats_elapsed.seconds * 2)

    # Pad for update witnesses, vote witnesses, clear rounds, and transaction
    # setup processing time
    predicted_block_count += transaction_witness_setup_pad + (
        predicted_transaction_setup_seconds // crea_block_interval)

    now = datetime.datetime.utcnow()
    start_time = now - datetime.timedelta(seconds=predicted_block_count *
                                          crea_block_interval)
    miss_blocks = int(
        (start_time - genesis_time).total_seconds()) // crea_block_interval
    miss_blocks = max(miss_blocks - 1, 0)
    origin_api = None
    snapshot_head_block_num = None
    snapshot_semver = None
    has_backfill = False

    metadata = {
        "txgen:semver": __version__,
        "txgen:transactions_per_block": transactions_per_block,
        "epoch:created": str(now),
        "actions:count": predicted_transaction_count,
        "recommend:miss_blocks": miss_blocks
    }

    with open(conf["snapshot_file"], "rb") as f:
        for prefix, event, value in ijson.parse(f):
            if prefix == "metadata.snapshot:origin_api":
                metadata["snapshot:origin_api"] = value
            if prefix == "metadata.snapshot:semver":
                metadata["snapshot:semver"] = value
            if prefix == "dynamic_global_properties.head_block_number":
                metadata["snapshot:head_block_num"] = value

            if not prefix == '' and not prefix.startswith(
                    "metadata") and not prefix.startswith(
                        "dynamic_global_properties"):
                break

    semver = metadata.get("snapshot:semver", '0.0')
    major_version, minor_version = semver.split('.')
    major_version = int(major_version)
    minor_version = int(minor_version)
    backfill_file = conf.get("backfill_file", None)

    if major_version == SNAPSHOT_MAJOR_VERSION_SUPPORTED:
        if not silent:
            print("metadata:", metadata)
    else:
        raise RuntimeError("Unsupported snapshot:", metadata)

    if minor_version < SNAPSHOT_MINOR_VERSION_SUPPORTED:
        print("WARNING: Older snapshot encountered.", file=sys.stderr)

    if backfill_file and os.path.exists(backfill_file) and os.path.isfile(
            backfill_file):
        with open(backfill_file, "r") as f:
            num_lines = sum(1 for line in f)

        if num_lines > 0:
            metadata["backfill_actions:count"] = num_lines
            metadata["actions:count"] += num_lines
            miss_blocks -= max(num_lines // transactions_per_block,
                               CREA_BLOCKS_PER_DAY * 30)
            metadata["recommend:miss_blocks"] = miss_blocks
            has_backfill = True

    yield ["metadata", metadata]
    yield ["wait_blocks", {"count": 1, "miss_blocks": miss_blocks}]
    yield ["submit_transaction", {"tx": build_initminer_tx(conf, keydb)}]
    for b in util.batch(
            build_setup_transactions(account_stats, conf, keydb, silent),
            transactions_per_block):
        for tx in b:
            yield ["submit_transaction", {"tx": tx}]

    if has_backfill:
        with open(backfill_file, "r") as f:
            for line in f:
                yield json.loads(line)

        yield ["metadata", {"post_backfill": True}]

    for tx in update_witnesses(conf, keydb, "init"):
        yield ["submit_transaction", {"tx": tx}]
    for tx in vote_accounts(conf, keydb, "elector", "init"):
        yield ["submit_transaction", {"tx": tx}]

    yield [
        "wait_blocks", {
            "count":
            conf.get("num_blocks_to_clear_witness_round",
                     NUM_BLOCKS_TO_CLEAR_WITNESS_ROUND)
        }
    ]
    return
Example #35
#! /usr/bin/env python2.7

import sys

import ijson.backends.yajl2_cffi as ijson

p_map = {}
p_count = 0
e_count = 0

with open(sys.argv[1]) as jfile:
    parser = ijson.common.items(ijson.parse(jfile, multiple_values=True), '')
    for evt in parser:
        if evt['subjprocuuid'] not in p_map:
            p_map[evt['subjprocuuid']] = True
            p_count += 1
        if evt['event'] == "audit:event:aue_execve:":
            if p_map[evt['subjprocuuid']]:
                p_map[evt['subjprocuuid']] = False
            else:
                p_count += 1
        elif evt['event'] in ["audit:event:aue_fork:", "audit:event:aue_vfork:"]:
            if evt['ret_objuuid1'] not in p_map:
                p_map[evt['ret_objuuid1']] = True
                p_count += 1
        e_count += 1

print("{} Events Processed".format(e_count))
print("{} Process Nodes Observed".format(p_count))
print("{} Unique UUIDs Observed".format(len(p_map)))
Example #36
    def Iterate(self, inpath, outpath=None, rfrom=1, rto=0):
        if self._backend == 'yajl2_cffi':
            import ijson.backends.yajl2_cffi as ijson
        elif self._backend == 'yajl2':
            import ijson.backends.yajl2 as ijson
        else:
            import ijson

        _recno = 1
        _lp_rec = 0
        if self._mode in [1, 2]:
            _unique = set()
        elif self._mode == 3:
            _unique = None
        else:
            Log.error(
                'Invalid value of key mode (=%d); allowed values [1,2,3]' %
                self._mode)
            return
        if self._lp_step > 0 and Log.isEnabledFor(logging.INFO):
            _lp_rec = self._lp_step

        try:
            header = [{'_tag_': 'ijson_events', '_bra_': True}]
            if self._flt is not None:
                if hasattr(self._flt, 'setHeader'):
                    self._flt.setHeader(header)
            self._wri.writeHeader(header)

            with open(inpath, 'r') as fd:
                parser = ijson.parse(fd)
                for prefix, event, value in parser:
                    if rto > 0 and _recno > rto:
                        raise ToLimitBreak
                    if prefix in [
                            'item', ''
                    ] and not event in ['start_array', 'start_map', 'map_key']:
                        _recno = _recno + 1
                        if _recno == _lp_rec:
                            Log.info('Processed %d records' % _recno)
                            _lp_rec = _lp_rec + self._lp_step
                    if _recno < rfrom:
                        return

                    if self._mode == 1:
                        if prefix in _unique:
                            continue
                        _unique.add(prefix)
                        rec = etree.Element(self._rec_tag)
                        p_xml = etree.SubElement(rec, 'prefix')
                        p_xml.text = str(prefix)
                    elif self._mode == 2:
                        if (prefix, event) in _unique:
                            continue
                        _unique.add((prefix, event))
                        rec = etree.Element(self._rec_tag)
                        p_xml = etree.SubElement(rec, 'prefix')
                        p_xml.text = str(prefix)
                        e_xml = etree.SubElement(rec, 'event')
                        e_xml.text = str(event)
                    elif self._mode == 3:
                        rec = etree.Element(self._rec_tag)
                        p_xml = etree.SubElement(rec, 'prefix')
                        p_xml.text = str(prefix)
                        e_xml = etree.SubElement(rec, 'event')
                        e_xml.text = str(event)
                        v_xml = etree.SubElement(rec, 'value')
                        v_xml.text = str(value)

                    if self._flt is not None:
                        while True:
                            # OBLIGATORY
                            res = self._flt.filterRecord(rec)
                            if res & WRITE:
                                yield self._wri.writeRecord(rec)
                            if res & REPEAT:
                                continue
                            if res & BREAK:
                                Log.info(
                                    'Filter caused Process to stop on record %d'
                                    % _recno)
                                raise FilterBreak
                            break
                    else:
                        # OBLIGATORY
                        yield self._wri.writeRecord(rec)
        except FilterBreak:
            pass
        except ToLimitBreak:
            pass
        finally:
            # OBLIGATORY
            footer = []
            if self._flt is not None:
                if hasattr(self._flt, 'setFooter'):
                    self._flt.setFooter(footer)
            self._wri.writeFooter(footer)
Example #37
    def Process(self, inpath, outpath=None, rfrom=1, rto=0):
        """Parameters are usually passed from YAML file as subkeys of ``Reader:PArg`` key.
        
        :param inpath: Path to input file.
        :param outpath: Path to output file passed to Writer (fall-back if output connector is not defined).
        :param rfrom-rto: specifies scope of records to be processed.
        
        For more detailed descriptions see :ref:`readers_conf_template`.
        """
        if self._backend == 'yajl2_cffi':
            import ijson.backends.yajl2_cffi as ijson
        elif self._backend == 'yajl2':
            import ijson.backends.yajl2 as ijson
        else:
            import ijson

        _recno = 1
        _lp_rec = 0
        if self._mode in [1, 2]:
            _unique = set()
        elif self._mode == 3:
            _unique = None
        else:
            Log.error(
                'Invalid value of key mode (=%d); allowed values [1,2,3]' %
                self._mode)
            return
        if self._lp_step > 0 and Log.isEnabledFor(logging.INFO):
            _lp_rec = self._lp_step

        try:
            header = [{'_tag_': 'ijson_events', '_bra_': True}]
            if self._flt is not None:
                if hasattr(self._flt, 'setHeader'):
                    self._flt.setHeader(header)
            self._wri.writeHeader(header)

            with open(inpath, 'r') as fd:
                parser = ijson.parse(fd)
                for prefix, event, value in parser:
                    if rto > 0 and _recno > rto:
                        raise ToLimitBreak
                    if prefix in [
                            'item', ''
                    ] and not event in ['start_array', 'start_map', 'map_key']:
                        _recno = _recno + 1
                        if _recno == _lp_rec:
                            Log.info('Processed %d records' % _recno)
                            _lp_rec = _lp_rec + self._lp_step
                    if _recno < rfrom:
                        return

                    if self._mode == 1:
                        if prefix in _unique:
                            continue
                        _unique.add(prefix)
                        rec = etree.Element(self._rec_tag)
                        p_xml = etree.SubElement(rec, 'prefix')
                        p_xml.text = str(prefix)
                    elif self._mode == 2:
                        if (prefix, event) in _unique:
                            continue
                        _unique.add((prefix, event))
                        rec = etree.Element(self._rec_tag)
                        p_xml = etree.SubElement(rec, 'prefix')
                        p_xml.text = str(prefix)
                        e_xml = etree.SubElement(rec, 'event')
                        e_xml.text = str(event)
                    elif self._mode == 3:
                        rec = etree.Element(self._rec_tag)
                        p_xml = etree.SubElement(rec, 'prefix')
                        p_xml.text = str(prefix)
                        e_xml = etree.SubElement(rec, 'event')
                        e_xml.text = str(event)
                        v_xml = etree.SubElement(rec, 'value')
                        v_xml.text = str(value)

                    if self._flt is not None:
                        while True:
                            # OBLIGATORY
                            res = self._flt.filterRecord(rec)
                            if res & WRITE:
                                self._wri.writeRecord(rec)
                            if res & REPEAT:
                                continue
                            if res & BREAK:
                                Log.info(
                                    'Filter caused Process to stop on record %d'
                                    % _recno)
                                raise FilterBreak
                            break
                    else:
                        # OBLIGATORY
                        self._wri.writeRecord(rec)
        except FilterBreak:
            pass
        except ToLimitBreak:
            pass
        finally:
            # OBLIGATORY
            footer = []
            if self._flt is not None:
                if hasattr(self._flt, 'setFooter'):
                    self._flt.setFooter(footer)
            self._wri.writeFooter(footer)
Example #38
def cprocess(fileobj, ds, fn, out=None):
    """Iteratively process/scan an file object.

	Arguments:
	fileobj - file object
	ds - a database (for node parsing) or a sanitylog (for scanning)
	fn - file name
	"""
    parser = ijson.common.items(ijson.parse(fileobj, multiple_values=True), '')

    if args.trace == 'camflow':
        if args.scan:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m scanning file {} in CAMFLOW mode..."
                    .format(fn))
            ptc.sanitycheckcf(parser, ds)

        elif out == None:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m parsing compressed file {} in CAMFLOW mode..."
                    .format(fn))
            ptj.parsecf(parser, ds, fn)

        else:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m generating output for file {} in CAMFLOW mode..."
                    .format(fn))
                print(
                    "\x1b[6;30;43m[i]\x1b[0m initiating logging. Check error.log afterwards..."
                )
            ptj.cgencf(parser, ds, out)

    elif args.trace == 'darpa':
        if args.scan:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m scanning file {} in DARPA mode..."
                    .format(fn))
            ptc.sanitycheckdp(parser, ds)

        elif out == None:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m parsing compressed file {} in DARPA mode..."
                    .format(fn))
            ptj.parsedp(parser, ds, fn)

        else:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m generating output for file {} in DARPA mode..."
                    .format(fn))
                print(
                    "\x1b[6;30;43m[i]\x1b[0m initiating logging. Check error.log afterwards..."
                )
            ptj.cgendp(parser, ds, out)

    elif args.trace == 'cadets2' or args.trace == 'fivedirections':
        if args.scan:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m scanning file {} in CADETS2/FIVEDIRECTIONS mode..."
                    .format(fn))
            ptc.sanitycheckcd(parser, ds)

        elif out == None:
            raise NotImplementedError(
                "no support for processing {} compact files at the moment".
                format(args.trace))

        else:
            if args.verbose:
                print(
                    "\x1b[6;30;42m[+]\x1b[0m generating output for file {} in CADETS2/FIVEDIRECTIONS mode..."
                    .format(fn))
                print(
                    "\x1b[6;30;43m[i]\x1b[0m initiating logging. Check error.log afterwards..."
                )
            ptj.cgencd(parser, ds, out)

    else:
        raise NotImplementedError("cannot run traces from an unknown system")

    fileobj.close()
    return
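
The parser construction at the top of cprocess is the standard ijson idiom for streams that contain many concatenated JSON documents (e.g. newline-delimited provenance records): multiple_values=True keeps the parser reading past the end of the first document, and the empty prefix '' makes items() yield each top-level value. A minimal, self-contained sketch:

import io

import ijson
import ijson.common

stream = io.BytesIO(b'{"id": 1}\n{"id": 2}\n{"id": 3}\n')
for record in ijson.common.items(ijson.parse(stream, multiple_values=True), ''):
    print(record)  # {'id': 1}, then {'id': 2}, then {'id': 3}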
Example #39
import argparse
import enum
import sys

import ijson


class ParserState(enum.Enum):
    # states reconstructed from their uses in this snippet
    START = enum.auto()
    HEADER = enum.auto()
    ARRAY_START = enum.auto()
    SUBSEQ_MAP = enum.auto()


state = ParserState.START
dirs = []
key = None
obj = {}

argp = argparse.ArgumentParser()
argp.add_argument("file", type=argparse.FileType("rb"), help="ncdu export filename")
argp.add_argument("--dirs", choices=["array", "string"], default="string", help="directory name format output to flat file")
argp.add_argument("--verbose", action="store_true", help="enable verbose mode (inc. ijson variant)")
options = argp.parse_args()

if options.verbose:
    sys.stderr.write("ijson module variant: {}\n".format(ijson.__name__))

parser = ijson.parse(options.file)
for prefix, event, value in parser:
    if event == "start_array":
        if state != ParserState.START:
            # started non-header array (directory listing)
            state = ParserState.ARRAY_START
        else:
            # started header, omit this map
            state = ParserState.HEADER
    elif event == "end_array":
        # array means a (sub)directory so it was at least a second entry
        # (first entry is the directory's meta-data)
        state = ParserState.SUBSEQ_MAP
        if dirs:
            dirs.pop()
    elif state == ParserState.ARRAY_START and event == "start_map":
        # first map after a new array opens: the directory's own metadata
        pass
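
The fragment above is driven by the raw (prefix, event, value) triples that ijson.parse emits; ncdu exports nest each directory as an array whose first element is the directory's own metadata map. A minimal sketch of the events such input produces (the sample data is illustrative, not a full ncdu export):

import io

import ijson

data = io.BytesIO(b'[1, 0, {"progname": "ncdu"}, [{"name": "/"}, {"name": "etc"}]]')
for prefix, event, value in ijson.parse(data):
    print(prefix, event, value)
# emits, among others:
#   ('', 'start_array', None)
#   ('item', 'start_map', None)        -> header map
#   ('item', 'start_array', None)      -> nested directory array
#   ('item.item.name', 'string', 'etc')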
Example #40
import argparse
import collections
import heapq
import json
import sys


def main(argv):
    parser = argparse.ArgumentParser(
        prog=argv[0], description="Generate transactions for Steem testnet")
    parser.add_argument("-i",
                        "--infile",
                        default="",
                        dest="infile",
                        metavar="FILE",
                        help="Specify input snapshot, - means stdin")
    parser.add_argument("-o",
                        "--outfile",
                        default="-",
                        dest="outfile",
                        metavar="FILE",
                        help="Specify output snapshot, - means stdout")
    args = parser.parse_args(argv[1:])

    sample_size = 2000

    if args.infile == "-":
        # We do not have random access, so we must load the whole thing in
        # memory.  And we cannot output messages.

        infile = sys.stdin

        snapshot = json.load(infile, object_pairs_hook=collections.OrderedDict)
        snapshot["witnesses"] = []
        snapshot["accounts"] = heapq.nlargest(
            sample_size,
            snapshot["accounts"],
            key=lambda a: int(a["balance"]["amount"]))
    else:
        # We have random access!

        try:
            import ijson.backends.yajl2_cffi as ijson
            from cffi import FFI
            YAJL2_CFFI_AVAILABLE = True
        except ImportError:
            import ijson
            YAJL2_CFFI_AVAILABLE = False

        if not YAJL2_CFFI_AVAILABLE:
            print(
                "Warning: could not load yajl, falling back to default backend for ijson."
            )

        infile = open(args.infile, "rb")

        account_balances = {}
        snapshot = {
            "dynamic_global_properties": {
                "total_vesting_fund_steem": {}
            },
            "accounts": [],
            "witnesses": []
        }

        fund = snapshot["dynamic_global_properties"][
            "total_vesting_fund_steem"]
        for prefix, event, value in ijson.parse(infile):
            if prefix == "dynamic_global_properties.total_vesting_fund_steem.amount":
                fund["amount"] = value
            elif prefix == "dynamic_global_properties.total_vesting_fund_steem.precision":
                fund["precision"] = value
            elif prefix == "dynamic_global_properties.total_vesting_fund_steem.nai":
                fund["nai"] = value
            if len(fund) > 2:  # stop once all three fields are captured
                break

        print("Captured:", snapshot["dynamic_global_properties"])

        infile.seek(0)
        for a in ijson.items(infile, "accounts.item"):
            account_balances[a["name"]] = a["balance"]["amount"]

            if len(account_balances) % 100000 == 0:
                print("Balances so far:", len(account_balances))

        top_accounts = heapq.nlargest(sample_size,
                                      account_balances,
                                      key=lambda a: int(account_balances[a]))

        print('Found top accounts:', len(top_accounts))

        infile.seek(0)
        for a in ijson.items(infile, "accounts.item"):
            t = len(top_accounts)
            s = len(snapshot["accounts"])

            if s >= t:
                break

            if a["name"] in top_accounts:
                snapshot["accounts"].append(a)

                if s > 0 and s % 100 == 0:
                    print("Samples created:", s)
                    print("\t", '%.2f%% complete' % (s / t * 100.0))

        infile.close()

    if args.outfile == "-":
        outfile = sys.stdout
    else:
        print("Dumping sample ...")
        outfile = open(args.outfile, "w")
    json.dump(snapshot, outfile, separators=(",", ":"))

    if args.outfile != "-":
        outfile.close()

    return
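
Two details of the random-access branch above are worth noting: ijson.items(infile, "accounts.item") streams each element of the accounts array without materialising the whole snapshot, and because infile is a real file object, infile.seek(0) allows a second streaming pass. A minimal sketch of the same two-pass idiom (account names and balances here are made up):

import io

import ijson

buf = io.BytesIO(b'{"accounts": [{"name": "alice", "balance": 3},'
                 b' {"name": "bob", "balance": 7}]}')

# pass 1: collect a lightweight index
balances = {a['name']: a['balance'] for a in ijson.items(buf, 'accounts.item')}

# pass 2: rewind and stream again to pull the full records we want
buf.seek(0)
selected = [a for a in ijson.items(buf, 'accounts.item') if balances[a['name']] > 5]
print(selected)  # [{'name': 'bob', 'balance': 7}]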
Example #41
def extract(self, filepath):
    # ensure the input file is closed once extraction completes
    with open(filepath, 'rb') as vidjilfile:
        parser = ijson.parse(vidjilfile)
        with self.writer() as writer:
            return self._extract(parser, writer)
Example #42
def initModel(self, model_path):
    with open(model_path, 'rb') as model:
        parser = ijson.parse(model)
        for prefix, event, value in parser:
            if (prefix, event) not in self._model_prefixes:
                self._model_prefixes.append((prefix, event))
Example #43
def build_dataframe(good, bad):
    df = pd.DataFrame(columns=features)
    current = 0
    row = {}
    symbol = False
    cmd = None
    segment_name = None
    section_name = None

    print('Parsing good json')
    for prefix, event, value in ijson.parse(good):
        if prefix.endswith('.macho') or prefix.endswith('.machos.item'):
            if len(row) > 0:
                row['alignment'] = 'good'
                df.loc[current] = build_row(row)
                current += 1
                row = {}
        elif prefix.endswith('.macho.size') or prefix.endswith('.machos.item.size'):
            row['m_size'] = value / 1024.0
        elif prefix.endswith('.strtab.size'):
            row['s_size'] = value / 1024.0
        elif prefix.endswith('.slcs'):
            row['slcs'] = value / 1024.0
        elif prefix.endswith('.signature.size'):
            row['sig_size'] = value / 1024.0
        elif prefix.endswith('.symtab.nsyms'):
            row['nsyms'] = value
        elif prefix.endswith('.nlcs'):
            row['nlcs'] = value
        elif prefix.endswith('.ndylibs'):
            row['ndylibs'] = value
        elif prefix.endswith('.nimps'):
            row['nimports'] = value
        elif prefix.endswith('.entropy'):
            row['entropy'] = value
        elif prefix.endswith('.strtab.strings'):
            row['nstrings'] = 0
        elif prefix.endswith('.strtab.strings.item'):
            row['nstrings'] += 1
        elif prefix.endswith('.macho.flags.item') or prefix.endswith('.machos.item.flags.item'):
            row[value] = 10
        elif prefix.endswith('.filetype'):
            row[value] = 10
        elif prefix.endswith('.lcs.item.cmd'):
            if value in ('SEGMENT', 'SEGMENT_64'):
                if segment_name is None:
                    cmd = value
                else:
                    lc = value + ' (' + segment_name + ')'
                    if lc in load_commands:
                        row[lc] = 10
                    segment_name = None
            else:
                if value in load_commands:
                    row[value] = 10
        elif prefix.endswith('.lcs.item.name'):
            if cmd is None:
                segment_name = value
            else:
                lc = cmd + ' (' + value + ')'
                if lc in load_commands:
                    row[lc] = 10
                cmd = None
        elif prefix.endswith('.sects.item.segname'):
            if section_name is None:
                segment_name = value
            else:
                s = value + ', ' + section_name
                if s in sections:
                    row[s] = 10
                section_name = None
        elif prefix.endswith('.sects.item.name'):
            if segment_name is None:
                section_name = value
            else:
                s = segment_name + ', ' + value
                if s in sections:
                    row[s] = 10
                segment_name = None
        elif prefix.endswith('.imports.item'):
            symbol = True
        elif prefix.endswith('.imports.item.item') and symbol:
            if value in imports:
                row[value] = 10
            symbol = False
        elif prefix.endswith('.imports.item.item') and not symbol:
            if value in dylib_counts:
                if value in row:
                    row[value] += 1
                else:
                    row[value] = 1
        #elif prefix.endswith('.dylibs.item'):
        #    if value in dylibs:
        #        row[value] = 10

    # flush the last record of the good file before relabelling; without this
    # it would be appended with alignment 'bad' by the loop below
    if len(row) > 0:
        row['alignment'] = 'good'
        df.loc[current] = build_row(row)
        current += 1
        row = {}

    print('Parsing bad json')
    for prefix, event, value in ijson.parse(bad):
        if prefix.endswith('.macho') or prefix.endswith('.machos.item'):
            if len(row) > 0:
                row['alignment'] = 'bad'
                df.loc[current] = build_row(row)
                current += 1
                row = {}
        elif prefix.endswith('.macho.size') or prefix.endswith('.machos.item.size'):
            row['m_size'] = value / 1024.0
        elif prefix.endswith('.strtab.size'):
            row['s_size'] = value / 1024.0
        elif prefix.endswith('.slcs'):
            row['slcs'] = value / 1024.0
        elif prefix.endswith('.signature.size'):
            row['sig_size'] = value / 1024.0
        elif prefix.endswith('.symtab.nsyms'):
            row['nsyms'] = value
        elif prefix.endswith('.nlcs'):
            row['nlcs'] = value
        elif prefix.endswith('.ndylibs'):
            row['ndylibs'] = value
        elif prefix.endswith('.nimps'):
            row['nimports'] = value
        elif prefix.endswith('.entropy'):
            row['entropy'] = value
        elif prefix.endswith('.strtab.strings'):
            row['nstrings'] = 0
        elif prefix.endswith('.strtab.strings.item'):
            row['nstrings'] += 1
        elif prefix.endswith('.macho.flags.item') or prefix.endswith('.machos.item.flags.item'):
            row[value] = 10
        elif prefix.endswith('.filetype'):
            row[value] = 10
        elif prefix.endswith('.lcs.item.cmd'):
            if value in ('SEGMENT', 'SEGMENT_64'):
                if segment_name is None:
                    cmd = value
                else:
                    lc = value + ' (' + segment_name + ')'
                    if lc in load_commands:
                        row[lc] = 10
                    segment_name = None
            else:
                if value in load_commands:
                    row[value] = 10
        elif prefix.endswith('.lcs.item.name'):
            if cmd is None:
                segment_name = value
            else:
                lc = cmd + ' (' + value + ')'
                if lc in load_commands:
                    row[lc] = 10
                cmd = None
        elif prefix.endswith('.sects.item.segname'):
            if section_name is None:
                segment_name = value
            else:
                s = value + ', ' + section_name
                if s in sections:
                    row[s] = 10
                section_name = None
        elif prefix.endswith('.sects.item.name'):
            if segment_name is None:
                section_name = value
            else:
                s = segment_name + ', ' + value
                if s in sections:
                    row[s] = 10
                segment_name = None
        elif prefix.endswith('.imports.item'):
            symbol = True
        elif prefix.endswith('.imports.item.item') and symbol:
            if value in imports:
                row[value] = 10
            symbol = False
        elif prefix.endswith('.imports.item.item') and not symbol:
            if value in dylib_counts:
                if value in row:
                    row[value] += 1
                else:
                    row[value] = 1
        #elif prefix.endswith('.dylibs.item'):
        #    if value in dylibs:
        #        row[value] = 10

    # flush the last record of the bad file; the loop above only flushes
    # when the next Mach-O record begins
    if len(row) > 0:
        row['alignment'] = 'bad'
        df.loc[current] = build_row(row)

    return df