Пример #1
0
def test_sketch_from_sequence(sketches_json):
    """Check that sketch -> sequence -> sketch -> sequence round-trips.

    Each JSON entry is parsed into a ``Sketch``, converted to a sequence,
    rebuilt through ``sketch_from_sequence`` and converted to a sequence
    again. The two sequences must have the same length and compare equal
    element by element.
    """
    for sketch_json in sketches_json:
        sketch = Sketch.from_fs_json(sketch_json)
        seq = sketch_to_sequence(sketch)
        sketch2 = sketch_from_sequence(seq)
        # Serialize the *reconstructed* sketch. Serializing the original a
        # second time would compare the sequence with itself and never
        # exercise `sketch_from_sequence`.
        seq2 = sketch_to_sequence(sketch2)

        assert len(seq) == len(seq2)
        for op1, op2 in zip(seq, seq2):
            assert op1 == op2
Пример #2
0
def test_sketch_from_sequence(sketches_json):
    """Round-trip each sample sketch through its sequence representation.

    Sketches are loaded with ``include_external_constraints=False``. Any
    sketch whose sequence contains an entry labelled with one of
    ``_UNSUPPORTED_CONSTRAINTS`` is skipped; for the rest, the sequence of
    the rebuilt sketch must match the original sequence element by element.
    """
    for entry in sketches_json:
        original = Sketch.from_fs_json(entry, include_external_constraints=False)
        ops = sketch_to_sequence(original)

        # Unsupported constraints are skipped for now.
        has_unsupported = any(
            op.label in _UNSUPPORTED_CONSTRAINTS for op in ops)
        if not has_unsupported:
            rebuilt_ops = sketch_to_sequence(sketch_from_sequence(ops))

            assert len(ops) == len(rebuilt_ops)
            for expected, actual in zip(ops, rebuilt_ops):
                assert expected == actual
Пример #3
0
def _worker(paths_queue, processed_sketches, max_sketches, sketch_counter):
    """Consume batches of paths, parse their sketches and emit flat arrays.

    The worker repeatedly takes a batch of paths from ``paths_queue``, loads
    every JSON sketch found in those files, drops invalid and filtered
    sketches, and puts the ``(offsets, data)`` result of
    ``flat_array.raw_list_flat`` on ``processed_sketches``. When it stops it
    puts a final statistics dictionary on ``processed_sketches``.

    Parameters
    ----------
    paths_queue
        Queue-like object whose ``get()`` returns an iterable of paths, or
        ``None`` as the signal to stop.
    processed_sketches
        Queue-like object receiving one ``(offsets, data)`` tuple per batch
        and, last, a dict with keys ``'num_filtered'`` and ``'num_invalid'``.
    max_sketches : int or None
        Stop taking new batches once ``sketch_counter.value`` reaches this
        value; ``None`` disables the limit.
    sketch_counter
        Counter exposing ``value`` and ``get_lock()``; incremented by the
        number of sketches kept in each batch.
        NOTE(review): presumably a ``multiprocessing.Value`` shared between
        workers -- confirm against the caller.
    """
    num_filtered = 0
    num_invalid = 0

    while max_sketches is None or sketch_counter.value < max_sketches:
        paths = paths_queue.get()

        if paths is None:
            # Sentinel: no more work for this worker.
            break

        sketches = []

        for path in paths:
            sketch_list = _load_json(path)

            for sketch_json in sketch_list:
                try:
                    sketch = Sketch.from_fs_json(sketch_json)
                except Exception as err:
                    num_invalid += 1
                    print('Error processing sketch in file {0}'.format(path))
                    traceback.print_exception(type(err), err,
                                              err.__traceback__)
                    # Skip this entry: without this, `sketch` would be
                    # unbound (first failure) or still refer to the previous
                    # sketch, which would then be filtered/appended twice.
                    continue

                if filter_sketch(sketch):
                    num_filtered += 1
                    continue

                sketches.append(sketch)

        offsets, data = flat_array.raw_list_flat(sketches)

        processed_sketches.put((offsets, data))

        with sketch_counter.get_lock():
            sketch_counter.value += len(sketches)

    # Final message: per-worker statistics.
    processed_sketches.put({
        'num_filtered': num_filtered,
        'num_invalid': num_invalid
    })
Пример #4
0
def load_json_tarball(path):
    """Loads a json tarball as an iterable of identified sketches.

    The file at ``path`` is read as a zstd-compressed tar stream. Every
    regular, non-empty member is parsed as a JSON list of sketches.

    Parameters
    ----------
    path : str
        A path to the location of a single shard

    Yields
    ------
    tuple
        Pairs ``((document_id, part_id, index), sketch)`` where
        ``document_id`` and ``part_id`` come from ``parse_sketch_id`` applied
        to the member name, ``index`` is the position of the sketch inside
        that member's JSON list, and ``sketch`` is the `Sketch` built by
        ``Sketch.from_fs_json``.

    Raises
    ------
    ValueError
        If the content of a member is not valid JSON. The original
        ``json.JSONDecodeError`` is attached as the cause.
    """
    with open(path, 'rb') as base_file:
        dctx = zstd.ZstdDecompressor()
        with dctx.stream_reader(base_file) as tarball:
            # 'r|' reads the archive as a forward-only stream, so members
            # must be consumed in order as they are encountered.
            with tarfile.open(fileobj=tarball, mode='r|') as directory:
                for json_file in directory:
                    if not json_file.isfile():
                        continue

                    document_id, part_id = parse_sketch_id(json_file.name)
                    data = directory.extractfile(json_file).read()
                    if not data:
                        # skip empty files
                        continue

                    try:
                        sketches_json = json.loads(data)
                    except json.JSONDecodeError as exc:
                        # Chain the decoder error so the exact position of
                        # the JSON problem is not lost.
                        raise ValueError(
                            'Error decoding JSON for document {0} part {1}.'.
                            format(document_id, part_id)) from exc
                    for i, sketch_json in enumerate(sketches_json):
                        yield (document_id, part_id,
                               i), Sketch.from_fs_json(sketch_json)
Пример #5
0
def sketches(sketches_json):
    """Build a list of sample sketches, one per entry of ``sketches_json``."""
    parsed = []
    for entry in sketches_json:
        parsed.append(Sketch.from_fs_json(entry))
    return parsed
Пример #6
0
def test_sketch_from_json(sketches_json):
    """Smoke test: every sample JSON entry is parsed without raising."""
    for entry in sketches_json:
        # Only successful construction is checked; the result is discarded.
        Sketch.from_fs_json(entry)
Пример #7
0
def test_plot_sketch(sketches_json):
    """Render up to the first ten sample sketches; each must return a result."""
    for entry in sketches_json[:10]:
        parsed = Sketch.from_fs_json(entry)
        figure = render_sketch(parsed)
        assert figure is not None
Пример #8
0
def test_sequence_from_sketch(sketches_json):
    """Smoke test: every sample sketch converts to a sequence without raising."""
    for entry in sketches_json:
        # The resulting sequence is not inspected; only the absence of
        # errors is checked.
        sketch_to_sequence(Sketch.from_fs_json(entry))
Пример #9
0
def sketches(sketches_json):
    """Return the sketches parsed from each entry of ``sketches_json``."""
    build = Sketch.from_fs_json
    return list(map(build, sketches_json))