Example #1
    def _pandas_to_bucket(self, df, symbol):
        start = to_dt(df.index[0].to_datetime())
        end = to_dt(df.index[-1].to_datetime())
        rtn = {START: start, END: end, SYMBOL: symbol}
        rtn[VERSION] = CHUNK_VERSION_NUMBER
        rtn[COUNT] = len(df)
        rtn[COLUMNS] = {}

        logger.warn("NB treating all values as 'exists' - no longer sparse")
        rowmask = Binary(lz4.compressHC(np.packbits(np.ones(len(df), dtype="uint8"))))

        recs = df.to_records(convert_datetime64=False)
        for col in df:
            array = self._ensure_supported_dtypes(recs[col])
            col_data = {}
            col_data[DATA] = Binary(lz4.compressHC(array.tostring()))
            col_data[ROWMASK] = rowmask
            col_data[DTYPE] = self._str_dtype(array.dtype)
            rtn[COLUMNS][col] = col_data
        rtn[INDEX] = Binary(
            lz4.compressHC(
                np.concatenate(
                    (
                        [recs["index"][0].astype("datetime64[ms]").view("uint64")],
                        np.diff(recs["index"].astype("datetime64[ms]").view("uint64")),
                    )
                ).tostring()
            )
        )
        return rtn
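Note: every snippet in this listing uses the legacy top-level python-lz4 API (lz4.compress, lz4.compressHC, lz4.decompress), which later releases of the lz4 package removed in favour of lz4.block. A minimal compatibility shim, assuming a modern lz4 (>= 0.10) is installed, could look like this:

import lz4.block

def compressHC(data):
    # High-compression block mode, standing in for the legacy lz4.compressHC().
    return lz4.block.compress(data, mode='high_compression')

def decompress(data):
    # The block API stores the uncompressed size in a 4-byte prefix by default,
    # matching the framing the legacy lz4.compress()/lz4.compressHC() produced.
    return lz4.block.decompress(data)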
Example #2
    def _pandas_to_bucket(self, df, symbol):
        start = self._to_dt(df.index[0].to_datetime())
        end = self._to_dt(df.index[-1].to_datetime())
        rtn = {START: start, END: end, SYMBOL: symbol}
        rtn[VERSION] = CHUNK_VERSION_NUMBER
        rtn[COUNT] = len(df)
        rtn[COLUMNS] = {}

        logger.warn("NB treating all values as 'exists' - no longer sparse")
        rowmask = Binary(
            lz4.compressHC(np.packbits(np.ones(len(df), dtype='uint8'))))

        recs = df.to_records(convert_datetime64=False)
        for col in df:
            array = self._ensure_supported_dtypes(recs[col])
            col_data = {}
            col_data[DATA] = Binary(lz4.compressHC(array.tostring()))
            col_data[ROWMASK] = rowmask
            col_data[DTYPE] = self._str_dtype(array.dtype)
            rtn[COLUMNS][col] = col_data
        rtn[INDEX] = Binary(
            lz4.compressHC(
                np.concatenate(([
                    recs['index'][0].astype('datetime64[ms]').view('uint64')
                ],
                                np.diff(recs['index'].astype('datetime64[ms]').
                                        view('uint64')))).tostring()))
        return rtn
Example #3
    def _pandas_to_bucket(df, symbol, initial_image):
        rtn = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(df)}
        end = to_dt(df.index[-1].to_datetime())
        if initial_image :
            if 'index' in initial_image:
                start = min(to_dt(df.index[0].to_datetime()), initial_image['index'])
            else:
                start = to_dt(df.index[0].to_datetime())
            image_start = initial_image.get('index', start)
            image = {k: v for k, v in initial_image.items() if k != 'index'}
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
            final_image = TickStore._pandas_compute_final_image(df, initial_image, end)
        else:
            start = to_dt(df.index[0].to_datetime())
            final_image = {}
        rtn[END] = end
        rtn[START] = start

        logger.warning("NB treating all values as 'exists' - no longer sparse")
        rowmask = Binary(lz4.compressHC(np.packbits(np.ones(len(df), dtype='uint8'))))

        recs = df.to_records(convert_datetime64=False)
        for col in df:
            array = TickStore._ensure_supported_dtypes(recs[col])
            col_data = {}
            col_data[DATA] = Binary(lz4.compressHC(array.tostring()))
            col_data[ROWMASK] = rowmask
            col_data[DTYPE] = TickStore._str_dtype(array.dtype)
            rtn[COLUMNS][col] = col_data
        rtn[INDEX] = Binary(lz4.compressHC(np.concatenate(([recs['index'][0].astype('datetime64[ms]').view('uint64')],
                                                           np.diff(recs['index'].astype('datetime64[ms]').view('uint64')))
                                                          ).tostring()))
        return rtn, final_image
Example #4
def test_write_object():
    arctic_lib = Mock()
    self = create_autospec(PickleStore)
    version = {"_id": ObjectId()}
    PickleStore.write(self, arctic_lib, version, "sentinel.symbol", sentinel.item, sentinel.previous_version)
    assert "data" not in version

    assert version["blob"] == "__chunked__"
    coll = arctic_lib.get_top_level_collection.return_value
    assert coll.update_one.call_args_list == [
        call(
            {
                "sha": checksum(
                    "sentinel.symbol",
                    {"data": Binary(lz4.compressHC(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL)))},
                ),
                "symbol": "sentinel.symbol",
            },
            {
                "$set": {
                    "segment": 0,
                    "data": Binary(lz4.compressHC(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL)), 0),
                },
                "$addToSet": {"parent": version["_id"]},
            },
            upsert=True,
        )
    ]
Example #5
    def _to_bucket(ticks, symbol, initial_image):
        rtn = {
            SYMBOL: symbol,
            VERSION: CHUNK_VERSION_NUMBER,
            COLUMNS: {},
            COUNT: len(ticks)
        }
        data = {}
        rowmask = {}
        start = to_dt(ticks[0]['index'])
        end = to_dt(ticks[-1]['index'])
        final_image = copy.copy(initial_image) if initial_image else {}
        for i, t in enumerate(ticks):
            if initial_image:
                final_image.update(t)
            for k, v in iteritems(t):
                try:
                    if k != 'index':
                        rowmask[k][i] = 1
                    else:
                        v = TickStore._to_ms(v)
                        if data[k][-1] > v:
                            raise UnorderedDataException(
                                "Timestamps out-of-order: %s > %s" %
                                (ms_to_datetime(data[k][-1]), t))
                    data[k].append(v)
                except KeyError:
                    if k != 'index':
                        rowmask[k] = np.zeros(len(ticks), dtype='uint8')
                        rowmask[k][i] = 1
                    data[k] = [v]

        rowmask = dict([(k, Binary(lz4.compressHC(np.packbits(v).tostring())))
                        for k, v in iteritems(rowmask)])
        for k, v in iteritems(data):
            if k != 'index':
                v = np.array(v)
                v = TickStore._ensure_supported_dtypes(v)
                rtn[COLUMNS][k] = {
                    DATA: Binary(lz4.compressHC(v.tostring())),
                    DTYPE: TickStore._str_dtype(v.dtype),
                    ROWMASK: rowmask[k]
                }

        if initial_image:
            image_start = initial_image.get('index', start)
            if image_start > start:
                raise UnorderedDataException(
                    "Image timestamp is after first tick: %s > %s" %
                    (image_start, start))
            start = min(start, image_start)
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
        rtn[END] = end
        rtn[START] = start
        rtn[INDEX] = Binary(
            lz4.compressHC(
                np.concatenate(
                    ([data['index'][0]], np.diff(data['index']))).tostring()))
        return rtn, final_image
Example #6
def test_write_object():
    arctic_lib = Mock()
    self = create_autospec(PickleStore)
    version = {'_id': ObjectId()}
    PickleStore.write(self, arctic_lib, version, 'sentinel.symbol', sentinel.item, sentinel.previous_version)
    assert 'data' not in version

    assert version['blob'] == '__chunked__'
    coll = arctic_lib.get_top_level_collection.return_value
    assert coll.update_one.call_args_list == [call({'sha': checksum('sentinel.symbol',
                                                                    {'data': Binary(lz4.compressHC(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL)))}), 'symbol': 'sentinel.symbol'},
                                               {'$set': {'segment': 0,
                                                         'data': Binary(lz4.compressHC(cPickle.dumps(sentinel.item, cPickle.HIGHEST_PROTOCOL)), 0)},
                                                         '$addToSet': {'parent': version['_id']}}, upsert=True)]
Example #7
File: tickstore.py  Project: r0k3/arctic
    def _to_bucket(ticks, symbol, initial_image):
        rtn = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(ticks)}
        data = {}
        rowmask = {}
        start = to_dt(ticks[0]["index"])
        end = to_dt(ticks[-1]["index"])
        final_image = copy.copy(initial_image) if initial_image else {}
        for i, t in enumerate(ticks):
            if initial_image:
                final_image.update(t)
            for k, v in iteritems(t):
                try:
                    if k != "index":
                        rowmask[k][i] = 1
                    else:
                        v = TickStore._to_ms(v)
                        if data[k][-1] > v:
                            raise UnorderedDataException(
                                "Timestamps out-of-order: %s > %s" % (ms_to_datetime(data[k][-1]), t)
                            )
                    data[k].append(v)
                except KeyError:
                    if k != "index":
                        rowmask[k] = np.zeros(len(ticks), dtype="uint8")
                        rowmask[k][i] = 1
                    data[k] = [v]

        rowmask = dict([(k, Binary(lz4.compressHC(np.packbits(v).tostring()))) for k, v in iteritems(rowmask)])
        for k, v in iteritems(data):
            if k != "index":
                v = np.array(v)
                v = TickStore._ensure_supported_dtypes(v)
                rtn[COLUMNS][k] = {
                    DATA: Binary(lz4.compressHC(v.tostring())),
                    DTYPE: TickStore._str_dtype(v.dtype),
                    ROWMASK: rowmask[k],
                }

        if initial_image:
            image_start = initial_image.get("index", start)
            if image_start > start:
                raise UnorderedDataException("Image timestamp is after first tick: %s > %s" % (image_start, start))
            start = min(start, image_start)
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
        rtn[END] = end
        rtn[START] = start
        rtn[INDEX] = Binary(lz4.compressHC(np.concatenate(([data["index"][0]], np.diff(data["index"]))).tostring()))
        return rtn, final_image
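The INDEX blob written by _to_bucket and _pandas_to_bucket is an LZ4-compressed delta encoding of the millisecond timestamps: the first value followed by successive differences. A rough sketch of the inverse transform (the helper name is hypothetical; it assumes the same legacy lz4 module and numpy as the snippets above):

import numpy as np
import lz4

def decode_index(index_blob):
    # Undo the delta encoding: cumulative sum of [first, diff, diff, ...].
    deltas = np.frombuffer(lz4.decompress(index_blob), dtype='uint64')
    return np.cumsum(deltas).astype('datetime64[ms]')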
Example #8
def zip_compress(plain, level=9):
	if not USE_LZ4:
		compressed = zlib.compress(plain, level)
		return compressed[2:]
	else:		
		compressed = lz4.compress(plain) if level < 9 else lz4.compressHC(plain)
		return compressed[4:]
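zip_compress strips the container framing from both codecs: the 2-byte zlib header in the deflate branch, and the 4-byte little-endian length prefix that the legacy lz4 bindings prepend to a block. A hedged sketch of a matching decompressor (the USE_LZ4 flag is reused from the snippet, and the caller is assumed to know the original length for the LZ4 branch):

import struct
import zlib
import lz4

def zip_decompress(compressed, plain_len):
    if not USE_LZ4:
        # Raw deflate stream (zlib header stripped); the adler32 trailer is ignored.
        return zlib.decompressobj(-15).decompress(compressed)
    # Re-attach the uncompressed-length prefix the legacy lz4.decompress() expects.
    return lz4.decompress(struct.pack("<I", plain_len) + compressed)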
Example #9
 def dump_cache(self):
     batch = leveldb.WriteBatch()
     for pattern, triple_id_pairs in self.cache.iteritems():
         try:
             pattern_triples = self.leveldb.Get(pattern)
             pattern_triples = lz4.decompress(pattern_triples)
             pattern_triples = pattern_triples.split(
                 MERGING_INDEX_TRIPLE_LINE_DELIMITER)
         except KeyError:
             pattern_triples = []
         logging.info("Merging bin from %d to %d (%d new)." % (
             len(pattern_triples),
             len(pattern_triples) + len(triple_id_pairs),
             len(triple_id_pairs),
         ))
         for triple_id_pair in triple_id_pairs:
             pattern_triples.append(
                 MERGING_INDEX_TRIPLE_ID_DELIMITER.join(triple_id_pair))
         pattern_triples_dump = MERGING_INDEX_TRIPLE_LINE_DELIMITER.join(
             pattern_triples)
         batch.Put(pattern, lz4.compressHC(pattern_triples_dump))
     self.leveldb.Write(batch)
     logging.info("Dump %d bins." % len(self.cache))
     self.cache = {}
     self.cache_size = 0
     gc.collect()
Example #10
def test_read_backward_compatibility():
    """Test backwards compatibility with a pickled file that's created with Python 2.7.3,
    Numpy 1.7.1_ahl2 and Pandas 0.14.1
    """
    fname = path.join(path.dirname(__file__), "data", "test-data.pkl")

    # For newer versions; verify that unpickling fails when using cPickle
    if PANDAS_VERSION >= LooseVersion("0.16.1"):
        if sys.version_info[0] >= 3:
            with pytest.raises(UnicodeDecodeError), open(fname) as fh:
                cPickle.load(fh)
        else:
            with pytest.raises(TypeError), open(fname) as fh:
                cPickle.load(fh)

    # Verify that PickleStore() uses a backwards compatible unpickler.
    store = PickleStore()

    with open(fname) as fh:
        # PickleStore compresses data with lz4
        version = {'blob': lz4.compressHC(fh.read())}
    df = store.read(sentinel.arctic_lib, version, sentinel.symbol)

    expected = pd.DataFrame(range(4), pd.date_range(start="20150101",
                                                    periods=4))
    assert (df == expected).all().all()
Example #11
    def to_response(self, save=True, enable_debug=False):
        if self.task_error_code != 0:
            if self.response_body_blob is None:
                response_body = self.request_body
                self.response_body = response_body
            else:
                response_body = self.response_body
        else:
            if self.response_body_blob is None:
                response_body = json.dumps({
                    "error_code": self.task_error_code,
                    "error_message": self.task_error_message,
                })
                self.response_body = response_body
            else:
                response_body = self.response_body

        if save:
            log_str = self.log.getvalue()
            self.task_log_blob = lz4.compressHC(log_str)
            self.response_time = datetime.now()
            self.save()

        if enable_debug:
            response_body = json.dumps({
                "log": self.log_body,
                "henry": self.henry_out,
                "parse": self.parse_out,
                "graph": self.dot_out,
                "response": json.loads(response_body),
            }, indent=4)

        return HttpResponse(response_body,
                            content_type="application/json",
                            status=self.response_status)
Example #12
    def open_db(self):
        self.terms_ldb = leveldb.LevelDB(self.terms_fl)
        self.docs_ldb = leveldb.LevelDB(self.docs_fl)

        self.doc_buffer_size = 0
        self.term_buffer_size = 0

        #self.doc_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype="S%d" % self.max_doc_size)
        self.doc_flush_buffer = [None] * self.max_doc_flush_buffer_size
        self.term_flush_buffer = np.empty(self.max_term_flush_buffer_size, dtype="S%d" % self.max_term_size)
        self.doc_id_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype=np.int64)
        self.term_id_flush_buffer = np.empty(self.max_term_flush_buffer_size, dtype=np.int64)

        if self.compression == COMPRESSION.NONE:
            self.compress = lambda string: string
            self.decompress = lambda string: string
        elif self.compression == COMPRESSION.ZLIB:
            import zlib
            self.compress = lambda string: zlib.compress(string, self.compression_level)
            self.decompress = lambda string: zlib.decompress(string)
        elif self.compression == COMPRESSION.LZMA:
            import backports.lzma as lzma
            self.compress = lambda string: lzma.compress(bytearray(string), format=lzma.FORMAT_RAW)
            self.decompress = lambda data: lzma.decompress(data, format=lzma.FORMAT_RAW)
        elif self.compression == COMPRESSION.LZ4R:
            import lz4
            self.compress = lambda string: lz4.compress(string)
            self.decompress = lambda string: lz4.decompress(string)
        elif self.compression == COMPRESSION.LZ4H:
            import lz4
            self.compress = lambda string: lz4.compressHC(string)
            self.decompress = lambda string: lz4.decompress(string)
        else:
            raise Exception("Wrong compression type %r" % self.compression)
Example #13
def test_read_backward_compatibility():
    """Test backwards compatibility with a pickled file that's created with Python 2.7.3,
    Numpy 1.7.1_ahl2 and Pandas 0.14.1
    """
    fname = path.join(path.dirname(__file__), "data", "test-data.pkl")

    # For newer versions; verify that unpickling fails when using cPickle
    if PANDAS_VERSION >= LooseVersion("0.16.1"):
        if sys.version_info[0] >= 3:
            with pytest.raises(UnicodeDecodeError), open(fname) as fh:
                pickle.load(fh)
        else:    
            with pytest.raises(TypeError), open(fname) as fh:
                pickle.load(fh)

    # Verify that PickleStore() uses a backwards compatible unpickler.
    store = PickleStore()

    with open(fname) as fh:
        # PickleStore compresses data with lz4
        version = {'blob': lz4.compressHC(fh.read())}
    df = store.read(sentinel.arctic_lib, version, sentinel.symbol)

    expected = pd.DataFrame(range(4), pd.date_range(start="20150101", periods=4))
    assert (df == expected).all().all()
Example #14
def test_pickle_chunk_V1_read():
    data = {'foo': b'abcdefghijklmnopqrstuvwxyz'}
    version = {'_id': sentinel._id, 'blob': '__chunked__'}
    coll = Mock()
    arctic_lib = Mock()
    datap = lz4.compressHC(
        cPickle.dumps(data, protocol=cPickle.HIGHEST_PROTOCOL))
    data_1 = datap[0:5]
    data_2 = datap[5:]
    coll.find.return_value = [
        {
            'data': Binary(data_1),
            'symbol': 'sentinel.symbol',
            'segment': 0
        },
        {
            'data': Binary(data_2),
            'symbol': 'sentinel.symbol',
            'segment': 1
        },
    ]
    arctic_lib.get_top_level_collection.return_value = coll

    ps = PickleStore()
    assert (data == ps.read(arctic_lib, version, sentinel.symbol))
Example #15
    def write(self, arctic_lib, version, symbol, item, previous_version):
        try:
            # If it's encodeable, then ship it
            b = bson.BSON.encode({'data': item})
            if len(b) < _MAX_BSON_ENCODE:
                version['data'] = item
                return
        except InvalidDocument:
            pass

        # Pickle, chunk and store the data
        collection = arctic_lib.get_top_level_collection()
        # Try to pickle it. This is best effort
        version['blob'] = _MAGIC_CHUNKED
        pickled = lz4.compressHC(
            cPickle.dumps(item, protocol=cPickle.HIGHEST_PROTOCOL))

        for i in xrange(int(len(pickled) / _CHUNK_SIZE + 1)):
            segment = {
                'data': Binary(pickled[i * _CHUNK_SIZE:(i + 1) * _CHUNK_SIZE])
            }
            sha = checksum(symbol, segment)
            segment['segment'] = i
            collection.update_one({
                'symbol': symbol,
                'sha': sha
            }, {
                '$set': segment,
                '$addToSet': {
                    'parent': version['_id']
                }
            },
                                  upsert=True)
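The read path for a blob written this way is the mirror image: fetch the segments that reference this version, concatenate them in segment order, LZ4-decompress and unpickle; this is exactly what the __chunked__ tests elsewhere in this listing exercise. A simplified sketch (the method name is hypothetical and it reuses the lz4/cPickle imports of the snippet above):

    def read_chunked(self, arctic_lib, version, symbol):
        collection = arctic_lib.get_top_level_collection()
        segments = collection.find({'symbol': symbol, 'parent': version['_id']},
                                    sort=[('segment', 1)])
        blob = b''.join(segment['data'] for segment in segments)
        return cPickle.loads(lz4.decompress(blob))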
Example #16
def test_pickle_store_future_version():
    data = {'foo': b'abcdefghijklmnopqrstuvwxyz'}
    version = {'_id': sentinel._id, 'blob': '__chunked__VERSION_ONE_MILLION'}
    coll = Mock()
    arctic_lib = Mock()
    datap = lz4.compressHC(
        cPickle.dumps(data, protocol=cPickle.HIGHEST_PROTOCOL))
    data_1 = datap[0:5]
    data_2 = datap[5:]
    coll.find.return_value = [
        {
            'data': Binary(data_1),
            'symbol': 'sentinel.symbol',
            'segment': 0
        },
        {
            'data': Binary(data_2),
            'symbol': 'sentinel.symbol',
            'segment': 1
        },
    ]
    arctic_lib.get_top_level_collection.return_value = coll

    ps = PickleStore()
    with pytest.raises(UnsupportedPickleStoreVersion) as e:
        ps.read(arctic_lib, version, sentinel.symbol)
    assert ('unsupported version of pickle store' in str(e))
Example #17
File: ciso.py  Project: raehik/procfw
def zip_compress(plain, level=9):
    if not USE_LZ4:
        compressed = zlib.compress(plain, level)
        return compressed[2:]
    else:
        compressed = lz4.compress(plain) if level < 9 else lz4.compressHC(plain)
        return compressed[4:]
Example #18
def loadCOCOAndOverSeg(im_set="test", detector="sf", N_SPIX=1000, fold=0):
    from pickle import dumps, loads
    try:
        import lz4, pickle
        decompress = lambda s: pickle.loads(lz4.decompress(s))
        compress = lambda o: lz4.compressHC(pickle.dumps(o))
    except:
        compress = lambda x: x
        decompress = lambda x: x
    from gop import contour, dataset, segmentation
    FILE_NAME = '/tmp/coco_%s_%s_%d_%d.dat' % (im_set, detector, N_SPIX, fold)
    try:
        with open(FILE_NAME, 'rb') as f:
            over_segs, segmentations = loads(f.read())
            f.close()
            over_seg = segmentation.ImageOverSegmentationVec()
            for i in over_segs:
                over_seg.append(decompress(i))
            return over_seg, [decompress(i) for i in segmentations], []
            #return over_segs,segmentations,[]
    except FileNotFoundError:
        pass

    # Load the dataset
    data = dataset.loadCOCO2014(im_set == "train", im_set == "valid", fold)

    # COCO has some pretty gray scale images (WTF!!!)
    images = [
        e['image'] if e['image'].C == 3 else e['image'].tileC(3) for e in data
    ]
    try:
        segmentations = [e['segmentation'] for e in data]
    except:
        segmentations = []

    # Do the over-segmentation
    if detector == 'sf':
        detector = contour.StructuredForest()
        detector.load('../data/sf.dat')
    elif detector == "mssf":
        detector = contour.MultiScaleStructuredForest()
        detector.load("../data/sf.dat")
    elif detector == 'st':
        detector = contour.SketchTokens()
        detector.load('../data/st_full_c.dat')
    else:
        detector = contour.DirectedSobel()

    if detector != None:
        over_segs = segmentation.generateGeodesicKMeans(
            detector, images, N_SPIX)
    with open(FILE_NAME, 'wb') as f:
        #f.write( dumps( (over_segs,segmentations) ) )
        f.write(
            dumps(
                ([compress(i)
                  for i in over_segs], [compress(i) for i in segmentations])))
        f.close()

    return over_segs, segmentations, []
Example #19
 def tostring(self):
     return lz4.compressHC(
         json.dumps({
             "u": self.url,
             "t": self.title,
             "c": self.content,
             "i": self.idx_terms,
         }))
Example #20
def roundtrip(size=None):
    if size is None:
        size = struct.unpack(">I", b"\0" + os.urandom(3))[0]
    data = os.urandom(size)
    assert rustlz4.decompress(pylz4.compress(data)) == data
    assert pylz4.decompress(buffer(rustlz4.compress(data))) == data
    assert rustlz4.decompress(pylz4.compressHC(data)) == data
    assert pylz4.decompress(buffer(rustlz4.compresshc(data))) == data
Example #21
def loadVOCAndOverSeg( im_set="test", detector="sf", N_SPIX=1000, EVAL_DIFFICULT=False, year="2012" ):
	from pickle import dumps,loads
	try:
		import lz4, pickle
		decompress = lambda s: pickle.loads( lz4.decompress( s ) )
		compress = lambda o: lz4.compressHC( pickle.dumps( o ) )
	except:
		compress = lambda x: x
		decompress = lambda x: x
	from gop import contour,dataset,segmentation
	FILE_NAME = '/tmp/%s_%s_%d_%d_%s.dat'%(im_set,detector,N_SPIX,EVAL_DIFFICULT,year)
	try:
		with open(FILE_NAME,'rb') as f:
			over_segs,segmentations,boxes = loads( f.read() )
			f.close()
			over_seg = segmentation.ImageOverSegmentationVec()
			for i in over_segs:
				over_seg.append( decompress(i) )
			return over_seg,[decompress(i) for i in segmentations],[decompress(i) for i in boxes]
	except IOError:
		pass
	
	# Load the dataset
	#data = eval("dataset.loadVOC2012_small")(im_set=="train",im_set=="valid",im_set=="test")
	data = eval("dataset.loadVOC%s"%year)(im_set=="train",im_set=="valid",im_set=="test")

	
	images = [e['image'] for e in data]
	try:
		segmentations = [e['segmentation'] for e in data]
	except:
		segmentations = []
	boxes = [[a['bbox'] for a in e['annotation'] if not a['difficult'] or EVAL_DIFFICULT] for e in data]

	# Do the over-segmentation
	if detector=='sf':
		detector = contour.StructuredForest()
		detector.load( '../data/sf.dat' )
	elif detector == "mssf":
		detector = contour.MultiScaleStructuredForest()
		detector.load( "../data/sf.dat" )
	elif detector=='st':
		detector = contour.SketchTokens()
		detector.load( '../data/st_full_c.dat' )
	else:
		detector = contour.DirectedSobel()
	
	if detector != None:
		over_segs = segmentation.generateGeodesicKMeans( detector, images, N_SPIX )
	#try:
	with open(FILE_NAME,'wb') as f:
		f.write( dumps( ([compress(i) for i in over_segs],[compress(i) for i in segmentations],[compress(i) for i in boxes]) ) )
		f.close()
	#except FileNotFoundError:
		#pass
	
	return over_segs,segmentations,boxes
Example #22
 def lz4_compress(packet, level):
     if level >= 9:
         return level | LZ4_FLAG, compressHC(packet)
     if level <= 2:
         #clamp it: 0->17, 1->12, 2->7, 3->2, >=4->1
         accel = max(1, 17 - level * 5)
         return level | LZ4_FLAG, LZ4_compress_fast(packet, accel)
     return level | LZ4_FLAG, LZ4_compress(packet)
Example #23
    def streamer():
        fin = proto.fin
        opener = repo.sopener

        cachepath = repo.ui.config("remotefilelog", "servercachepath")
        if not cachepath:
            cachepath = os.path.join(repo.path, "remotefilelogcache")

        # everything should be user & group read/writable
        oldumask = os.umask(0o002)
        try:
            while True:
                request = fin.readline()[:-1]
                if not request:
                    break

                node = bin(request[:40])
                if node == nullid:
                    yield '0\n'
                    continue

                path = request[40:]

                filecachepath = os.path.join(cachepath, path, hex(node))
                if not os.path.exists(filecachepath):
                    filectx = repo.filectx(path, fileid=node)
                    if filectx.node() == nullid:
                        repo.changelog = changelog.changelog(repo.sopener)
                        filectx = repo.filectx(path, fileid=node)

                    text = createfileblob(filectx)
                    text = lz4.compressHC(text)

                    dirname = os.path.dirname(filecachepath)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    try:
                        with open(filecachepath, "w") as f:
                            f.write(text)
                    except IOError:
                        # Don't abort if the user only has permission to read,
                        # and not write.
                        pass
                else:
                    with open(filecachepath, "r") as f:
                        text = f.read()

                yield '%d\n%s' % (len(text), text)

                # it would be better to only flush after processing a whole batch
                # but currently we don't know if there are more requests coming
                proto.fout.flush()
        finally:
            os.umask(oldumask)
Example #24
def test_unpickle_highest_protocol():
    """Pandas version 0.14.1 fails to unpickle a pandas.Series() in compat mode if the
    container has been pickled with HIGHEST_PROTOCOL.
    """
    version = {"blob": lz4.compressHC(cPickle.dumps(pd.Series(), protocol=cPickle.HIGHEST_PROTOCOL))}

    store = PickleStore()
    ps = store.read(sentinel.arctic_lib, version, sentinel.symbol)

    expected = pd.Series()
    assert (ps == expected).all()
Example #25
    def streamer():
        fin = proto.fin
        opener = repo.sopener

        cachepath = repo.ui.config("remotefilelog", "servercachepath")
        if not cachepath:
            cachepath = os.path.join(repo.path, "remotefilelogcache")

        # everything should be user & group read/writable
        oldumask = os.umask(0o002)
        try:
            while True:
                request = fin.readline()[:-1]
                if not request:
                    break

                node = bin(request[:40])
                if node == nullid:
                    yield '0\n'
                    continue

                path = request[40:]

                filecachepath = os.path.join(cachepath, path, hex(node))
                if not os.path.exists(filecachepath):
                    filectx = repo.filectx(path, fileid=node)
                    if filectx.node() == nullid:
                        repo.changelog = changelog.changelog(repo.sopener)
                        filectx = repo.filectx(path, fileid=node)

                    text = createfileblob(filectx)
                    text = lz4.compressHC(text)

                    dirname = os.path.dirname(filecachepath)
                    if not os.path.exists(dirname):
                        os.makedirs(dirname)
                    try:
                        with open(filecachepath, "w") as f:
                            f.write(text)
                    except IOError:
                        # Don't abort if the user only has permission to read,
                        # and not write.
                        pass
                else:
                    with open(filecachepath, "r") as f:
                        text = f.read()

                yield '%d\n%s' % (len(text), text)

                # it would be better to only flush after processing a whole batch
                # but currently we don't know if there are more requests coming
                proto.fout.flush()
        finally:
            os.umask(oldumask)
Example #26
File: util.py  Project: ILoveFree2/gop
def loadCOCOAndOverSeg( im_set="test", detector="sf", N_SPIX=1000, fold=0 ):
	from pickle import dumps,loads
	try:
		import lz4, pickle
		decompress = lambda s: pickle.loads( lz4.decompress( s ) )
		compress = lambda o: lz4.compressHC( pickle.dumps( o ) )
	except:
		compress = lambda x: x
		decompress = lambda x: x
	from gop import contour,dataset,segmentation
	FILE_NAME = '/tmp/coco_%s_%s_%d_%d.dat'%(im_set,detector,N_SPIX,fold)
	try:
		with open(FILE_NAME,'rb') as f:
			over_segs,segmentations = loads( f.read() )
			f.close()
			over_seg = segmentation.ImageOverSegmentationVec()
			for i in over_segs:
				over_seg.append( decompress(i) )
			return over_seg,[decompress(i) for i in segmentations],[]
			#return over_segs,segmentations,[]
	except FileNotFoundError:
		pass
	
	# Load the dataset
	data = dataset.loadCOCO2014( im_set=="train",im_set=="valid", fold)
	
	# COCO has some pretty gray scale images (WTF!!!)
	images = [e['image'] if e['image'].C==3 else e['image'].tileC(3)  for e in data]
	try:
		segmentations = [e['segmentation'] for e in data]
	except:
		segmentations = []
	
	# Do the over-segmentation
	if detector=='sf':
		detector = contour.StructuredForest()
		detector.load( '../data/sf.dat' )
	elif detector == "mssf":
		detector = contour.MultiScaleStructuredForest()
		detector.load( "../data/sf.dat" )
	elif detector=='st':
		detector = contour.SketchTokens()
		detector.load( '../data/st_full_c.dat' )
	else:
		detector = contour.DirectedSobel()
	
	if detector != None:
		over_segs = segmentation.generateGeodesicKMeans( detector, images, N_SPIX )
	with open(FILE_NAME,'wb') as f:
		#f.write( dumps( (over_segs,segmentations) ) )
		f.write( dumps( ([compress(i) for i in over_segs],[compress(i) for i in segmentations]) ) )
		f.close()
	
	return over_segs,segmentations,[]
Example #27
    def _to_bucket(ticks, symbol, initial_image):
        rtn = {SYMBOL: symbol, VERSION: CHUNK_VERSION_NUMBER, COLUMNS: {}, COUNT: len(ticks)}
        data = {}
        rowmask = {}
        start = to_dt(ticks[0]['index'])
        end = to_dt(ticks[-1]['index'])
        final_image = copy.copy(initial_image) if initial_image else {}
        for i, t in enumerate(ticks):
            if initial_image:
                final_image.update(t)
            for k, v in iteritems(t):
                try:
                    if k != 'index':
                        rowmask[k][i] = 1
                    else:
                        v = TickStore._to_ms(v)
                    data[k].append(v)
                except KeyError:
                    if k != 'index':
                        rowmask[k] = np.zeros(len(ticks), dtype='uint8')
                        rowmask[k][i] = 1
                    data[k] = [v]

        rowmask = dict([(k, Binary(lz4.compressHC(np.packbits(v).tostring())))
                        for k, v in iteritems(rowmask)])
        for k, v in iteritems(data):
            if k != 'index':
                v = np.array(v)
                v = TickStore._ensure_supported_dtypes(v)
                rtn[COLUMNS][k] = {DATA: Binary(lz4.compressHC(v.tostring())),
                                   DTYPE: TickStore._str_dtype(v.dtype),
                                   ROWMASK: rowmask[k]}

        if initial_image:
            image_start = initial_image.get('index', start)
            start = min(start, image_start)
            rtn[IMAGE_DOC] = {IMAGE_TIME: image_start, IMAGE: initial_image}
        rtn[END] = end
        rtn[START] = start
        rtn[INDEX] = Binary(lz4.compressHC(np.concatenate(([data['index'][0]], np.diff(data['index']))).tostring()))
        return rtn, final_image
Example #28
    def _to_bucket(self, ticks, symbol):
        data = {}
        rowmask = {}
        start = self._to_dt(ticks[0]['index'])
        end = self._to_dt(ticks[-1]['index'])
        for i, t in enumerate(ticks):
            for k, v in t.iteritems():
                try:
                    if k != 'index':
                        rowmask[k][i] = 1
                    else:
                        v = self._to_ms(v)
                    data[k].append(v)
                except KeyError:
                    if k != 'index':
                        rowmask[k] = np.zeros(len(ticks), dtype='uint8')
                        rowmask[k][i] = 1
                    data[k] = [v]

        rowmask = dict([(k, Binary(lz4.compressHC(np.packbits(v).tostring())))
                        for k, v in rowmask.iteritems()])

        rtn = {START: start, END: end, SYMBOL: symbol}
        rtn[VERSION] = CHUNK_VERSION_NUMBER
        rtn[COUNT] = len(ticks)
        rtn[COLUMNS] = {}
        for k, v in data.iteritems():
            if k != 'index':
                v = np.array(v)
                v = self._ensure_supported_dtypes(v)
                rtn[COLUMNS][k] = {
                    DATA: Binary(lz4.compressHC(v.tostring())),
                    DTYPE: self._str_dtype(v.dtype),
                    ROWMASK: rowmask[k]
                }

        rtn[INDEX] = Binary(
            lz4.compressHC(
                np.concatenate(
                    ([data['index'][0]], np.diff(data['index']))).tostring()))
        return rtn
Example #29
def test_read_object_2():
    self = create_autospec(PickleStore)
    version = {"_id": sentinel._id, "blob": "__chunked__"}
    coll = Mock()
    arctic_lib = Mock()
    coll.find.return_value = [{"data": Binary(lz4.compressHC(cPickle.dumps(object))), "symbol": "sentinel.symbol"}]
    arctic_lib.get_top_level_collection.return_value = coll

    assert PickleStore.read(self, arctic_lib, version, sentinel.symbol) == object
    assert coll.find.call_args_list == [
        call({"symbol": sentinel.symbol, "parent": sentinel._id}, sort=[("segment", 1)])
    ]
Example #30
def test_write_object():
    arctic_lib = Mock()
    self = create_autospec(PickleStore)
    version = {'_id': ObjectId()}
    PickleStore.write(self, arctic_lib, version, 'sentinel.symbol',
                      sentinel.item, sentinel.previous_version)
    assert 'data' not in version

    assert version['blob'] == '__chunked__'
    coll = arctic_lib.get_top_level_collection.return_value
    assert coll.update_one.call_args_list == [
        call(
            {
                'sha':
                checksum(
                    'sentinel.symbol', {
                        'data':
                        Binary(
                            lz4.compressHC(
                                cPickle.dumps(sentinel.item,
                                              cPickle.HIGHEST_PROTOCOL)))
                    }),
                'symbol':
                'sentinel.symbol'
            }, {
                '$set': {
                    'segment':
                    0,
                    'data':
                    Binary(
                        lz4.compressHC(
                            cPickle.dumps(sentinel.item,
                                          cPickle.HIGHEST_PROTOCOL)), 0)
                },
                '$addToSet': {
                    'parent': version['_id']
                }
            },
            upsert=True)
    ]
Example #31
def test_read_object_2():
    self = create_autospec(PickleStore)
    version = {'_id': sentinel._id,
               'blob': '__chunked__'}
    coll = Mock()
    arctic_lib = Mock()
    coll.find.return_value = [{'data': Binary(lz4.compressHC(cPickle.dumps(object))),
                               'symbol': 'sentinel.symbol'}
                              ]
    arctic_lib.get_top_level_collection.return_value = coll

    assert PickleStore.read(self, arctic_lib, version, sentinel.symbol) == object
    assert coll.find.call_args_list == [call({'symbol': sentinel.symbol, 'parent': sentinel._id}, sort=[('segment', 1)])]
Example #32
    def _to_bucket(self, ticks, symbol):
        data = {}
        rowmask = {}
        start = to_dt(ticks[0]["index"])
        end = to_dt(ticks[-1]["index"])
        for i, t in enumerate(ticks):
            for k, v in t.iteritems():
                try:
                    if k != "index":
                        rowmask[k][i] = 1
                    else:
                        v = self._to_ms(v)
                    data[k].append(v)
                except KeyError:
                    if k != "index":
                        rowmask[k] = np.zeros(len(ticks), dtype="uint8")
                        rowmask[k][i] = 1
                    data[k] = [v]

        rowmask = dict([(k, Binary(lz4.compressHC(np.packbits(v).tostring()))) for k, v in rowmask.iteritems()])

        rtn = {START: start, END: end, SYMBOL: symbol}
        rtn[VERSION] = CHUNK_VERSION_NUMBER
        rtn[COUNT] = len(ticks)
        rtn[COLUMNS] = {}
        for k, v in data.iteritems():
            if k != "index":
                v = np.array(v)
                v = self._ensure_supported_dtypes(v)
                rtn[COLUMNS][k] = {
                    DATA: Binary(lz4.compressHC(v.tostring())),
                    DTYPE: self._str_dtype(v.dtype),
                    ROWMASK: rowmask[k],
                }

        rtn[INDEX] = Binary(lz4.compressHC(np.concatenate(([data["index"][0]], np.diff(data["index"]))).tostring()))
        return rtn
Example #33
def test_unpickle_highest_protocol():
    """Pandas version 0.14.1 fails to unpickle a pandas.Series() in compat mode if the
    container has been pickled with HIGHEST_PROTOCOL.
    """
    version = {
        'blob':
        lz4.compressHC(
            cPickle.dumps(pd.Series(), protocol=cPickle.HIGHEST_PROTOCOL)),
    }

    store = PickleStore()
    ps = store.read(sentinel.arctic_lib, version, sentinel.symbol)

    expected = pd.Series()
    assert (ps == expected).all()
Example #34
def compressGr(dat, version) :
    if ord(dat[1]) < version :
        dat = dat[0] + chr(version) + dat[2:]
    datc = lz4.compressHC(dat[:-4])[4:]  # strip initial length and last 4 bytes
    # now find the final tuple
    end = len(datc)
    start = 0
    curr = lz4tuple(start)
    while curr.end < end :
        start = curr.end
        curr = parseTuple(datc, start, end)
    if curr.end > end :
        print "Sync error: %s" % (curr)
    newend = write_literal(curr.literal_len + 4, 4) + datc[curr.literal:curr.literal+curr.literal_len+1] + dat[-4:]
    lz4hdr = struct.pack(">L", (1 << 27) + (len(dat) & 0x7FFFFFF))
    return dat[0:4] + lz4hdr + datc[0:curr.start] + newend
Example #35
def test_performance_sequential(n, length):
    _str = random_string(length)
    _strarr = [_str for _ in range(n)]
    now = dt.now()
    [c.decompress(y) for y in [c.compressHC(x) for x in _strarr]]
    clz4_time = (dt.now() - now).total_seconds()
    now = dt.now()
    c.decompressarr(c.compressarrHC(_strarr))
    clz4_time_p = (dt.now() - now).total_seconds()
    now = dt.now()
    [lz4.decompress(y) for y in [lz4.compressHC(x) for x in _strarr]]
    lz4_time = (dt.now() - now).total_seconds()
    print()
    print("LZ4 Test %sx len:%s" % (n, length))
    print("    Cython LZ4 %s s" % clz4_time)
    print("    Cython LZ4 Parallel %s s" % clz4_time_p)
    print("    LZ4 %s s" % lz4_time)
Example #36
def incominghook(ui, repo, node, source, url, **kwargs):
    """Server hook that produces the shallow file blobs immediately after
    a commit, in anticipation of them being requested soon.
    """
    cachepath = repo.ui.config("remotefilelog", "servercachepath")
    if not cachepath:
        cachepath = os.path.join(repo.path, "remotefilelogcache")

    heads = repo.revs("heads(%s::)" % node)

    # everything should be user & group read/writable
    oldumask = os.umask(0o002)
    try:
        count = 0
        for head in heads:
            mf = repo[head].manifest()
            for filename, filenode in mf.iteritems():
                filecachepath = os.path.join(cachepath, filename,
                                             hex(filenode))
                if os.path.exists(filecachepath):
                    continue

                # This can be a bit slow. Don't block the commit returning
                # for large commits.
                if count > 500:
                    break
                count += 1

                filectx = repo.filectx(filename, fileid=filenode)

                text = createfileblob(filectx)
                text = lz4.compressHC(text)

                dirname = os.path.dirname(filecachepath)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                f = open(filecachepath, "w")
                try:
                    f.write(text)
                finally:
                    f.close()
    finally:
        os.umask(oldumask)
Example #37
def compress_array(str_list):
    """
    Compress an array of strings

    By default LZ4 mode is standard in interactive mode,
    and high compresion in applications/scripts
    """
    if _should_use_lz4hc():
        # Less than 5 chunks its quicker to compress sequentially..
        if len(str_list) > LZ4HC_N_PARALLEL:
            return clz4.compressarrHC(str_list)
        else:
            return [clz4.compressHC(s) for s in str_list]
    else:
        # Less than 50 chunks its quicker to compress sequentially..
        if len(str_list) > LZ4_N_PARALLEL:
            return clz4.compressarr(str_list)
        else:
            return [clz4.compress(s) for s in str_list]
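A matching decompress helper can be sketched from the c.decompress/c.decompressarr calls in the benchmark of Example #35, assuming c there and clz4 here are the same extension module; reusing LZ4_N_PARALLEL as the parallel threshold is an assumption, not the library's documented cut-off:

def decompress_array(str_list):
    """
    Decompress an array of strings produced by compress_array (sketch)
    """
    if len(str_list) > LZ4_N_PARALLEL:
        return clz4.decompressarr(str_list)
    return [clz4.decompress(s) for s in str_list]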
Example #38
def incominghook(ui, repo, node, source, url, **kwargs):
    """Server hook that produces the shallow file blobs immediately after
    a commit, in anticipation of them being requested soon.
    """
    cachepath = repo.ui.config("remotefilelog", "servercachepath")
    if not cachepath:
        cachepath = os.path.join(repo.path, "remotefilelogcache")

    heads = repo.revs("heads(%s::)" % node)

    # everything should be user & group read/writable
    oldumask = os.umask(0o002)
    try:
        count = 0
        for head in heads:
            mf = repo[head].manifest()
            for filename, filenode in mf.iteritems():
                filecachepath = os.path.join(cachepath, filename, hex(filenode))
                if os.path.exists(filecachepath):
                    continue

                # This can be a bit slow. Don't block the commit returning
                # for large commits.
                if count > 500:
                    break
                count += 1

                filectx = repo.filectx(filename, fileid=filenode)

                text = createfileblob(filectx)
                text = lz4.compressHC(text)

                dirname = os.path.dirname(filecachepath)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                f = open(filecachepath, "w")
                try:
                    f.write(text)
                finally:
                    f.close()
    finally:
        os.umask(oldumask)
Example #39
    def flush(self, keep_closed=False):
        buffered_bytes = self.buffer.getvalue()
        self.buffer.truncate(0)

        if len(buffered_bytes) == 0:
            # we don't want to write an empty file because decompressing it does weird things
            if keep_closed:
                name = self.outputstream.name
                self.outputstream.close()
                os.unlink(name)
                return
            else:
                # if we're still writing, this can be a noop
                return

        compressed_bytes = lz4.compressHC(buffered_bytes)

        self.outputstream.write(compressed_bytes)
        self.outputstream.close()
        if not keep_closed:
            self.outputstream = open(self._get_next_file(), 'w')
Example #40
    def open_db(self):
        self.terms_ldb = leveldb.LevelDB(self.terms_fl)
        self.docs_ldb = leveldb.LevelDB(self.docs_fl)

        self.doc_buffer_size = 0
        self.term_buffer_size = 0

        #self.doc_flush_buffer = np.empty(self.max_doc_flush_buffer_size, dtype="S%d" % self.max_doc_size)
        self.doc_flush_buffer = [None] * self.max_doc_flush_buffer_size
        self.term_flush_buffer = np.empty(self.max_term_flush_buffer_size,
                                          dtype="S%d" % self.max_term_size)
        self.doc_id_flush_buffer = np.empty(self.max_doc_flush_buffer_size,
                                            dtype=np.int64)
        self.term_id_flush_buffer = np.empty(self.max_term_flush_buffer_size,
                                             dtype=np.int64)

        if self.compression == COMPRESSION.NONE:
            self.compress = lambda string: string
            self.decompress = lambda string: string
        elif self.compression == COMPRESSION.ZLIB:
            import zlib
            self.compress = lambda string: zlib.compress(
                string, self.compression_level)
            self.decompress = lambda string: zlib.decompress(string)
        elif self.compression == COMPRESSION.LZMA:
            import backports.lzma as lzma
            self.compress = lambda string: lzma.compress(
                bytearray(string), format=lzma.FORMAT_RAW)
            self.decompress = lambda data: lzma.decompress(
                data, format=lzma.FORMAT_RAW)
        elif self.compression == COMPRESSION.LZ4R:
            import lz4
            self.compress = lambda string: lz4.compress(string)
            self.decompress = lambda string: lz4.decompress(string)
        elif self.compression == COMPRESSION.LZ4H:
            import lz4
            self.compress = lambda string: lz4.compressHC(string)
            self.decompress = lambda string: lz4.decompress(string)
        else:
            raise Exception("Wrong compression type %r" % self.compression)
Example #41
def test_read_object_2():
    self = create_autospec(PickleStore)
    version = {'_id': sentinel._id, 'blob': '__chunked__'}
    coll = Mock()
    arctic_lib = Mock()
    coll.find.return_value = [{
        'data':
        Binary(lz4.compressHC(cPickle.dumps(object))),
        'symbol':
        'sentinel.symbol'
    }]
    arctic_lib.get_top_level_collection.return_value = coll

    assert PickleStore.read(self, arctic_lib, version,
                            sentinel.symbol) == object
    assert coll.find.call_args_list == [
        call({
            'symbol': sentinel.symbol,
            'parent': sentinel._id
        },
             sort=[('segment', 1)])
    ]
Example #42
    def write(self, arctic_lib, version, symbol, item, previous_version):
        try:
            # If it's encodeable, then ship it
            bson.BSON.encode({'data': item})
            version['data'] = item
            return
        except InvalidDocument:
            pass

        # Pickle, chunk and store the data
        collection = arctic_lib.get_top_level_collection()
        # Try to pickle it. This is best effort
        version['blob'] = _MAGIC_CHUNKED
        pickled = lz4.compressHC(cPickle.dumps(item, protocol=cPickle.HIGHEST_PROTOCOL))

        for i in xrange(len(pickled) / _CHUNK_SIZE + 1):
            segment = {'data': Binary(pickled[i * _CHUNK_SIZE : (i + 1) * _CHUNK_SIZE])}
            sha = checksum(symbol, segment)
            segment['segment'] = i
            collection.update_one({'symbol': symbol, 'sha': sha}, {'$set': segment,
                                                               '$addToSet': {'parent': version['_id']}},
                                       upsert=True)
Example #43
 def dump_cache(self):
     batch = leveldb.WriteBatch()
     for pattern, triple_id_pairs in self.cache.iteritems():
         try:
             pattern_triples = self.leveldb.Get(pattern)
             pattern_triples = lz4.decompress(pattern_triples)
             pattern_triples = pattern_triples.split(MERGING_INDEX_TRIPLE_LINE_DELIMITER)
         except KeyError:
             pattern_triples = []
         logging.info("Merging bin from %d to %d (%d new)." % (
             len(pattern_triples),
             len(pattern_triples) + len(triple_id_pairs),
             len(triple_id_pairs),
         ))
         for triple_id_pair in triple_id_pairs:
             pattern_triples.append(MERGING_INDEX_TRIPLE_ID_DELIMITER.join(triple_id_pair))
         pattern_triples_dump = MERGING_INDEX_TRIPLE_LINE_DELIMITER.join(pattern_triples)
         batch.Put(pattern, lz4.compressHC(pattern_triples_dump))
     self.leveldb.Write(batch)
     logging.info("Dump %d bins." % len(self.cache))
     self.cache = {}
     self.cache_size = 0
     gc.collect()
Example #44
def test_read_object_backwards_compat():
    self = create_autospec(PickleStore)
    version = {'blob': Binary(lz4.compressHC(cPickle.dumps(object)))}
    assert PickleStore.read(self, sentinel.arctic_lib, version,
                            sentinel.symbol) == object
Example #45
 def encode_posting_list(plist):
     return lz4.compressHC(numencode.encode_plist(plist))
Example #46
 def update_posting_list(old_plist_blob, new_plist):
     plist_blob = lz4.decompress(old_plist_blob)
     updated_plist = numencode.update_plist(plist_blob, new_plist)
     return lz4.compressHC(updated_plist)
Example #47
 def lz4_compress(packet, level):
     if level>=9:
         return level | LZ4_FLAG, compressHC(packet)
     return level | LZ4_FLAG, LZ4_compress(packet)
Example #48
 def parse_out(self, value):
     self.parse_out_blob = lz4.compressHC(value)
Example #49
 def dot_out(self, value):
     self.dot_out_blob = lz4.compressHC(json.dumps(value))
Example #50
 def response_body(self, value):
     self.response_body_blob = lz4.compressHC(value)
Example #51
 def lz4_compress(packet, level):
     if level>=9:
         return level | LZ4_FLAG, compressHC(packet)
     return level | LZ4_FLAG, LZ4_compress(packet)
Example #52
 def lz4_compress(packet, level):
     if level>=9:
         return level | LZ4_FLAG, compressHC(packet)
     #clamp it: 0->17, 1->12, 2->7, 3->2, >=4->1
     accel = max(1, 17-level*5)
     return level | LZ4_FLAG, LZ4_compress_fast(packet, accel)
Example #53
    def _add_indicator(self, score, indicator, value):
        type_ = value['type']
        type_mapper = _TYPE_MAPPING.get(type_, None)
        if type_mapper is None:
            LOG.error('%s - Unsupported indicator type: %s', self.name, type_)
            return

        nsdict = {}
        nsdict[self.namespaceuri] = self.namespace
        stix.utils.set_id_namespace(nsdict)

        spid = '{}:indicator-{}'.format(
            self.namespace,
            uuid.uuid4()
        )
        sp = stix.core.STIXPackage(id_=spid)

        observables = type_mapper['mapper'](self.namespace, indicator, value)

        for o in observables:
            id_ = '{}:indicator-{}'.format(
                self.namespace,
                uuid.uuid4()
            )

            if value['type'] == 'URL':
                eindicator = werkzeug.urls.iri_to_uri(indicator, safe_conversion=True)
            else:
                eindicator = indicator

            sindicator = stix.indicator.indicator.Indicator(
                id_=id_,
                title='{}: {}'.format(
                    value['type'],
                    eindicator
                ),
                description='{} indicator from {}'.format(
                    value['type'],
                    ', '.join(value['sources'])
                ),
                timestamp=datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
            )

            confidence = value.get('confidence', None)
            if confidence is None:
                LOG.error('%s - indicator without confidence', self.name)
                sindicator.confidence = "Unknown"  # We shouldn't be here
            elif confidence < 50:
                sindicator.confidence = "Low"
            elif confidence < 75:
                sindicator.confidence = "Medium"
            else:
                sindicator.confidence = "High"

            sindicator.add_indicator_type(type_mapper['indicator_type'])

            sindicator.add_observable(o)

            sp.add_indicator(sindicator)

        spackage = 'lz4'+lz4.compressHC(sp.to_json())
        with self.SR.pipeline() as p:
            p.multi()

            p.zadd(self.redis_skey, score, spid)
            p.hset(self.redis_skey_value, spid, spackage)

            result = p.execute()[0]

        self.statistics['added'] += result
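The indicator package is stored in a redis hash as the literal marker 'lz4' followed by the compressed JSON, keyed by the indicator id. A hedged sketch of the corresponding lookup (the method name and the plain-JSON fallback are assumptions):

    def _get_package(self, spid):
        spackage = self.SR.hget(self.redis_skey_value, spid)
        if spackage is None:
            return None
        if spackage.startswith('lz4'):
            # Stored as 'lz4' + lz4.compressHC(json); strip the marker and inflate.
            return json.loads(lz4.decompress(spackage[3:]))
        # Anything without the marker is assumed to be plain JSON.
        return json.loads(spackage)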
Example #54
    DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
    ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

from __future__ import print_function

from sys import stdout

try:
    import lz4, pickle
    decompress = lambda s: pickle.loads(lz4.decompress(s))
    compress = lambda o: lz4.compressHC(pickle.dumps(o))
except:
    compress = lambda x: x
    decompress = lambda x: x


def getDetector(detector="sf"):
    from lpo import contour
    from os import path
    basedir = path.dirname(path.dirname(path.abspath(__file__)))
    if detector == 'sf':
        r = contour.StructuredForest()
        r.load(path.join(basedir, 'data', 'sf.dat'))
    elif detector == "mssf":
        r = contour.MultiScaleStructuredForest()
        r.load(path.join(basedir, 'data', 'sf.dat'))