class VedisBackend(DBPlugin):
    """Vedis-backed storage plugin: persists samples, scans and tags."""

    def connect(self):
        """Open (or create) the Vedis database file inside the project output dir."""
        db_path = os.path.join(self.project.outdir, DB_NAME)
        self._db = Vedis(db_path)

    def get_sample(self, sha256):
        """Load the JSON record stored under *sha256* and rebuild a Sample."""
        record = json.loads(self._samples[sha256])
        return Sample(
            self.project,
            record['file'],
            record['tags'],
            record['mutations'],
            record.get('base_sample'),
        )

    def put_sample(self, sample):
        """Serialize *sample*, index it under each of its tags, then commit."""
        self._samples[sample.sha256] = sample.to_json()
        # keep reference of the sample for each tag :)
        for tag in sample.tags:
            self.put_tag(tag, sample.sha256)
        self._db.commit()

    def put_scan(self, scan, sample=None, sha256=None):
        """Store *scan* keyed by *sha256* (taken from *sample* when not given)."""
        key = sha256 if sha256 else sample.sha256
        self._scans[key] = json.dumps(scan)
        self._db.commit()

    def get_scan(self, sample=None, sha256=None):
        """Return the stored scan for the given sample/hash, or None if absent."""
        key = sha256 if sha256 else sample.sha256
        raw = self._scans[key]
        return json.loads(raw) if raw else None

    def put_tag(self, tag, sha256):
        """Register *sha256* in the set of samples carrying *tag*."""
        self._db.Set('tag_' + tag).add(sha256)

    def get_tagged_samples(self, tag):
        """Return the Vedis Set of sha256 hashes tagged with *tag*."""
        return self._db.Set('tag_' + tag)

    @property
    def _samples(self):
        # hash: sha256 -> JSON-serialized sample
        return self._db.Hash('samples')

    @property
    def _scans(self):
        # hash: sha256 -> JSON-serialized scan
        return self._db.Hash('scans')

    @property
    def all_samples(self):
        # no way to use a generator with vedis
        # still ok for small analyses, we'll need to
        # support other DBs to reduce memory usage
        for sha in self._samples.keys():
            yield self.get_sample(sha)
class RedisCacheProxy(object):
    """Caching decorator factory that memoizes bar data in an in-memory Vedis store.

    Per (ticker, timeframe) prefix it keeps:
      * ``<prefix>.index`` hash: unix timestamp (str) -> DataFrame row index
      * ``<prefix>.<field>`` list: one Vedis list per bar field
    """

    def __init__(self, backend="Vedis"):
        self._backend = backend
        self._conn = None
        self.get_connection()

    def get_connection(self):
        """Open the cache connection (only the Vedis backend is implemented)."""
        if self._backend == "Vedis":
            self._conn = Vedis(":mem:")

    def _get_hash(self, key):
        # returns None for unsupported backends — callers assume Vedis
        if self._backend == "Vedis":
            return self._conn.Hash(key)

    def _get_list(self, key):
        if self._backend == "Vedis":
            return self._conn.List(key)

    @staticmethod
    def _get_timestamp(dt):
        """Convert a datetime-like object to an integer unix timestamp (local time)."""
        return int(time.mktime(dt.timetuple()))

    def get_ticker_bar_data(self, func):
        """Wrap ``func(ticker, timeframe, start, end)`` with the Vedis cache.

        On a cache hit (both start and end timestamps present in the index)
        the frame is rebuilt from the per-field lists; on a miss *func* is
        called and its result replaces any stale cache entries.
        """
        @wraps(func)
        def wrapper(ticker, timeframe, start, end):
            prefix = ticker + "." + timeframe
            dct = self._get_hash(prefix + ".index")
            si = dct[self._get_timestamp(start)]
            ei = dct[self._get_timestamp(end)]
            if si and ei:
                # cache hit: rebuild the frame row range [si, ei]
                si = int(si)
                ei = int(ei)
                df = pd.DataFrame(columns=_BAR_FIELDS)
                for field in _BAR_FIELDS:
                    df[field] = [
                        self._get_list(prefix + "." + field)[index]
                        for index in range(si, ei + 1)
                    ]
            else:
                df = func(ticker, timeframe, start, end)
                dt = df["datetime"].apply(self._get_timestamp)
                # map timestamp -> row index, both as strings (Vedis stores text)
                temp = pd.Series(df.index.astype(str), index=dt.astype(str))
                try:
                    del self._conn[prefix + ".index"]
                except KeyError:
                    pass
                dct = self._get_hash(prefix + ".index")
                # BUG FIX: Series.iteritems() was removed in pandas 2.0;
                # Series.items() is the long-standing equivalent.
                for key, value in temp.items():
                    dct[key] = value
                for field in _BAR_FIELDS:
                    try:
                        del self._conn[prefix + "." + field]
                    except KeyError:
                        pass
                    lst = self._get_list(prefix + "." + field)
                    lst.extend(list(df[field].values))
            return df

        return wrapper
class StorageBackendVedis(StorageBackend):
    """Vedis-file implementation of the StorageBackend interface."""

    __backend_name__ = 'vedis'

    def __init__(self, **kwargs):
        # BUG FIX: ``kwargs`` is never None for a **kwargs parameter, so the
        # old ``kwargs is None`` test was dead code; only the key matters.
        if kwargs.get('database_path') is None:
            raise Exception("Vedis backend requires path argument")
        self.database_path = kwargs.get('database_path')
        self.db = Vedis(self.database_path)

    def store(self, key, field, value):
        """Store *value* under hash *key* / *field*; all three must be str."""
        if not all(isinstance(x, str) for x in (key, field, value)):
            raise Exception('key, field, value must be string')
        _hash = self.db.Hash(key)
        _hash[field] = value
        return True

    def load(self, key, field):
        """Return the value stored under *key*/*field* (None when absent)."""
        if not all(isinstance(x, str) for x in (key, field)):
            raise Exception('key, field must be string')
        _hash = self.db.Hash(key)
        return _hash[field]

    def delete(self, key, field):
        """Remove *field* from hash *key*."""
        if not all(isinstance(x, str) for x in (key, field)):
            raise Exception('key, field must be string')
        _hash = self.db.Hash(key)
        del _hash[field]
        return True

    def cleanup(self, key):
        """Best-effort removal of every field stored under hash *key*."""
        if not isinstance(key, str):
            raise Exception('key must be string')
        _hash = self.db.Hash(key)
        try:
            # BUG FIX: materialize the keys first — deleting entries while
            # iterating the live hash can skip fields or break the iterator.
            for _hkey in list(_hash):
                del _hash[_hkey]
        except Exception:
            # deliberately best-effort (original swallowed errors too), but a
            # bare ``except:`` would also trap SystemExit/KeyboardInterrupt
            pass
class VedisBackend(DBPlugin):
    """Vedis-backed storage plugin: samples, tags, scans and scan results."""

    def connect(self):
        """Open (or create) the Vedis database inside the project output dir."""
        path = os.path.join(self.project.outdir, DB_NAME)
        self._db = Vedis(path)

    def get_sample(self, sha256):
        """Rebuild a Sample from its JSON record, resolving its base sample."""
        s = self._samples[sha256]
        j = json.loads(s)
        bs = j.get('base_sample', None)
        if bs is not None:
            bs = self.get_sample(bs)  # watch out for recursion!
        return Sample(self.project, j['file'], j['tags'], j['mutations'], bs)

    @commit_on_success
    def put_sample(self, sample):
        """Serialize *sample* and index it under each of its tags."""
        self._samples[sample.sha256] = sample.to_json()
        # keep reference of the sample for each tag :)
        for t in sample.tags:
            self.put_tag(t, sample.sha256)

    @commit_on_success
    def put_tag(self, tag, sha256):
        """Register *sha256* in the set of samples carrying *tag*."""
        t = self._db.Set('tag_' + tag)
        t.add(sha256)

    def get_tagged_samples(self, tag):
        """Yield every Sample carrying *tag* (a single tag or a list of tags)."""
        if isinstance(tag, basestring):
            tag = [tag]
        for t in tag:
            for s in self._db.Set('tag_' + t):
                yield self.get_sample(s)

    @property
    def _samples(self):
        # hash: sha256 -> JSON-serialized sample
        return self._db.Hash('samples')

    @property
    def _scans(self):
        # hash: sha256 -> JSON-serialized scan
        return self._db.Hash('scans')

    @property
    def _scanresults(self):
        # hash: result uuid -> JSON-serialized scan result
        # BUG FIX: _put_scan_results referenced self._scanresults but it was
        # never defined anywhere in the class.
        return self._db.Hash('scan_results')

    def _pending_scans(self, scanner):
        return self._db.Set('{}_pending'.format(scanner))

    def _done_scans(self, scanner):
        return self._db.Set('{}_done'.format(scanner))

    def get_pending_scans(self, scanner):
        """Return the list of Scan objects still pending for *scanner*."""
        pending = self._pending_scans(scanner)
        if len(pending) == 0:
            return []
        scans = []
        for p in pending:
            d = self._get_scan(p)
            scans.append(
                Scan(scanner=scanner,
                     uuid=d['uuid'],
                     sample=d['sample'],
                     scan_id=d['scan_id'],
                     scan_results=d['scan_results']))
        return scans

    @property
    def all_samples(self):
        # TODO: fix this
        for s in self._samples.keys():
            yield self.get_sample(s)

    @commit_on_success
    def _put_scan_results(self, scan_results):
        # "update" the set of scanresults for a given scan
        # BUG FIX: the original body was ``self._scanresults[res.uuid] = scan_result``
        # where ``res`` was an undefined name (guaranteed NameError). The caller
        # (put_scan) passes the whole scan.scan_results sequence, so store each
        # result keyed by its uuid. NOTE(review): serialization via res_to_dict
        # is inferred from the sibling helper — confirm against ScanResult's API.
        for res in scan_results:
            self._scanresults[res.uuid] = json.dumps(self.res_to_dict(res))

    @staticmethod
    def res_to_dict(scan):
        """Flatten a scan result into a JSON-friendly dict."""
        return {
            'label': scan.label,
            'sample': scan.sample.sha256,
            'av': scan.av,
            'extra': scan.extra
        }

    def _get_scan(self, uuid):
        # per-scan hash keyed by the scan's uuid
        return self._db.Hash(uuid)

    def _results(self, uuid):
        # per-result hash, mirroring _get_scan
        # BUG FIX: put_scan called self._results(...) but no such method existed.
        return self._db.Hash(uuid)

    def _get_scan_result(self, scan):
        return self._db.Set(scan.uuid + '_results')

    def _res_by_sample(self, sample):
        # BUG FIX: original read ``scan.sample.sha`` — ``scan`` was undefined
        # here and samples expose ``sha256``, not ``sha``.
        return self._db.Set(sample.sha256 + '_results')

    def _res_by_av(self, av):
        return self._db.Set(av + '_results')

    def _scan_by_sample(self, sample):
        return self._db.Set(sample.sha256 + '_scans')

    @commit_on_success
    def put_scan(self, scan):
        """Persist *scan*: track its pending/done state and index its results."""
        scanner = scan.scanner.short_name
        if scan.pending:
            # add to pending Set
            self._pending_scans(scanner).add(scan.uuid)
        else:
            self._pending_scans(scanner).remove(scan.uuid)
            self._put_scan_results(scan.scan_results)
            # BUG FIX: was self._done_scan(...) — the method is _done_scans.
            self._done_scans(scanner).add(scan.uuid)
        self._scan_by_sample(scan.sample).add(scan.uuid)
        for res in scan.scan_results:
            self._results(res.uuid).update(**res.to_dict())
            # add its reference to sets for quick querying
            self._res_by_sample(scan.sample).add(res.uuid)
            self._res_by_av(res.av).add(res.uuid)
        self._get_scan(scan.uuid).update(**scan.to_dict())
        l.debug("Scan %s stored in database", scan)

    def get_scan_results(self, av):
        # TODO: not implemented yet — _res_by_av holds the uuids to resolve
        pass

    def close(self):
        """Close the underlying Vedis database."""
        self._db.close()