def __init__(self, src_file, title='ENSEMBL', version='', tx_mode=TranscriptProvider.TX_MODE_CANONICAL,
             protocol="file", is_thread_safe=False, tx_filter="dummy", custom_canonical_txs=None):
    super(EnsemblTranscriptDatasource, self).__init__(src_file=src_file, title=title, version=version)

    ensembl_index_fname = src_file + ".transcript.idx"
    ensembl_gene_to_transcript_index_fname = src_file + ".transcript_by_gene.idx"
    ensembl_genomic_position_bins_to_transcript_index_fname = src_file + ".transcript_by_gp_bin.idx"

    # Seconds before a cache entry should be cleared out
    timeout = 1000
    max_entries = 25000
    cache_protocol = "memory"
    if not is_thread_safe:
        logging.getLogger(__name__).info("%s %s is being set up in faster, NOT thread-safe mode (for annotation). " % (title, version))
        cache_protocol = "simple"

    # Contains a key of transcript id and value of a Transcript class, with sequence data where possible.
    # By specifying "memory" for the cache, this is thread safe.  Otherwise, use "simple".
    self.transcript_db = shove.Shove(protocol + '://%s' % ensembl_index_fname, cache_protocol + "://", timeout=timeout, max_entries=max_entries)
    self.gene_db = shove.Shove(protocol + '://%s' % ensembl_gene_to_transcript_index_fname, cache_protocol + "://", timeout=timeout, max_entries=max_entries)
    self.gp_bin_db = shove.Shove(protocol + '://%s' % ensembl_genomic_position_bins_to_transcript_index_fname, cache_protocol + "://", timeout=timeout, max_entries=max_entries)

    tmp = self.gp_bin_db.keys()

    logging.getLogger(__name__).info("%s %s is being set up with default tx-mode: %s. " % (title, version, tx_mode))
    self.set_tx_mode(tx_mode)

    logging.getLogger(__name__).info("%s %s is being set up with %s filtering. " % (title, version, tx_filter))
    self._tx_filter = TranscriptFilterFactory.create_instance(tx_filter)

    self._hgvs_xformer = HgvsChangeTransformer()

    # Store a list of the custom canonical transcripts
    self._custom_canonical_txs = custom_canonical_txs or []
def __init__(self, server_connection, config):
    BotScript.__init__(self, server_connection, config)
    self.store_path = config['store_path']
    self.channels = [config['channels']] if isinstance(config['channels'], str) else config['channels']
    self.dbs = {}
    for channel in self.channels:
        self.dbs[channel] = shove.Shove("bsddb://" + self.store_path + "/" + channel + ".db")
def main(urls=[]):
    if not urls:
        print 'Specify the URLs to a few RSS or Atom feeds on the command line.'
        return

    # Decide how many threads to start
    num_threads = min(len(urls), MAX_THREADS)

    # Add the URLs to a queue
    url_queue = Queue.Queue()
    for url in urls:
        url_queue.put(url)

    # Add poison pills to the url queue to cause
    # the worker threads to break out of their loops
    for i in range(num_threads):
        url_queue.put(None)

    # Track the entries in the feeds being fetched
    entry_queue = Queue.Queue()

    print 'Saving feed data to', OUTPUT_DIR
    storage = shove.Shove('file://' + OUTPUT_DIR)
    try:
        # Start a few worker threads
        worker_threads = []
        for i in range(num_threads):
            t = threading.Thread(target=fetch_urls, args=(storage, url_queue, entry_queue))
            worker_threads.append(t)
            t.setDaemon(True)
            t.start()

        # Start a thread to print the results
        printer_thread = threading.Thread(target=print_entries, args=(entry_queue,))
        printer_thread.setDaemon(True)
        printer_thread.start()

        # Wait for all of the URLs to be processed
        url_queue.join()

        # Wait for the worker threads to finish
        for t in worker_threads:
            t.join()

        # Poison the print thread and wait for it to exit
        entry_queue.put((None, None))
        entry_queue.join()
        printer_thread.join()
    finally:
        storage.close()
    return
def test(self):
    # First fetch the data through the cache
    storage = shove.Shove('file://' + self.shove_dirname)
    try:
        fc = Cache(storage)
        parsed_data = fc.fetch(self.TEST_URL)
        self.failUnlessEqual(parsed_data.feed.title, 'CacheTest test data')
    finally:
        storage.close()

    # Now retrieve the same data directly from the shelf
    storage = shove.Shove('file://' + self.shove_dirname)
    try:
        modified, shelved_data = storage[self.TEST_URL]
    finally:
        storage.close()

    # The data should be the same
    self.failUnlessEqual(parsed_data, shelved_data)
    return
def index_gaf_fastas(gaf_transcript_seqs_fname, output_fname, protocol="sqlite"):
    fh_transcripts = SeqIO.parse(gaf_transcript_seqs_fname, 'fasta')
    transcripts_shv = shove.Shove(protocol + ':///%s' % output_fname)
    j = 0
    for transcript in fh_transcripts:
        if j % 1000 == 0:
            print j
        j += 1
        raw_seq = str(transcript.seq)
        transcripts_shv[transcript.name] = raw_seq
    transcripts_shv.close()
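# Sketch (assumption, not in the original source): reading one sequence back
# out of the index written by index_gaf_fastas() above.  The transcript name
# and this helper are hypothetical; the "sqlite" protocol and the dict-style
# access mirror the snippet itself.
def lookup_transcript_seq(index_fname, transcript_name, protocol="sqlite"):
    shv = shove.Shove(protocol + ':///%s' % index_fname)
    try:
        return shv[transcript_name]
    finally:
        shv.close()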
def _get_shove_db(path):
    """Get or create a sqlite-backed shove db.

    Parameters
    ----------
    path : string
        Path to sqlite db, opened or created as needed.

    Returns
    -------
    Shove object
        A dict-like object backed by the db at path, attached to the flask context.
    """
    attr_name = "_" + basename(path)
    db = getattr(g, attr_name, None)
    if db is None:
        db = shove.Shove("lite://" + path)
        setattr(g, attr_name, db)
    return db
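# Sketch (assumption, not in the original source): one way _get_shove_db()
# could be called from a Flask view.  The app, route, db path, and key are all
# hypothetical; only the helper itself comes from the snippet above.
from flask import abort

@app.route('/annotations/<key>')
def get_annotation(key):
    db = _get_shove_db("/tmp/annotations.sqlite")
    try:
        return str(db[key])
    except KeyError:
        abort(404)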
from werkzeug.wsgi import SharedDataMiddleware app.wsgi_app = SharedDataMiddleware(app.wsgi_app, {'/': app.config['CLIENT_PATH']}) @app.route('/', methods=['GET']) def index(): with open(path.join(app.config['CLIENT_PATH'], 'index.html'), 'r') as f: return f.read() # Configure database if app.config.get('USE_SHOVE', False): import shove db = shove.Shove(app.config['DATABASE']) @contextmanager def database(): yield db else: import shelve from lockfile import FileLock @contextmanager def database(): dbfilename = app.config['DATABASE'] folder = path.dirname(dbfilename) if folder and not path.exists(folder): os.mkdir(folder) with FileLock(dbfilename):
TTL = 3600

cache_opts = {
    'cache.type': 'file',
    'cache.data_dir': OUTPUT_DIR + '/cache/data',
    'cache.lock_dir': OUTPUT_DIR + '/cache/lock'
}


# Default expires next day at 1AM
def timedeltaUntilDays(days=1, hour=1):
    today = datetime.utcnow()
    expires = datetime.replace(today + timedelta(days=days), hour=hour, minute=0, second=0)
    return dict({
        "expires": expires - today,
        "date": expires,
        "seconds": (expires - today).total_seconds()
    })


def setCacheControl(header, expiresInSeconds):
    today = datetime.utcnow()
    expires = today + timedelta(seconds=expiresInSeconds)
    header.add("Expires", int((expires - datetime.utcfromtimestamp(0)).total_seconds()))
    header.add("Max-Age", int(expiresInSeconds))
    header.add("Last-Modified", today.strftime("%a, %d %b %Y %H:%M:%S +0000"))
    header.add("Date-Expires", expires.strftime("%a, %d %b %Y %H:%M:%S +0000"))
    return header


methodcache = CacheManager(**parse_cache_config_options(cache_opts))
storage = shove.Shove("file://" + OUTPUT_DIR + '/storage', optimize=False)
locations = shove.Shove("file://" + OUTPUT_DIR + '/locations', optimize=False)
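# Sketch (assumption, not in the original source): combining the two helpers
# above -- compute the window until the next 1AM expiry and stamp the caching
# headers for it.  "response" is a hypothetical object whose .headers supports
# the add() method used by setCacheControl().
ttl_info = timedeltaUntilDays(days=1, hour=1)
setCacheControl(response.headers, ttl_info['seconds'])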
try:
    OUTPUT_DIR = os.environ['OUTPUT_DIR']
except KeyError:
    OUTPUT_DIR = '/home/charts/cache'

HTTP_CACHE_DIR = OUTPUT_DIR + '/http'
MAX_THREADS = 5
TTL = 3600

cache_opts = {
    'cache.type': 'file',
    'cache.data_dir': OUTPUT_DIR + '/cache/data',
    'cache.lock_dir': OUTPUT_DIR + '/cache/lock'
}

methodcache = CacheManager(**parse_cache_config_options(cache_opts))
storage = shove.Shove("file://" + OUTPUT_DIR + '/sources', optimize=False)
newreleases = shove.Shove("file://" + OUTPUT_DIR + '/newreleases', optimize=False)


# Note: Weekday starts on 0. e.g. 3 = Thursday
def timedeltaUntilWeekday(weekday, hour):
    today = datetime.utcnow()
    expires = today + reldate.relativedelta(minute=0, hour=hour, weekday=weekday)
    return {
        'expires': expires - today,
        'date': expires,
        'seconds': (expires - today).total_seconds()
    }
def __init__(self, gaf_fname, gaf_transcript_sequences_fname, title='Gaf', version='3.0',
             tx_mode="CANONICAL", protocol="sqlite"):
    super(Gaf, self).__init__(src_file=gaf_fname, title=title, version=version)
    self.logger = logging.getLogger(__name__)

    if os.path.exists(gaf_fname):
        if not gaf_fname.endswith('.idx') and os.path.exists(gaf_fname + '.idx'):
            gaf_fname = gaf_fname + '.idx'
        else:
            raise Exception('Missing index for gaf file.  Index file with oncotator-index.py first.')
    else:
        raise Exception('Gaf file does not exist! -- %s' % gaf_fname)

    if os.path.exists(gaf_transcript_sequences_fname):
        if not gaf_transcript_sequences_fname.endswith('.idx') and os.path.exists(gaf_transcript_sequences_fname + '.idx'):
            gaf_transcript_sequences_fname = gaf_transcript_sequences_fname + '.idx'
        else:
            raise Exception('Missing index for gaf transcript sequences file.  Index file with oncotator-index.py first.')
    else:
        raise Exception('Gaf transcript sequences file does not exist! -- %s' % gaf_transcript_sequences_fname)

    self.logger.info("Loading GAF...")
    self.Transcripts, self.Genes = cPickle.load(open(gaf_fname, 'rb'))

    self.logger.info("Loading transcript sequences (" + protocol + ")...")
    self.gaf_transcript_sequences = shove.Shove(protocol + ':///%s' % gaf_transcript_sequences_fname, "memory://")

    self.logger.info("Indexing transcript IDs...")
    self.transcript_id_idx = dict()
    for k in self.Transcripts:
        for b in self.Transcripts[k]:
            for i, t in enumerate(self.Transcripts[k][b]):
                self.transcript_id_idx[t['transcript_id']] = (k, b, i)

    self.add_padding_to_GAF_transcripts(fiveprime_padding=3000, threeprime_padding=0)

    self.logger.info("Indexing gene IDs...")
    self.gene_id_idx = dict()
    for k in self.Genes:
        for b in self.Genes[k]:
            for i, t in enumerate(self.Genes[k][b]):
                self.gene_id_idx[t['gene']] = (k, b, i)

    self.logger.info("Datasource " + self.title + " " + self.version + " finished initialization")

    # TODO: Check for valid values.
    self.tx_mode = tx_mode
def __init__(self, cache_uri=""):
    self.connection = shove.Shove(cache_uri, optimize=False)
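# Sketch (assumption, not in the original source): the cache_uri accepted above
# is a shove store URI, so the wrapper can be pointed at a file-backed store
# like the other snippets in this collection.  The path and key are hypothetical.
conn = shove.Shove("file:///tmp/example_cache", optimize=False)
conn["last_run"] = "2014-01-01"   # dict-style write
last_run = conn["last_run"]       # dict-style read
conn.close()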
import shove_counter
import shove
from multiprocessing import Process, Value, Array


def f(name, sc, sh, n, a):
    print 'hello', name
    sh['a'] = 5
    sc['b'] = 5
    print sc
    print sh
    n.value = 3.1415927
    for i in range(len(a)):
        a[i] = -a[i]


if __name__ == '__main__':
    sc = shove_counter.Counter()
    sh = shove.Shove(store='memory://', cache='memory://')
    num = Value('d', 0.0)
    arr = Array('i', range(10))

    p = Process(target=f, args=('sam', sc, sh, num, arr))
    p.start()
    p.join()

    print sc
    print sh
    print num.value
    print arr[:]
def __init__(self, id):
    self.id = id
    self.store = shove.Shove("file://{}".format(DB.db_path + self.id))