def test_show(self):
    ''' Showing snapshots succeeds for the configured repository and fails
    for an unknown repository name. '''
    repo = ElasticSettings.getattr('REPOSITORY')
    # Listing all snapshots works with the flag both off and on.
    self.assertTrue(Snapshot.show(repo, '_all', False))
    self.assertTrue(Snapshot.show(repo, '_all', True))
    # An unknown repository cannot be shown.
    self.assertFalse(Snapshot.show('xyzabc', '_all', False))
def add_arguments(self, parser):
    ''' Register the command-line options for listing snapshots/repositories.

    :param parser: the argparse parser supplied by the management command.
    '''
    default_repo = ElasticSettings.getattr('REPOSITORY')
    parser.add_argument('--snapshot', dest='snapshot', default='_all',
                        help='Snapshot name')
    parser.add_argument('--repo', dest='repo', default=default_repo,
                        metavar=default_repo, help='Repository name')
    parser.add_argument('--all', dest='all', action='store_true',
                        help='List all repositories')
def add_arguments(self, parser):
    ''' Register the command-line options for creating (or deleting) a snapshot.

    :param parser: the argparse parser supplied by the management command.
    '''
    default_repo = ElasticSettings.getattr('REPOSITORY')
    parser.add_argument('snapshot', type=str, help='New snapshot name.')
    parser.add_argument('--indices', dest='indices', default=None,
                        metavar="idx1,idx2",
                        help='Indices (comma separated) to create a snapshot for.')
    parser.add_argument('--repo', dest='repo', default=default_repo,
                        metavar=default_repo, help='Repository name')
    parser.add_argument('--delete', dest='delete', action='store_true',
                        help='Delete snapshot.')
def add_arguments(self, parser):
    ''' Register the command-line options for restoring a snapshot.

    :param parser: the argparse parser supplied by the management command.
    '''
    default_repo = ElasticSettings.getattr('REPOSITORY')
    parser.add_argument('snapshot', type=str, help='Snapshot to restore.')
    parser.add_argument('--url', dest='url', default=ElasticSettings.url(),
                        metavar="ELASTIC_URL", help='Elastic URL to restore to.')
    parser.add_argument('--repo', dest='repo', default=default_repo,
                        metavar=default_repo, help='Repository name')
    parser.add_argument('--indices', dest='indices', default=None,
                        metavar="idx1,idx2",
                        help='Indices (comma separated) to be restored from a snapshot (default all).')
def filter_queryset(self, request, queryset, view):
    ''' Override this method to request feature locations.

    Looks up the requested feature name across the marker, region and gene
    indices (choosing the marker index whose build label matches the
    requested build) and returns a list of ElasticObject locations, or a
    single ElasticObject with an 'error' key when no feature name was given.
    Raises Http404 if the lookup fails.
    '''
    try:
        # Restrict request parameters to those the view declares filterable.
        filterable = getattr(view, 'filter_fields', [])
        filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
        query_str = filters.get('feature', 'PTPN22')  # default feature name
        build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
        if query_str is None or query_str == '':
            return [ElasticObject(initial={'error': 'No feature name provided.'})]
        search_fields = ['id', 'symbol', 'dbxrefs.ensembl', 'region_name']
        sources = ['start', 'stop', 'seqid', 'chromosome', 'disease_loci']
        idxs = ElasticSettings.getattr('IDX')
        # Select the marker index whose build label matches the requested build;
        # prefer the default 'MARKER' key, otherwise scan all MARKER* keys.
        MARKER_IDX = ''
        if build == ElasticSettings.get_label('MARKER', label='build'):
            MARKER_IDX = 'MARKER'
        if MARKER_IDX == '':
            # NOTE(review): no break after a match, so the last matching
            # index key wins — confirm this is intended.
            for idx in idxs:
                if 'MARKER' in idx:
                    if build == ElasticSettings.get_label(idx, label='build'):
                        MARKER_IDX = idx
        # Combine marker, region and gene indices into a single search.
        (idx, idx_type) = ElasticSettings.idx_names(MARKER_IDX, 'MARKER')
        (idx_r, idx_type_r) = ElasticSettings.idx_names('REGION', 'REGION')
        (idx_g, idx_type_g) = ElasticSettings.idx_names('GENE', 'GENE')
        idx += ',' + idx_r + ',' + idx_g
        idx_type += ',' + idx_type_r + ',' + idx_type_g
        equery = BoolQuery(must_arr=Query.query_string(query_str, fields=search_fields))
        elastic = Search(search_query=ElasticQuery(equery, sources), size=10, idx=idx, idx_type=idx_type)
        docs = elastic.search().docs
        locs = []
        for doc in docs:
            if isinstance(doc, RegionDocument):
                # Pad region documents so they carry positional data.
                doc = Region.pad_region_doc(doc)
            # Position string is split as 'chr:positions'; the positions part
            # may be 'start-end' or a single coordinate (commas stripped).
            loc = doc.get_position(build=build).split(':')
            pos = loc[1].replace(',', '').split('-')
            locs.append(ElasticObject(
                {'feature': query_str,
                 'chr': loc[0],
                 'start': int(pos[0]),
                 # single-coordinate positions use the same value for start/end
                 'end': int(pos[1]) if len(pos) > 1 else int(pos[0]),
                 'locusString': query_str+" ("+str(loc[1])+")"}))
        return locs
    except (TypeError, ValueError, IndexError, ConnectionError):
        raise Http404
def _get_old_dbsnps(marker):
    ''' Get markers from old versions of DBSNP. Assumes the index key is
    prefixed by 'MARKER_'. Returns the first matching doc from each legacy
    index, newest index first, tagged with its marker build. '''
    idx_keys = ElasticSettings.getattr('IDX').keys()
    legacy_idx_names = sorted((ElasticSettings.idx(key) for key in idx_keys if 'MARKER_' in key),
                              reverse=True)
    if not legacy_idx_names:
        return []

    legacy_docs = []
    # One query object, reused against each legacy index in turn.
    query = ElasticQuery(Query.query_string(marker, fields=['id', 'rscurrent']))
    for legacy_idx in legacy_idx_names:
        hits = Search(search_query=query, idx=legacy_idx, idx_type='marker').search().docs
        if hits:
            doc = hits[0]
            doc.marker_build = _get_marker_build(legacy_idx)
            legacy_docs.append(doc)
    return legacy_docs
def test_top_hits_sub_aggs(self):
    ''' Test a sub-aggregation of top hits return a PydginDocument/FeatureDocument. '''
    gene_idx = PydginTestSettings.IDX['GENE']['indexName']
    pub_idx = PydginTestSettings.IDX['PUBLICATION']['indexName']
    combined_idx = gene_idx + ',' + pub_idx

    hits_agg = Agg('idx_top_hits', 'top_hits', {"size": 55, "_source": ['symbol']})
    aggs = Aggs([Agg("idxs", "terms", {"field": "_index"}, sub_agg=hits_agg)])
    result = Search(aggs=aggs, idx=combined_idx, size=0).search()

    doc_factory = ElasticSettings.getattr('DOCUMENT_FACTORY')
    top_hits = result.aggs['idxs'].get_docs_in_buckets(obj_document=doc_factory)
    # Gene hits are built as typed feature documents.
    for doc in top_hits[gene_idx]['docs']:
        self.assertTrue(isinstance(doc, GeneDocument))
        self.assertTrue(isinstance(doc, FeatureDocument))
    # Publication hits fall back to the generic document type.
    for doc in top_hits[pub_idx]['docs']:
        self.assertTrue(isinstance(doc, PydginDocument))
def _top_hits(result):
    ''' Return the top hit docs in the aggregation 'idxs'.

    Buckets keyed by a raw index name are re-keyed by their lower-cased index
    key; marker/region buckets get a collapsed doc_count and publication
    buckets are sorted newest first. '''
    doc_factory = ElasticSettings.getattr('DOCUMENT_FACTORY')
    top_hits = result.aggs['idxs'].get_docs_in_buckets(obj_document=doc_factory)
    # Snapshot the keys first: the dict is re-keyed while we walk it.
    for idx_name in list(top_hits.keys()):
        key = ElasticSettings.get_idx_key_by_name(idx_name).lower()
        if key == idx_name:
            continue
        bucket = top_hits[idx_name]
        if key == 'marker':
            bucket['doc_count'] = _collapse_marker_docs(bucket['docs'])
        elif key == 'region':
            bucket['doc_count'] = _collapse_region_docs(bucket['docs'])
        elif key == 'publication':
            # sort by date, newest first
            bucket['docs'].sort(key=attrgetter('date'), reverse=True)
        top_hits[key] = bucket
        del top_hits[idx_name]
    return top_hits
def test_create_restore_delete_snapshot(self):
    ''' End-to-end check: snapshot the marker index, delete the index,
    restore it from the snapshot, then remove the snapshot. '''
    # make sure no other snapshot is currently in progress
    self.wait_for_running_snapshot()
    snapshot = 'test_' + ElasticSettings.getattr('TEST')
    repo = SnapshotTest.TEST_REPO
    # create a snapshot
    call_command('snapshot', snapshot, indices=IDX['MARKER']['indexName'], repo=repo)
    Snapshot.wait_for_snapshot(repo, snapshot)
    self.assertTrue(Snapshot.exists(repo, snapshot), "Created snapshot " + snapshot)
    # snapshot already exist so return false
    self.assertFalse(Snapshot.create_snapshot(repo, snapshot, IDX['MARKER']['indexName']))
    # delete index
    requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
    self.assertFalse(Search.index_exists(IDX['MARKER']['indexName']), "Removed index")
    # restore from snapshot
    call_command('restore_snapshot', snapshot, repo=repo, indices=IDX['MARKER']['indexName'])
    Search.index_refresh(IDX['MARKER']['indexName'])
    self.assertTrue(Search.index_exists(IDX['MARKER']['indexName']), "Restored index exists")
    # remove snapshot
    call_command('snapshot', snapshot, delete=True, repo=repo)
    Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
    self.assertFalse(Snapshot.exists(repo, snapshot), "Deleted snapshot " + snapshot)
''' Tests for the download module. '''
from django.test import TestCase
from django.core.management import call_command
from data_pipeline.download import HTTPDownload, FTPDownload, MartDownload
from django.utils.six import StringIO
from elastic.elastic_settings import ElasticSettings
import os
import requests
import data_pipeline
from data_pipeline.utils import IniParser
from elastic.search import Search, ElasticQuery
import shutil

# Suffix appended to test index names/ini file so concurrent runs don't clash.
IDX_SUFFIX = ElasticSettings.getattr('TEST')
MY_PUB_INI_FILE = os.path.join(os.path.dirname(__file__), IDX_SUFFIX + '_test_publication.ini')
TEST_DATA_DIR = os.path.dirname(data_pipeline.__file__) + '/tests/data'


def setUpModule():
    ''' Change ini config (MY_PUB_INI_FILE) to use the test suffix when
    creating publication pipeline index. '''
    ini_file = os.path.join(os.path.dirname(__file__), 'test_publication.ini')
    # the suffixed copy is generated once and reused on later runs
    if os.path.isfile(MY_PUB_INI_FILE):
        return
    with open(MY_PUB_INI_FILE, 'w') as new_file:
        with open(ini_file) as old_file:
            for line in old_file:
                new_file.write(line.replace('auto_tests', IDX_SUFFIX))
''' Settings used for the tests. ''' import os from elastic.elastic_settings import ElasticSettings SEARCH_BASE_DIR = os.path.dirname(os.path.dirname(__file__)) SEARCH_TEST_DATA_PATH = os.path.join(SEARCH_BASE_DIR, 'tests/data/') SEARCH_SUFFIX = ElasticSettings.getattr('TEST') if SEARCH_SUFFIX is None: SEARCH_SUFFIX = "test" NUMBER_OF_SHARDS = 1 IDX = { 'GENE': { 'indexName': 'test__gene_' + SEARCH_SUFFIX, 'indexGene': SEARCH_TEST_DATA_PATH + 'genenames.org.test.txt.gz', 'shards': NUMBER_OF_SHARDS }, 'MARKER': { 'indexName': 'test__snp_' + SEARCH_SUFFIX, 'indexSNP': SEARCH_TEST_DATA_PATH + 'dbsnp144_test.vcf.gz', 'shards': NUMBER_OF_SHARDS }, 'MARKER_RS_HISTORY': { 'indexName': 'test__snp_' + SEARCH_SUFFIX, 'indexType': 'rs_merge', 'indexSNPMerge': SEARCH_TEST_DATA_PATH + 'rs_merge_test.gz', 'shards': NUMBER_OF_SHARDS }, 'JSON': { 'indexName': 'test__json_' + SEARCH_SUFFIX,
from django.core.management import call_command
import os
import data_pipeline
import shutil
from data_pipeline.utils import IniParser
from elastic.elastic_settings import ElasticSettings
import requests
from elastic.search import Search, ElasticQuery
from data_pipeline.helper.gene import Gene
import logging
import json
from elastic.query import Query, TermsFilter

# Get an instance of a logger
logger = logging.getLogger(__name__)

# Suffix appended to test index names/ini file so concurrent runs don't clash.
IDX_SUFFIX = ElasticSettings.getattr("TEST")
MY_INI_FILE = os.path.join(os.path.dirname(__file__), IDX_SUFFIX + "_test_download.ini")
TEST_DATA_DIR = os.path.dirname(data_pipeline.__file__) + "/tests/data"


def setUpModule():
    """ Change ini config (MY_INI_FILE) to use the test suffix when creating
    pipeline indices. """
    ini_file = os.path.join(os.path.dirname(__file__), "test_download.ini")
    # the suffixed copy is generated once and reused on later runs
    if os.path.isfile(MY_INI_FILE):
        return
    with open(MY_INI_FILE, "w") as new_file:
        with open(ini_file) as old_file:
            for line in old_file:
                new_file.write(line.replace("auto_tests", IDX_SUFFIX))
class SnapshotTest(TestCase):
    ''' Test elastic snapshot and restore. '''
    # Repository name/location used only by these tests; suffixed with the
    # configured TEST value to avoid clashing with other environments.
    TEST_REPO = 'test_backup_' + ElasticSettings.getattr('TEST')
    TEST_REPO_DIR = ElasticSettings.getattr('TEST_REPO_DIR')

    def test_show(self):
        ''' Showing snapshots succeeds for the configured repository and
        fails for an unknown repository name. '''
        self.assertTrue(
            Snapshot.show(ElasticSettings.getattr('REPOSITORY'), '_all', False))
        self.assertTrue(
            Snapshot.show(ElasticSettings.getattr('REPOSITORY'), '_all', True))
        self.assertFalse(Snapshot.show('xyzabc', '_all', False))

    def test_create_delete_repository(self):
        ''' Create and delete a snapshot repository via management commands. '''
        self.wait_for_running_snapshot()
        repo = SnapshotTest.TEST_REPO
        self.assertTrue(Snapshot.exists(repo, ''), 'Repository ' + repo + ' created')
        # creating a repository that already exists returns False
        self.assertFalse(
            Snapshot.create_repository(repo, SnapshotTest.TEST_REPO_DIR),
            'Repository already exists.')
        call_command('repository', repo, delete=True)
        self.assertFalse(Snapshot.exists(repo, ''), 'Repository ' + repo + ' deleted')
        # deleting a repository that no longer exists returns False
        self.assertFalse(Snapshot.delete_repository(repo), 'Repository ' + repo + ' deleted')
        # recreate the repository for tests that run after this one
        call_command('repository', repo, dir=SnapshotTest.TEST_REPO_DIR)
        self.assertTrue(Snapshot.exists(repo, ''), 'Repository ' + repo + ' created')

    def test_create_restore_delete_snapshot(self):
        ''' End-to-end check: snapshot the marker index, delete the index,
        restore it from the snapshot, then remove the snapshot. '''
        self.wait_for_running_snapshot()
        snapshot = 'test_' + ElasticSettings.getattr('TEST')
        repo = SnapshotTest.TEST_REPO
        # create a snapshot
        call_command('snapshot', snapshot, indices=IDX['MARKER']['indexName'], repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot)
        self.assertTrue(Snapshot.exists(repo, snapshot), "Created snapshot " + snapshot)
        # snapshot already exist so return false
        self.assertFalse(
            Snapshot.create_snapshot(repo, snapshot, IDX['MARKER']['indexName']))
        # delete index
        requests.delete(ElasticSettings.url() + '/' + IDX['MARKER']['indexName'])
        self.assertFalse(Search.index_exists(IDX['MARKER']['indexName']), "Removed index")
        # restore from snapshot
        call_command('restore_snapshot', snapshot, repo=repo, indices=IDX['MARKER']['indexName'])
        Search.index_refresh(IDX['MARKER']['indexName'])
        self.assertTrue(Search.index_exists(IDX['MARKER']['indexName']), "Restored index exists")
        # remove snapshot
        call_command('snapshot', snapshot, delete=True, repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
        self.assertFalse(Snapshot.exists(repo, snapshot), "Deleted snapshot " + snapshot)

    def wait_for_running_snapshot(self):
        ''' Wait (up to ~20s) for a running snapshot to complete; fail the
        test if it is still running after that. '''
        for _ in range(10):
            if not Snapshot.is_running():
                return
            time.sleep(2)
        # logger.warn() is a deprecated alias (removed in Python 3.13);
        # use warning() and fail the test explicitly.
        logger.warning('Long running snapshot')
        self.fail('Long running snapshot')