示例#1
0
 def test_show(self):
     ''' Snapshot.show() succeeds for the configured repository and
     fails for an unknown repository name. '''
     repo = ElasticSettings.getattr('REPOSITORY')
     self.assertTrue(Snapshot.show(repo, '_all', False))
     self.assertTrue(Snapshot.show(repo, '_all', True))
     self.assertFalse(Snapshot.show('xyzabc', '_all', False))
 def add_arguments(self, parser):
     ''' Define the command-line options for listing snapshots. '''
     repo = ElasticSettings.getattr('REPOSITORY')
     parser.add_argument('--snapshot', dest='snapshot', default='_all',
                         help='Snapshot name')
     parser.add_argument('--repo', dest='repo', default=repo, metavar=repo,
                         help='Repository name')
     parser.add_argument('--all', dest='all', action='store_true',
                         help='List all repositories')
示例#3
0
 def add_arguments(self, parser):
     ''' Define the command-line options for creating or deleting a snapshot. '''
     repo = ElasticSettings.getattr('REPOSITORY')
     parser.add_argument('snapshot', type=str, help='New snapshot name.')
     parser.add_argument('--indices', dest='indices', default=None,
                         metavar="idx1,idx2",
                         help='Indices (comma separated) to create a snapshot for.')
     parser.add_argument('--repo', dest='repo', default=repo, metavar=repo,
                         help='Repository name')
     parser.add_argument('--delete', dest='delete', action='store_true',
                         help='Delete snapshot.')
 def add_arguments(self, parser):
     ''' Define the command-line options for restoring a snapshot. '''
     repo = ElasticSettings.getattr('REPOSITORY')
     parser.add_argument('snapshot', type=str, help='Snapshot to restore.')
     parser.add_argument('--url', dest='url', default=ElasticSettings.url(),
                         metavar="ELASTIC_URL",
                         help='Elastic URL to restore to.')
     parser.add_argument('--repo', dest='repo', default=repo, metavar=repo,
                         help='Repository name')
     parser.add_argument('--indices', dest='indices', default=None,
                         metavar="idx1,idx2",
                         help='Indices (comma separated) to be restored from a snapshot (default all).')
示例#5
0
    def filter_queryset(self, request, queryset, view):
        ''' Return a list of ElasticObject feature locations for the
        requested feature name, resolved against the requested genome
        build.  Searches marker, region and gene indices in one request.
        Raises Http404 on parse/connection errors. '''
        try:
            # Only query-string parameters declared in the view's
            # 'filter_fields' are honoured.
            filterable = getattr(view, 'filter_fields', [])
            filters = dict([(k, v) for k, v in request.GET.items() if k in filterable])
            query_str = filters.get('feature', 'PTPN22')  # default feature name
            build = self._get_build(filters.get('build', settings.DEFAULT_BUILD))
            if query_str is None or query_str == '':
                return [ElasticObject(initial={'error': 'No feature name provided.'})]

            # Fields matched by the query string and the document
            # attributes fetched back in the result source.
            search_fields = ['id',
                             'symbol', 'dbxrefs.ensembl',
                             'region_name']
            sources = ['start', 'stop', 'seqid', 'chromosome',
                       'disease_loci']
            idxs = ElasticSettings.getattr('IDX')
            MARKER_IDX = ''

            # Choose the marker index whose build label matches the
            # requested build; prefer the default 'MARKER' index, else
            # scan all configured marker indices.
            if build == ElasticSettings.get_label('MARKER', label='build'):
                MARKER_IDX = 'MARKER'
            if MARKER_IDX == '':
                for idx in idxs:
                    if 'MARKER' in idx:
                        if build == ElasticSettings.get_label(idx, label='build'):
                            MARKER_IDX = idx

            # Combine marker, region and gene indices/types into a single
            # comma-separated search target.
            (idx, idx_type) = ElasticSettings.idx_names(MARKER_IDX, 'MARKER')
            (idx_r, idx_type_r) = ElasticSettings.idx_names('REGION', 'REGION')
            (idx_g, idx_type_g) = ElasticSettings.idx_names('GENE', 'GENE')
            idx += ',' + idx_r + ',' + idx_g
            idx_type += ',' + idx_type_r + ',' + idx_type_g

            equery = BoolQuery(must_arr=Query.query_string(query_str, fields=search_fields))
            elastic = Search(search_query=ElasticQuery(equery, sources), size=10, idx=idx, idx_type=idx_type)
            docs = elastic.search().docs
            locs = []
            for doc in docs:
                # Region documents are padded before reading positions.
                if isinstance(doc, RegionDocument):
                    doc = Region.pad_region_doc(doc)

                # Position string is parsed as 'chr:start-end' (commas in
                # coordinates are stripped); single-point features have no
                # '-' part, so 'end' falls back to 'start'.
                loc = doc.get_position(build=build).split(':')
                pos = loc[1].replace(',', '').split('-')
                locs.append(ElasticObject(
                    {'feature': query_str,
                     'chr': loc[0],
                     'start': int(pos[0]),
                     'end': int(pos[1]) if len(pos) > 1 else int(pos[0]),
                     'locusString': query_str+" ("+str(loc[1])+")"}))
            return locs
        except (TypeError, ValueError, IndexError, ConnectionError):
            raise Http404
示例#6
0
def _get_old_dbsnps(marker):
    ''' Get markers from old versions of DBSNP. Assumes the index key is
    prefixed by 'MARKER_'. '''
    idx_keys = ElasticSettings.getattr('IDX').keys()
    snp_idx_names = sorted((ElasticSettings.idx(k) for k in idx_keys
                            if 'MARKER_' in k), reverse=True)
    if not snp_idx_names:
        return []

    matched_docs = []
    search_query = ElasticQuery(Query.query_string(marker, fields=['id', 'rscurrent']))
    for idx_name in snp_idx_names:
        hits = Search(search_query=search_query, idx=idx_name,
                      idx_type='marker').search().docs
        if hits:
            # Keep the first hit, tagged with the build of its index.
            doc = hits[0]
            doc.marker_build = _get_marker_build(idx_name)
            matched_docs.append(doc)
    return matched_docs
示例#7
0
    def test_top_hits_sub_aggs(self):
        ''' Test a sub-aggregation of top hits return a PydginDocument/FeatureDocument. '''
        gene_idx = PydginTestSettings.IDX['GENE']['indexName']
        pub_idx = PydginTestSettings.IDX['PUBLICATION']['indexName']
        hits_agg = Agg('idx_top_hits', 'top_hits', {"size": 55, "_source": ['symbol']})
        aggs = Aggs([Agg("idxs", "terms", {"field": "_index"}, sub_agg=hits_agg)])

        res = Search(aggs=aggs, idx=gene_idx + ',' + pub_idx, size=0).search()
        top_hits = res.aggs['idxs'].get_docs_in_buckets(
            obj_document=ElasticSettings.getattr('DOCUMENT_FACTORY'))

        # Gene hits should be GeneDocument and FeatureDocument instances;
        # publication hits the generic PydginDocument.
        for doc in top_hits[gene_idx]['docs']:
            self.assertTrue(isinstance(doc, GeneDocument))
            self.assertTrue(isinstance(doc, FeatureDocument))
        for doc in top_hits[pub_idx]['docs']:
            self.assertTrue(isinstance(doc, PydginDocument))
示例#8
0
def _top_hits(result):
    ''' Return the top hit docs in the aggregation 'idxs', re-keyed by the
    lower-cased index key with per-index post-processing applied. '''
    top_hits = result.aggs['idxs'].get_docs_in_buckets(
        obj_document=ElasticSettings.getattr('DOCUMENT_FACTORY'))
    for idx in list(top_hits.keys()):
        key = ElasticSettings.get_idx_key_by_name(idx).lower()
        if key == idx:
            continue

        bucket = top_hits[idx]
        if key == 'marker':
            bucket['doc_count'] = _collapse_marker_docs(bucket['docs'])
        elif key == 'region':
            bucket['doc_count'] = _collapse_region_docs(bucket['docs'])
        elif key == 'publication':
            # sort by date
            bucket['docs'].sort(key=attrgetter('date'), reverse=True)

        # replace the raw index name with the lower-cased index key
        top_hits[key] = bucket
        del top_hits[idx]
    return top_hits
示例#9
0
    def test_create_restore_delete_snapshot(self):
        ''' End-to-end snapshot lifecycle: snapshot the marker index,
        delete the index, restore it from the snapshot, then remove the
        snapshot. '''
        # avoid clashing with any snapshot already in progress
        self.wait_for_running_snapshot()
        snapshot = 'test_' + ElasticSettings.getattr('TEST')
        repo = SnapshotTest.TEST_REPO

        # create a snapshot
        call_command('snapshot',
                     snapshot,
                     indices=IDX['MARKER']['indexName'],
                     repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot)
        self.assertTrue(Snapshot.exists(repo, snapshot),
                        "Created snapshot " + snapshot)
        # snapshot already exist so return false
        self.assertFalse(
            Snapshot.create_snapshot(repo, snapshot,
                                     IDX['MARKER']['indexName']))

        # delete index so the restore below can be verified
        requests.delete(ElasticSettings.url() + '/' +
                        IDX['MARKER']['indexName'])
        self.assertFalse(Search.index_exists(IDX['MARKER']['indexName']),
                         "Removed index")
        # restore from snapshot
        call_command('restore_snapshot',
                     snapshot,
                     repo=repo,
                     indices=IDX['MARKER']['indexName'])
        Search.index_refresh(IDX['MARKER']['indexName'])
        self.assertTrue(Search.index_exists(IDX['MARKER']['indexName']),
                        "Restored index exists")

        # remove snapshot
        call_command('snapshot', snapshot, delete=True, repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
        self.assertFalse(Snapshot.exists(repo, snapshot),
                         "Deleted snapshot " + snapshot)
''' Tests for the download module. '''
from django.test import TestCase
from django.core.management import call_command
from data_pipeline.download import HTTPDownload, FTPDownload, MartDownload
from django.utils.six import StringIO
from elastic.elastic_settings import ElasticSettings
import os
import requests
import data_pipeline
from data_pipeline.utils import IniParser
from elastic.search import Search, ElasticQuery
import shutil

# Suffix for test index names, taken from the Elastic 'TEST' setting.
IDX_SUFFIX = ElasticSettings.getattr('TEST')
# Publication ini file generated for this test run by setUpModule().
MY_PUB_INI_FILE = os.path.join(os.path.dirname(__file__), IDX_SUFFIX + '_test_publication.ini')
# Location of the data_pipeline test fixture data.
TEST_DATA_DIR = os.path.dirname(data_pipeline.__file__) + '/tests/data'


def setUpModule():
    ''' Generate MY_PUB_INI_FILE from test_publication.ini with the test
    index suffix substituted, unless it has already been generated. '''
    if os.path.isfile(MY_PUB_INI_FILE):
        return

    template = os.path.join(os.path.dirname(__file__), 'test_publication.ini')
    with open(template) as src, open(MY_PUB_INI_FILE, 'w') as dest:
        for line in src:
            dest.write(line.replace('auto_tests', IDX_SUFFIX))

示例#11
0
''' Settings used for the tests. '''
import os
from elastic.elastic_settings import ElasticSettings

# Root directory of the search app (two levels above this settings file).
SEARCH_BASE_DIR = os.path.dirname(os.path.dirname(__file__))
SEARCH_TEST_DATA_PATH = os.path.join(SEARCH_BASE_DIR, 'tests/data/')
# Suffix for test index names; falls back to "test" when not configured.
SEARCH_SUFFIX = ElasticSettings.getattr('TEST')
if SEARCH_SUFFIX is None:
    SEARCH_SUFFIX = "test"

# Shards per test index.
NUMBER_OF_SHARDS = 1

IDX = {
    'GENE': {
        'indexName': 'test__gene_' + SEARCH_SUFFIX,
        'indexGene': SEARCH_TEST_DATA_PATH + 'genenames.org.test.txt.gz',
        'shards': NUMBER_OF_SHARDS
    },
    'MARKER': {
        'indexName': 'test__snp_' + SEARCH_SUFFIX,
        'indexSNP': SEARCH_TEST_DATA_PATH + 'dbsnp144_test.vcf.gz',
        'shards': NUMBER_OF_SHARDS
    },
    'MARKER_RS_HISTORY': {
        'indexName': 'test__snp_' + SEARCH_SUFFIX,
        'indexType': 'rs_merge',
        'indexSNPMerge': SEARCH_TEST_DATA_PATH + 'rs_merge_test.gz',
        'shards': NUMBER_OF_SHARDS
    },
    'JSON': {
        'indexName': 'test__json_' + SEARCH_SUFFIX,
from django.core.management import call_command
import os
import data_pipeline
import shutil
from data_pipeline.utils import IniParser
from elastic.elastic_settings import ElasticSettings
import requests
from elastic.search import Search, ElasticQuery
from data_pipeline.helper.gene import Gene
import logging
import json
from elastic.query import Query, TermsFilter

# Get an instance of a logger
logger = logging.getLogger(__name__)
# Suffix for test index names, taken from the Elastic 'TEST' setting.
IDX_SUFFIX = ElasticSettings.getattr("TEST")
# Download ini file generated for this test run by setUpModule().
MY_INI_FILE = os.path.join(os.path.dirname(__file__), IDX_SUFFIX + "_test_download.ini")
# Location of the data_pipeline test fixture data.
TEST_DATA_DIR = os.path.dirname(data_pipeline.__file__) + "/tests/data"


def setUpModule():
    """ Generate MY_INI_FILE from test_download.ini with the test index
    suffix substituted, unless it has already been generated. """
    if os.path.isfile(MY_INI_FILE):
        return

    template = os.path.join(os.path.dirname(__file__), "test_download.ini")
    with open(template) as src, open(MY_INI_FILE, "w") as dest:
        for line in src:
            dest.write(line.replace("auto_tests", IDX_SUFFIX))
示例#13
0
class SnapshotTest(TestCase):
    ''' Test elastic snapshot and restore. '''

    # Repository name/directory used by these tests; suffixed with the
    # configured TEST value so runs do not clash with real repositories.
    TEST_REPO = 'test_backup_' + ElasticSettings.getattr('TEST')
    TEST_REPO_DIR = ElasticSettings.getattr('TEST_REPO_DIR')

    def test_show(self):
        ''' Snapshot.show() succeeds for the configured repository and
        fails for an unknown repository name. '''
        self.assertTrue(
            Snapshot.show(ElasticSettings.getattr('REPOSITORY'), '_all',
                          False))
        self.assertTrue(
            Snapshot.show(ElasticSettings.getattr('REPOSITORY'), '_all', True))
        self.assertFalse(Snapshot.show('xyzabc', '_all', False))

    def test_create_delete_repository(self):
        ''' Delete and re-create the test repository; creating an existing
        repository or deleting a missing one returns False. '''
        self.wait_for_running_snapshot()
        repo = SnapshotTest.TEST_REPO
        self.assertTrue(Snapshot.exists(repo, ''),
                        'Repository ' + repo + ' created')

        # repository already exists so create returns False
        self.assertFalse(
            Snapshot.create_repository(repo, SnapshotTest.TEST_REPO_DIR),
            'Repository already exists.')

        call_command('repository', repo, delete=True)
        self.assertFalse(Snapshot.exists(repo, ''),
                         'Repository ' + repo + ' deleted')
        # deleting a repository that no longer exists returns False
        self.assertFalse(Snapshot.delete_repository(repo),
                         'Repository ' + repo + ' deleted')
        call_command('repository', repo, dir=SnapshotTest.TEST_REPO_DIR)
        self.assertTrue(Snapshot.exists(repo, ''),
                        'Repository ' + repo + ' created')

    def test_create_restore_delete_snapshot(self):
        ''' End-to-end snapshot lifecycle: snapshot the marker index,
        delete the index, restore it from the snapshot, then remove the
        snapshot. '''
        self.wait_for_running_snapshot()
        snapshot = 'test_' + ElasticSettings.getattr('TEST')
        repo = SnapshotTest.TEST_REPO

        # create a snapshot
        call_command('snapshot',
                     snapshot,
                     indices=IDX['MARKER']['indexName'],
                     repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot)
        self.assertTrue(Snapshot.exists(repo, snapshot),
                        "Created snapshot " + snapshot)
        # snapshot already exist so return false
        self.assertFalse(
            Snapshot.create_snapshot(repo, snapshot,
                                     IDX['MARKER']['indexName']))

        # delete index so the restore below can be verified
        requests.delete(ElasticSettings.url() + '/' +
                        IDX['MARKER']['indexName'])
        self.assertFalse(Search.index_exists(IDX['MARKER']['indexName']),
                         "Removed index")
        # restore from snapshot
        call_command('restore_snapshot',
                     snapshot,
                     repo=repo,
                     indices=IDX['MARKER']['indexName'])
        Search.index_refresh(IDX['MARKER']['indexName'])
        self.assertTrue(Search.index_exists(IDX['MARKER']['indexName']),
                        "Restored index exists")

        # remove snapshot
        call_command('snapshot', snapshot, delete=True, repo=repo)
        Snapshot.wait_for_snapshot(repo, snapshot, delete=True, count=10)
        self.assertFalse(Snapshot.exists(repo, snapshot),
                         "Deleted snapshot " + snapshot)

    def wait_for_running_snapshot(self):
        ''' Wait (up to ~20 seconds) for a running snapshot to complete;
        fail the test if one is still running after that. '''
        for _ in range(10):
            if not Snapshot.is_running():
                return
            time.sleep(2)
        # logger.warn() is a deprecated alias of logger.warning()
        logger.warning('Long running snapshot')
        self.fail('Long running snapshot')