def test_get_simbad_objects(self):
    """Check that ``get_simbad_data`` turns a mocked SIMBAD TAP reply into the expected mapping."""
    from SIMBAD import get_simbad_data

    # Canned reply served in place of the real TAP service
    tap_reply = {
        "data": [[1575544, "NAME ANDROMEDA", "NAME ANDROMEDA"],
                 [3133169, "NAME LMC", "NAME LMC"]]
    }
    # Every POST to the configured TAP endpoint receives the canned reply
    tap_url = self.app.config.get('OBJECTS_SIMBAD_TAP_URL')
    httpretty.register_uri(
        httpretty.POST,
        tap_url,
        content_type='application/json',
        status=200,
        body='%s' % json.dumps(tap_reply))

    wanted_ids = ["3133169", "1575544"]
    actual = get_simbad_data(wanted_ids, 'identifiers')

    # The expected canonical names lack the "NAME " prefix of the mocked rows
    expected = {
        'data': {
            u'3133169': {'id': '3133169', 'canonical': u'LMC'},
            u'1575544': {'id': '1575544', 'canonical': u'ANDROMEDA'},
        }
    }
    self.assertEqual(actual, expected)
def post(self):
    """Look up object data in SIMBAD or NED for the values in the POST body.

    The JSON body may contain:

    * ``source``: ``simbad`` (the default) or ``ned``, matched
      case-insensitively.
    * ``identifiers`` or ``objects``: a list of values, each converted to a
      string. When both are usable, ``objects`` takes precedence.

    Returns:
        On success, the ``data`` dictionary of the lookup. For object name
        queries it is re-keyed on the names exactly as they were supplied.
        Input problems are reported as ``(dict, 200)`` where the dict has
        ``Error`` and ``Error Info`` keys; a failed lookup returns the error
        dictionary produced by the lookup helper unchanged.
    """
    stime = time.time()
    try:
        payload = request.json
    except Exception:
        # A body that cannot be parsed is handled like a body without usable keys
        payload = None
    if not isinstance(payload, dict):
        payload = {}
    # Determine whether a source for the data was specified
    try:
        source = payload['source'].lower()
    except (KeyError, AttributeError):
        source = 'simbad'
    # We only deal with SIMBAD or NED as source
    if source not in ('simbad', 'ned'):
        current_app.logger.error('Unsupported source for object data specified: %s' % source)
        return {"Error": "Unable to get results!",
                "Error Info": "Unsupported source for object data specified: %s" % source}, 200
    # Get the supplied list of identifiers or object names
    identifiers = []
    input_type = None
    for itype in ('identifiers', 'objects'):
        try:
            values = [str(value) for value in payload[itype]]
        except (KeyError, TypeError, UnicodeError):
            # Missing or unusable entry: keep whatever an earlier key supplied
            continue
        identifiers = values
        input_type = itype
    if not input_type:
        current_app.logger.error('No identifiers and objects were specified for SIMBAD object query')
        return {"Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in POST body"}, 200
    # We should either have a list of identifiers or a list of object names
    if not identifiers:
        current_app.logger.error('No identifiers or objects were specified for SIMBAD object query')
        return {"Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in POST body"}, 200
    # We have a known object data source and a list of identifiers. Let's start!
    if source == 'simbad':
        result = get_simbad_data(identifiers, input_type)
    else:
        # The NED helper is called with 'simple' for identifier lookups
        if input_type == 'identifiers':
            input_type = 'simple'
        result = get_ned_data(identifiers, input_type)
    if 'Error' in result:
        # An error was returned!
        err_msg = result.get('Error Info', 'NA')
        current_app.logger.error('Failed to find data for %s %s query (%s)!' % (source.upper(), input_type, err_msg))
        return result
    # We have results!
    duration = time.time() - stime
    current_app.logger.info('Found objects for %s %s in %s user seconds.' % (source.upper(), input_type, duration))
    data = result.get('data', {})
    # Pick the entries in the results that correspond with the original object
    # names: try the name as supplied first, then its upper-cased form
    if input_type == 'objects':
        data = {name: data.get(name) or data.get(name.upper()) for name in identifiers}
    # Send back the results
    return data
def get_object_data(identifiers, service):
    """Fetch object data for ``identifiers`` from the named service.

    ``service`` is compared against the exact strings ``'simbad'`` and
    ``'ned'``; the matching lookup helper is called in ``'objects'`` mode and
    its return value is passed back unchanged. Any other value yields a
    dictionary with ``Error`` and ``Error Info`` keys instead.
    """
    if service == 'simbad':
        return get_simbad_data(identifiers, 'objects')
    if service == 'ned':
        return get_ned_data(identifiers, 'objects')
    # No lookup helper is available for this service
    error_info = 'Do not have method to get object data for this service: {0}'.format(service)
    return {
        'Error': 'Unable to get object data',
        'Error Info': error_info,
    }
def post(self):
    """Return SIMBAD data for the values in the POST body, using the cache where possible.

    The JSON body must contain a list under ``identifiers``, ``objects`` or
    ``facets``; every entry is converted to a string. When several keys are
    usable, the last one in that order takes precedence.

    Values are looked up in ``current_app.cache`` under their upper-cased
    form. Only values without a cache entry are sent to ``get_simbad_data``,
    and the entries that come back are stored in the cache.

    Returns:
        A dictionary combining cached and freshly retrieved entries. For
        ``objects`` queries the fresh entries are keyed on the names as
        supplied. Input problems are reported as ``(dict, 200)`` where the
        dict has ``Error`` and ``Error Info`` keys; a failed lookup returns
        the error dictionary from ``get_simbad_data`` unchanged.
    """
    stime = time.time()
    try:
        payload = request.json
    except Exception:
        # A body that cannot be parsed is handled like a body without usable keys
        payload = None
    if not isinstance(payload, dict):
        payload = {}
    # Get the supplied list of identifiers
    identifiers = []
    input_type = None
    for itype in ('identifiers', 'objects', 'facets'):
        try:
            values = [str(value) for value in payload[itype]]
        except (KeyError, TypeError, UnicodeError):
            # Missing or unusable entry: keep whatever an earlier key supplied
            continue
        identifiers = values
        input_type = itype
    if not input_type:
        current_app.logger.error(
            'No identifiers and objects were specified for SIMBAD object query')
        return {
            "Error": "Unable to get results!",
            "Error Info": "No identifiers/objects found in POST body"
        }, 200
    # We should either have a list of identifiers, a list of object names or a list of facets
    if not identifiers:
        current_app.logger.error(
            'No identifiers, objects or facets were specified for SIMBAD object query')
        return {
            "Error": "Unable to get results!",
            "Error Info": "No identifiers/objects found in POST body"
        }, 200
    # Split the input into cache hits and values that still need a lookup,
    # consulting the cache exactly once per value
    cache = current_app.cache
    cached = {}
    uncached = []
    for ident in identifiers:
        hit = cache.get(ident.upper())
        if hit:
            cached[ident] = hit
        else:
            uncached.append(ident)
    if cached:
        current_app.logger.debug(
            'Received %s %s. Using %s entries from cache.' %
            (len(identifiers), input_type, len(cached)))
    if not uncached:
        # We only had cached results
        return cached
    # We have identifiers, not found in the cache
    result = get_simbad_data(uncached, input_type)
    if 'Error' in result:
        # An error was returned!
        current_app.logger.error(
            'Failed to find data for SIMBAD %s query!' % input_type)
        return result
    # We have results!
    duration = time.time() - stime
    current_app.logger.info(
        'Found objects for SIMBAD %s in %s user seconds.' %
        (input_type, duration))
    data = result.get('data', {})
    # Before returning results, cache them
    timeout = current_app.config.get('OBJECTS_CACHE_TIMEOUT')
    for ident, value in data.items():
        cache.set(ident.upper(), value, timeout=timeout)
    # Now pick the entries in the results that correspond with the original object names
    if input_type == 'objects':
        data = {name: data.get(name.upper()) for name in uncached}
    # Merge cached entries (if any) with the fresh ones; fresh entries win
    merged = cached.copy()
    merged.update(data)
    return merged
def post(self):
    """Translate the object names in a Solr query into SIMBAD identifiers.

    The JSON body must contain ``query``: a Solr query string, or a list whose
    first element is that string. ``object:`` is rewritten to ``simbid:`` and
    every object name parsed out of the query is replaced by the ``id`` of an
    entry returned by ``get_simbad_data`` for that name (``'0'`` when no id
    can be extracted).

    Returns:
        ``{"query": <translated query>}`` on success. Input problems are
        reported as ``(dict, 200)`` where the dict has ``Error`` and
        ``Error Info`` keys; a failed lookup returns the error dictionary
        from ``get_simbad_data`` unchanged.
    """
    input_type = 'query'
    try:
        query = request.json['query']
    except Exception:
        # Covers an unparsable body as well as a missing 'query' key
        current_app.logger.error(
            'No query was specified for SIMBAD object search')
        return {
            "Error": "Unable to get results!",
            "Error Info": "No identifiers/objects found in POST body"
        }, 200
    # If we get the request from BBB, the value of 'query' is actually an array
    if isinstance(query, list):
        solr_query = query[0] if query else ''
    else:
        solr_query = query
    current_app.logger.info('Received SIMBAD object query: %s' % solr_query)
    # If we receive a (Solr) query string, we need to parse out the object names
    try:
        new_query = solr_query.replace('object:', 'simbid:')
        identifiers = get_objects_from_query_string(solr_query)
    except Exception:
        current_app.logger.error(
            'Parsing the identifiers out of the query string blew up!')
        return {
            "Error": "Unable to get results!",
            "Error Info": "No objects found in query string"
        }, 200
    identifiers = [
        iden for iden in identifiers if iden.lower() not in ('object', ':')
    ]
    # If we did not find any object names, there is nothing to do!
    if not identifiers:
        return {
            "Error": "Unable to get results!",
            "Error Info": "No identifiers/objects found in POST body"
        }, 200
    name2id = {}
    for ident in identifiers:
        if ident in name2id:
            # Repeated name: already resolved, no need for another lookup
            continue
        result = get_simbad_data([ident], 'objects')
        if 'Error' in result:
            # An error was returned!
            current_app.logger.error(
                'Failed to find data for SIMBAD %s query!' % input_type)
            return result
        try:
            found_ids = [e.get('id', 0) for e in result['data'].values()]
            # Always hand a string to str.replace below, whatever type the id has
            simbad_id = '%s' % (found_ids[0],)
        except (KeyError, IndexError, AttributeError, TypeError):
            simbad_id = '0'
        name2id[ident] = simbad_id
    # Substitute in the order in which the names were found
    for oname in identifiers:
        new_query = new_query.replace(oname, name2id[oname])
    return {"query": new_query}
class QuerySearch(Resource):
    """Given a Solr query with object names, return a Solr query with SIMBAD and/or NED identifiers"""
    scopes = []
    # NOTE(review): presumably [number of requests, per number of seconds] - confirm
    rate_limit = [1000, 60 * 60 * 24]
    decorators = [advertise('scopes', 'rate_limit')]

    def post(self):
        """Translate the object names in a Solr query into SIMBAD and/or NED identifiers.

        The JSON body must contain ``query``: a Solr query string, or a list
        whose first element is that string. The optional ``target``
        (``simbad``, ``ned`` or ``all``, matched case-insensitively, default
        ``all``) selects the services to translate for.

        Returns:
            ``{'query': <translated query>}``. When both services yield a
            translation, the two are combined as ``(simbad) OR (ned)``; when
            neither does, the query is ``simbid:0``. Input problems are
            reported as ``(dict, 200)`` where the dict has ``Error`` and
            ``Error Info`` keys.
        """
        try:
            query = request.json['query']
        except Exception:
            # Covers an unparsable body as well as a missing 'query' key
            current_app.logger.error(
                'No query was specified for the object search')
            return {
                "Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in POST body"
            }, 200
        # If we get the request from BBB, the value of 'query' is actually an array
        if isinstance(query, list):
            # An empty array is handled like an empty query string
            solr_query = query[0] if query else ''
        else:
            solr_query = query
        current_app.logger.info('Received object query: %s' % solr_query)
        # Check if an explicit target service was specified
        try:
            target = request.json['target'].lower()
        except (KeyError, TypeError, AttributeError):
            target = 'all'
        # If we receive a (Solr) query string, we need to parse out the object names
        try:
            identifiers = get_objects_from_query_string(solr_query)
        except Exception as err:
            current_app.logger.error(
                'Parsing the identifiers out of the query string blew up!')
            return {
                "Error": "Unable to get results!",
                "Error Info": "Parsing the identifiers out of the query string blew up! (%s)" % str(err)
            }, 200
        identifiers = [
            iden for iden in identifiers if iden.lower() not in ('object', ':')
        ]
        # If we did not find any object names, there is nothing to do!
        if not identifiers:
            return {
                "Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in Solr object query"
            }, 200
        # Get translations
        # NOTE(review): the placeholder ids below mix int 0 and str values,
        # exactly as before; confirm what translate_query expects before
        # normalizing them.
        simbad_query = ''
        ned_query = ''
        if target in ('simbad', 'all'):
            name2simbid = {}
            for ident in identifiers:
                result = get_simbad_data([ident], 'objects')
                if 'Error' in result or 'data' not in result:
                    # An error was returned!
                    current_app.logger.error(
                        'Failed to find data for SIMBAD object {0}!: {1}'.
                        format(ident, result.get('Error Info', 'NA')))
                    name2simbid[ident] = 0
                    continue
                try:
                    SIMBADid = [
                        e.get('id', 0) for e in result['data'].values()
                    ][0]
                except (IndexError, AttributeError, TypeError):
                    SIMBADid = "0"
                name2simbid[ident] = SIMBADid
            simbad_query = translate_query(solr_query, identifiers,
                                           name2simbid, 'simbid:')
        if target in ('ned', 'all'):
            name2nedid = {}
            for ident in identifiers:
                result = get_ned_data([ident], 'objects')
                if 'Error' in result or 'data' not in result:
                    # An error was returned!
                    current_app.logger.error(
                        'Failed to find data for NED object {0}!: {1}'.format(
                            ident, result.get('Error Info', 'NA')))
                    name2nedid[ident] = 0
                    continue
                try:
                    NEDid = [e.get('id', 0)
                             for e in result['data'].values()][0]
                except (IndexError, AttributeError, TypeError):
                    NEDid = 0
                name2nedid[ident] = str(NEDid)
            ned_query = translate_query(solr_query, identifiers, name2nedid,
                                        'nedid:')
        # Combine whatever translations we ended up with
        if simbad_query and ned_query:
            translated_query = '({0}) OR ({1})'.format(simbad_query, ned_query)
        elif simbad_query:
            translated_query = simbad_query
        elif ned_query:
            translated_query = ned_query
        else:
            translated_query = 'simbid:0'
        return {'query': translated_query}
def post(self):
    """Return SIMBAD data for the values in the POST body, using the cache where possible.

    The JSON body must contain a list under ``identifiers``, ``objects`` or
    ``facets``; every entry is converted to a string. When several keys are
    usable, the last one in that order takes precedence.

    Values are looked up in ``current_app.cache`` under their upper-cased
    form. Only values without a cache entry are sent to ``get_simbad_data``,
    and the entries that come back are stored in the cache.

    Returns:
        A dictionary combining cached and freshly retrieved entries. For
        ``objects`` queries the fresh entries are keyed on the names as
        supplied. Input problems are reported as ``(dict, 200)`` where the
        dict has ``Error`` and ``Error Info`` keys; a failed lookup returns
        the error dictionary from ``get_simbad_data`` unchanged.
    """
    stime = time.time()
    try:
        body = request.json
    except Exception:
        # A body that cannot be parsed is handled like a body without usable keys
        body = None
    if not isinstance(body, dict):
        body = {}
    # Get the supplied list of identifiers
    identifiers = []
    input_type = None
    for itype in ('identifiers', 'objects', 'facets'):
        try:
            supplied = [str(entry) for entry in body[itype]]
        except (KeyError, TypeError, UnicodeError):
            # Missing or unusable entry: keep whatever an earlier key supplied
            continue
        identifiers = supplied
        input_type = itype
    if not input_type:
        current_app.logger.error('No identifiers and objects were specified for SIMBAD object query')
        return {"Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in POST body"}, 200
    # We should either have a list of identifiers, a list of object names or a list of facets
    if not identifiers:
        current_app.logger.error('No identifiers, objects or facets were specified for SIMBAD object query')
        return {"Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in POST body"}, 200
    # Consult the cache exactly once per value and remember which values
    # still need to be looked up
    cache = current_app.cache
    cached = {}
    to_query = []
    for ident in identifiers:
        entry = cache.get(ident.upper())
        if entry:
            cached[ident] = entry
        else:
            to_query.append(ident)
    if cached:
        current_app.logger.debug('Received %s %s. Using %s entries from cache.' % (len(identifiers), input_type, len(cached)))
    if not to_query:
        # We only had cached results
        return cached
    # We have identifiers, not found in the cache
    result = get_simbad_data(to_query, input_type)
    if 'Error' in result:
        # An error was returned!
        current_app.logger.error('Failed to find data for SIMBAD %s query!' % input_type)
        return result
    # We have results!
    duration = time.time() - stime
    current_app.logger.info('Found objects for SIMBAD %s in %s user seconds.' % (input_type, duration))
    data = result.get('data', {})
    # Before returning results, cache them
    timeout = current_app.config.get('OBJECTS_CACHE_TIMEOUT')
    for ident, value in data.items():
        cache.set(ident.upper(), value, timeout=timeout)
    # Now pick the entries in the results that correspond with the original object names
    if input_type == 'objects':
        data = {name: data.get(name.upper()) for name in to_query}
    # Merge cached entries (if any) with the fresh ones; fresh entries win
    combined = cached.copy()
    combined.update(data)
    return combined
def post(self):
    """Translate the object names in a Solr query into SIMBAD identifiers.

    The JSON body must contain ``query``: a Solr query string, or a list whose
    first element is that string. ``object:`` is rewritten to ``simbid:`` and
    every object name parsed out of the query is replaced by the ``id`` of an
    entry returned by ``get_simbad_data`` for that name (``'0'`` when no id
    can be extracted).

    Returns:
        ``{"query": <translated query>}`` on success. Input problems are
        reported as ``(dict, 200)`` where the dict has ``Error`` and
        ``Error Info`` keys; a failed lookup returns the error dictionary
        from ``get_simbad_data`` unchanged.
    """
    input_type = 'query'
    try:
        query = request.json['query']
    except Exception:
        # Covers an unparsable body as well as a missing 'query' key
        current_app.logger.error('No query was specified for SIMBAD object search')
        return {"Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in POST body"}, 200
    # If we get the request from BBB, the value of 'query' is actually an array
    if isinstance(query, list):
        solr_query = query[0] if query else ''
    else:
        solr_query = query
    current_app.logger.info('Received SIMBAD object query: %s' % solr_query)
    # If we receive a (Solr) query string, we need to parse out the object names
    try:
        new_query = solr_query.replace('object:', 'simbid:')
        object_names = get_objects_from_query_string(solr_query)
    except Exception:
        current_app.logger.error('Parsing the identifiers out of the query string blew up!')
        return {"Error": "Unable to get results!",
                "Error Info": "No objects found in query string"}, 200
    object_names = [name for name in object_names if name.lower() not in ('object', ':')]
    # If we did not find any object names, there is nothing to do!
    if not object_names:
        return {"Error": "Unable to get results!",
                "Error Info": "No identifiers/objects found in POST body"}, 200
    name2id = {}
    for name in object_names:
        if name in name2id:
            # Repeated name: already resolved, no need for another lookup
            continue
        result = get_simbad_data([name], 'objects')
        if 'Error' in result:
            # An error was returned!
            current_app.logger.error('Failed to find data for SIMBAD %s query!' % input_type)
            return result
        try:
            candidates = [e.get('id', 0) for e in result['data'].values()]
            # Always hand a string to str.replace below, whatever type the id has
            name2id[name] = '%s' % (candidates[0],)
        except (KeyError, IndexError, AttributeError, TypeError):
            name2id[name] = '0'
    # Substitute in the order in which the names were found
    for name in object_names:
        new_query = new_query.replace(name, name2id[name])
    return {"query": new_query}