def search(self, message):
    """Run a portal search and return a RecordList.

    message carries q (JSON with a 'keywords' list), an optional web-safe
    cursor, a sort field and a limit. Results and failures are both
    reported to the /apitracker task queue. On failure, returns a
    RecordList whose error field names the exception class.
    """
    if message.cursor:
        curs = search.Cursor(web_safe_string=message.cursor)
    else:
        curs = search.Cursor()

    q = json.loads(message.q)
    keywords = ' '.join([x for x in q['keywords'] if x])

    sort = message.sort
    if 'distance' in keywords:
        # Distance queries must keep native search ranking; disable sort.
        sort = None
    limit = message.limit

    result = vnsearch.query(keywords, limit, sort=sort, curs=curs)

    if len(result) == 4:
        recs, cursor, count, query_version = result
        # Per-resource record counts for the tracker payload.
        res_counts = vnutil.search_resource_counts(recs)
        if not message.cursor:
            type = 'query'
        else:
            type = 'query-view'
        apitracker_params = dict(
            api_version=None, count=len(recs), download=None,
            downloader=None, error=None, latlon=self.cityLatLong,
            matching_records=count, query=keywords,
            query_version=query_version, request_source='SearchPortal',
            response_records=len(recs), res_counts=json.dumps(res_counts),
            type=type)
        taskqueue.add(url='/apitracker', params=apitracker_params,
                      queue_name="apitracker")
    else:
        # vnsearch.query signaled failure; result[0] is the exception.
        error = result[0].__class__.__name__
        apitracker_params = dict(
            api_version=None, count=0, download=None, downloader=None,
            error=error, latlon=self.cityLatLong, matching_records=0,
            query=keywords,
            # BUG FIX: query_version was referenced here without ever
            # being assigned on the error path, raising NameError before
            # the tracker task could be queued. Report None instead.
            query_version=None,
            request_source='SearchPortal', response_records=0,
            res_counts=json.dumps({}), type='query')
        taskqueue.add(url='/apitracker', params=apitracker_params,
                      queue_name="apitracker")
        response = RecordList(error=unicode(error))
        return response

    if cursor:
        cursor = cursor.web_safe_string
    items = [RecordPayload(id=x['keyname'], json=json.dumps(x))
             for x in recs if x]
    response = RecordList(items=items, cursor=cursor, count=count)
    return response
def post(self):
    """Append one chunk of query results to a blobstore writable file.

    Reads the query, email, name and cursor from the request, fetches up
    to 100 records, writes them as a TSV chunk, and (on the first chunk
    only) reports the download to the /apitracker queue.
    """
    q, email, name, latlon = map(
        self.request.get, ['q', 'email', 'name', 'latlon'])
    q = json.loads(q)
    writable_file_name = self.request.get('writable_file_name')
    filename = self.request.get('filename')
    cursor = self.request.get('cursor')
    large_file = True if self.request.get('large_file') == 'True' else False
    if cursor:
        curs = search.Cursor(web_safe_string=cursor)
    else:
        curs = None
    logging.info('CURSOR %s' % curs)

    # Write chunk.
    max_retries = 10
    retry_count = 0
    success = False
    while not success and retry_count < max_retries:
        try:
            with files.open(writable_file_name, 'a') as f:
                records, next_cursor, count = vnsearch.query(
                    q, 100, curs=curs)
                if not curs:
                    # First chunk of this download: record it once.
                    params = dict(query=q, type='download', count=count,
                                  downloader=email, download=filename,
                                  latlon=latlon)
                    taskqueue.add(url='/apitracker', params=params,
                                  queue_name="apitracker")
                chunk = '%s\n' % _get_tsv_chunk(records)
                f.write(chunk)
                f.close(finalize=False)
                success = True
        except Exception as e:
            logging.error(
                "I/O error writing chunk to FILE: %s for\nQUERY: %s"
                % (filename, q))
            retry_count += 1
            # BUG FIX: the original re-raised unconditionally here, which
            # made the retry loop dead code (it always aborted on the
            # first failure). Only propagate once retries are exhausted.
            if retry_count >= max_retries:
                raise e
def search(self, message):
    """Run a portal search (3-tuple query API) and return a RecordList.

    Logs the parsed query, tallies per-resource result counts, reports
    the search to /apitracker, and returns the page of records. On
    failure, returns a RecordList carrying the exception class name.
    """
    if message.cursor:
        curs = search.Cursor(web_safe_string=message.cursor)
    else:
        curs = search.Cursor()

    q = json.loads(message.q)
    logging.info('Q %s' % q)
    keywords = ' '.join([kw for kw in q['keywords'] if kw])

    sort = message.sort
    if 'distance' in keywords:
        # Distance searches keep native ranking; no explicit sort.
        sort = None
    limit = message.limit
    logging.info('keywords=%s, limit=%s, sort=%s, curs=%s'
                 % (keywords, limit, sort, curs))
    logging.info('REQUEST LATLON %s' % self.cityLatLong)

    result = vnsearch.query(keywords, limit, sort=sort, curs=curs)

    if len(result) == 3:
        recs, cursor, count = result
        # Tally how many records each resource (dwca url) contributed.
        res_counts = {}
        for rec in recs:
            dwca = rec['url']
            res_counts[dwca] = res_counts.get(dwca, 0) + 1
        logging.info("RESOURCE COUNTS: %s" % res_counts)
        if not message.cursor:
            query_type, query_count = 'query', count
        else:
            query_type, query_count = 'query-view', limit
        params = dict(query=keywords, type=query_type, count=query_count,
                      latlon=self.cityLatLong,
                      res_counts=json.dumps(res_counts))
        taskqueue.add(url='/apitracker', params=params,
                      queue_name="apitracker")
    else:
        # result[0] is the exception raised by the query.
        error = result[0].__class__.__name__
        params = dict(error=error, query=keywords, type='query',
                      latlon=self.cityLatLong)
        taskqueue.add(url='/apitracker', params=params,
                      queue_name="apitracker")
        return RecordList(error=unicode(error))

    if cursor:
        cursor = cursor.web_safe_string
    items = [RecordPayload(id=rec['keyname'], json=json.dumps(rec))
             for rec in recs if rec]
    return RecordList(items=items, cursor=cursor, count=count)
def get(self):
    """Stream small downloads back directly; queue large ones as files."""
    count, keywords, email, name, download = map(
        self.request.get,
        ['count', 'keywords', 'email', 'name', 'writable_file_name'])
    logging.info(' . '.join([count, keywords, email, name, download]))
    q = ' '.join(json.loads(keywords))
    count = int(count)
    latlon = self.request.headers.get('X-AppEngine-CityLatLong')

    if count > 1000:
        # Too big to stream; compose the file in the background.
        self._queue(q, email, name, latlon, 'True')
        return

    # Small result set: queue the file build and stream TSV directly.
    self._queue(q, email, name, latlon, 'False')
    fname = str('%s.txt' % name)
    self.response.headers['Content-Type'] = "text/tab-separated-values"
    self.response.headers['Content-Disposition'] = \
        "attachment; filename=%s" % fname
    records, cursor, count = vnsearch.query(q, count)
    params = dict(query=q, type='download', count=count, downloader=email,
                  download=download, latlon=latlon)
    taskqueue.add(url='/apitracker', params=params, queue_name="apitracker")
    data = '%s\n%s' % (util.DWC_HEADER, _get_tsv_chunk(records))
    self.response.out.write(data)
def get(self):
    """Serve an API search request and respond with a JSON result page."""
    request = json.loads(self.request.get('q'))
    q, c, limit = map(request.get, ['q', 'c', 'l'])

    # Default limit of 400 is based on substantial performance testing;
    # 1000 is the maximum value allowed by Google.
    if not limit:
        limit = 400
    elif limit > 1000:
        limit = 1000
    elif limit < 0:
        limit = 1

    if c:
        curs = search.Cursor(web_safe_string=c)
    else:
        curs = search.Cursor()

    result = vnsearch.query(q, limit, 'dwc', sort=None, curs=curs)
    response = None

    if len(result) == 4:
        recs, cursor, count, query_version = result
        if not c:
            query_type = 'query'
            query_count = count
        else:
            query_type = 'query-view'
            query_count = limit
        if cursor:
            cursor = cursor.web_safe_string

        # Counts above 10,000 are unreliable — testing showed they can be
        # off by orders of magnitude even for 10,000-30,000 record
        # queries — so report them as '>10000'.
        if count > 10000:
            count = '>10000'

        d = datetime.utcnow()

        # Re-serialize dynamicProperties as formatted JSON.
        for r in recs:
            if 'dynamicproperties' in r:
                r['dynamicproperties'] = vnutil.format_json(
                    r['dynamicproperties'])

        response = json.dumps(
            dict(recs=recs, cursor=cursor, matching_records=count,
                 limit=limit, response_records=len(recs),
                 api_version=API_VERSION, query_version=query_version,
                 request_date=d.isoformat(),
                 request_origin=self.cityLatLong, submitted_query=q))

        res_counts = vnutil.search_resource_counts(recs)
        params = dict(api_version=API_VERSION, count=len(recs),
                      latlon=self.cityLatLong, matching_records=count,
                      query=q, query_version=query_version,
                      request_source='SearchAPI',
                      response_records=len(recs),
                      res_counts=json.dumps(res_counts), type=query_type)
        taskqueue.add(url='/apitracker', params=params,
                      queue_name="apitracker")
    else:
        # result[0] is the exception raised by the query.
        error = result[0].__class__.__name__
        params = dict(error=error, query=q, type='query',
                      latlon=self.cityLatLong)
        taskqueue.add(url='/apitracker', params=params,
                      queue_name="apitracker")
        self.response.clear()
        message = 'Please try again. Error: %s' % error
        self.response.set_status(500, message=message)
        response = message

    # NOTE(review): Content-Type is set on self.response.out while
    # charset is set on self.response — looks inconsistent; confirm.
    self.response.out.headers['Content-Type'] = 'application/json'
    self.response.headers['charset'] = 'utf-8'
    self.response.out.write(response)
def get(self):
    """Serve an API search request, returning a JSON page of records."""
    logging.info('API Version: %s\nAPI search request: %s'
                 % (API_VERSION, self.request))
    request = json.loads(self.request.get('q'))
    q, c, limit = map(request.get, ['q', 'c', 'l'])

    # Default limit of 400 chosen from substantial performance testing.
    if not limit:
        limit = 400
    # 1000 is the maximum value allowed by Google.
    if limit > 1000:
        limit = 1000
    if limit < 0:
        limit = 1

    curs = search.Cursor(web_safe_string=c) if c else search.Cursor()

    result = vnsearch.query(q, limit, 'dwc', sort=None, curs=curs)
    response = None

    if len(result) != 4:
        # Query failed: result[0] holds the exception instance.
        error = result[0].__class__.__name__
        params = dict(error=error, query=q, type='query',
                      latlon=self.cityLatLong)
        taskqueue.add(url='/apitracker', params=params,
                      queue_name="apitracker")
        self.response.clear()
        message = 'Please try again. Error: %s' % error
        self.response.set_status(500, message=message)
        response = message
    else:
        recs, cursor, count, query_version = result
        query_type = 'query' if not c else 'query-view'
        query_count = count if not c else limit
        if cursor:
            cursor = cursor.web_safe_string

        # Counts above 10,000 are unreliable (can be off by orders of
        # magnitude), so report them as '>10000'.
        if count > 10000:
            count = '>10000'

        now = datetime.utcnow()

        # Pretty-print dynamicProperties JSON.
        for rec in recs:
            if 'dynamicproperties' in rec:
                rec['dynamicproperties'] = vnutil.format_json(
                    rec['dynamicproperties'])

        payload = dict(recs=recs, cursor=cursor, matching_records=count,
                       limit=limit, response_records=len(recs),
                       api_version=API_VERSION,
                       query_version=query_version,
                       request_date=now.isoformat(),
                       request_origin=self.cityLatLong, submitted_query=q)
        response = json.dumps(payload)

        res_counts = vnutil.search_resource_counts(recs)
        params = dict(api_version=API_VERSION, count=len(recs),
                      latlon=self.cityLatLong, matching_records=count,
                      query=q, query_version=query_version,
                      request_source='SearchAPI',
                      response_records=len(recs),
                      res_counts=json.dumps(res_counts), type=query_type)
        taskqueue.add(url='/apitracker', params=params,
                      queue_name="apitracker")

    # NOTE(review): Content-Type set on response.out vs charset on
    # response — looks inconsistent; confirm intent.
    self.response.out.headers['Content-Type'] = 'application/json'
    self.response.headers['charset'] = 'utf-8'
    self.response.out.write(response)
def get(self, message):
    """Fetch the single record matching message.id as a RecordPayload."""
    result = vnsearch.query('id:%s' % message.id, 1)
    recs, cursor, count = result
    return RecordPayload(id=message.id, json=json.dumps(recs[0]))
def post(self):
    """Write one chunk of query results to a file in the TEMP_BUCKET.

    GCS does not support append, so each chunk goes to its own file named
    by filepattern and fileindex; a query that fits in a single chunk is
    written to an unindexed file instead. Running per-resource counts are
    merged into total_res_counts as chunks accumulate.
    """
    q, email, name, latlon = map(
        self.request.get, ['q', 'email', 'name', 'latlon'])
    q = json.loads(q)
    requesttime = self.request.get('requesttime')
    filepattern = self.request.get('filepattern')
    fileindex = int(self.request.get('fileindex'))
    reccount = int(self.request.get('reccount'))
    fromapi = self.request.get('fromapi')
    source = self.request.get('source')
    filename = '/%s/%s-%s.%s' % (TEMP_BUCKET, filepattern, fileindex,
                                 FILE_EXTENSION)
    cursor = self.request.get('cursor')
    try:
        total_res_counts = json.loads(self.request.get('res_counts'))
    except (TypeError, ValueError):
        # BUG FIX: was a bare except; catch only JSON-decoding failures
        # (missing or malformed res_counts param) and start fresh.
        total_res_counts = {}
    if cursor:
        curs = search.Cursor(web_safe_string=cursor)
    else:
        curs = None

    # Write single chunk to file, GCS does not support append.
    records, next_cursor, count, query_version = \
        vnsearch.query(q, SEARCH_CHUNK_SIZE, curs=curs)
    this_record_count = len(records)

    # Build dict for search counts.
    res_counts = vnutil.search_resource_counts(records, total_res_counts)
    # Merge the two dictionaries, summing counts.
    # BUG FIX: the old merge loop reassigned `count`, clobbering the
    # matching-record count returned by the query, and used a bare
    # except; a .get() merge avoids both problems.
    if not total_res_counts:
        total_res_counts = res_counts
    else:
        for r in res_counts:
            total_res_counts[r] = total_res_counts.get(r, 0) + res_counts[r]

    # Update the total number of records retrieved.
    reccount = reccount + this_record_count

    # Make a chunk to write to a file.
    chunk = '%s\n' % _get_tsv_chunk(records)
    # Release records and garbage collect to conserve memory.
    records = None
    gc.collect()

    if fileindex == 0 and not next_cursor:
        # A query with fewer than SEARCH_CHUNK_SIZE results: single file,
        # no chunk index in the name.
        filename = '/%s/%s.%s' % (TEMP_BUCKET, filepattern, FILE_EXTENSION)

    max_retries = 2
    retry_count = 0
    success = False
    while not success and retry_count < max_retries:
        try:
            with gcs.open(filename, 'w',
                          content_type='text/tab-separated-values',
                          options={'x-goog-acl': 'public-read'}) as f:
                if fileindex == 0:
                    # First chunk carries the column-header row.
                    f.write('%s\n' % vnutil.download_header())
                f.write(chunk)
                success = True
                # Release the chunk and garbage collect to conserve
                # memory.
                chunk = None
                gc.collect()
        except Exception as e:
            logging.error("Error writing chunk to FILE: %s for\nQUERY: %s \
Error: %s\nVersion: %s" % (filename, q, e, DOWNLOAD_VERSION))
            retry_count += 1
def get(self):
    """Entry point for download/count requests.

    Result sets no larger than SEARCH_CHUNK_SIZE are streamed back
    directly and archived to DOWNLOAD_BUCKET; larger ones are queued for
    background composition.
    """
    count, keywords, email, name = map(
        self.request.get, ['count', 'keywords', 'email', 'name'])
    q = ' '.join(json.loads(keywords))
    latlon = self.request.headers.get('X-AppEngine-CityLatLong')
    fromapi = self.request.get('api')
    countonly = self.request.get('countonly')

    # Force count to be an integer.
    # count is a limit on the number of records to download.
    count = int(str(count))

    source = 'DownloadPortal'
    if fromapi is not None and len(fromapi) > 0:
        source = 'DownloadAPI'
        # Try to send an indicator to the browser if it came from one.
        if countonly is not None and len(countonly) > 0:
            body = 'Counting results:<br>'
            source = 'CountAPI'
        else:
            body = 'Downloading results:<br>'
        if email is None or len(email) == 0 or email == 'None':
            body += 'ERROR: You must provide an email address.'
        else:
            body += 'File name: %s<br>' % name
            body += 'Email: %s<br>' % email
            body += 'Keywords: %s<br>' % keywords
            body += 'X-AppEngine-CityLatLong: %s<br>' % latlon
            body += 'Source: %s<br>' % source
            body += 'API: %s<br>' % fromapi
            body += 'len(API): %s<br>' % len(fromapi)
            body += 'Request headers: %s<br>' % self.request.headers
        self.response.out.write(body)
        logging.info('API download request. API: %s Source: %s Count: %s \
Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s'
                     % (fromapi, source, count, keywords, email, name,
                        latlon, DOWNLOAD_VERSION))
        if email is None or len(email) == 0:
            return
    else:
        logging.info('Portal download request. API: %s Source: %s Count: %s \
Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s'
                     % (fromapi, source, count, keywords, email, name,
                        latlon, DOWNLOAD_VERSION))

    if count == 0 or count > SEARCH_CHUNK_SIZE:
        # Results larger than SEARCH_CHUNK_SIZE: compose a file for
        # download in the background.
        self._queue(q, email, name, latlon, fromapi, source, countonly)
        return

    # Results fit in one chunk: download directly and keep a copy of the
    # file in the download bucket.
    filename = str('%s.txt' % name)
    self.response.headers['Content-Type'] = "text/tab-separated-values"
    self.response.headers['Content-Disposition'] = \
        "attachment; filename=%s" % filename
    records, cursor, count, query_version = vnsearch.query(q, count)
    record_count = len(records)

    # Build dictionary for search counts.
    res_counts = vnutil.search_resource_counts(records)

    # Prepend the download header to the TSV body.
    data = '%s\n%s' % (vnutil.download_header(), _get_tsv_chunk(records))
    # Release records and garbage collect to conserve memory.
    records = None
    gc.collect()

    # Stream the data back to the requester.
    self.response.out.write(data)

    # Write single chunk to file in DOWNLOAD_BUCKET.
    filepattern = '%s-%s' % (name, uuid.uuid4().hex)
    filename = '/%s/%s.%s' % (DOWNLOAD_BUCKET, filepattern, FILE_EXTENSION)

    # Parameters for the coming apitracker taskqueue.
    apitracker_params = dict(
        api_version=fromapi, count=record_count, download=filename,
        downloader=email, error=None, latlon=latlon,
        matching_records=record_count, query=q,
        query_version=query_version, request_source=source,
        response_records=record_count, res_counts=json.dumps(res_counts),
        type='download')

    max_retries = 2
    retry_count = 0
    success = False
    while not success and retry_count < max_retries:
        try:
            with gcs.open(filename, 'w',
                          content_type='text/tab-separated-values',
                          options={'x-goog-acl': 'public-read'}) as f:
                f.write(data)
                success = True
                # Release data and garbage collect to conserve memory.
                data = None
                gc.collect()
                taskqueue.add(url='/apitracker',
                              params=apitracker_params,
                              queue_name="apitracker")
        except Exception as e:
            logging.error("Error writing small result set to %s.\nError: %s \n\
Version: %s" % (filename, e, DOWNLOAD_VERSION))
            retry_count += 1
def post(self):
    """Write one chunk of query results to a TEMP_BUCKET file in GCS.

    Because GCS files cannot be appended, each chunk gets its own file
    keyed by filepattern/fileindex; a query that completes in the first
    chunk is written to an unindexed file. Per-resource counts accumulate
    across chunks in total_res_counts.
    """
    q, email, name, latlon = map(
        self.request.get, ['q', 'email', 'name', 'latlon'])
    q = json.loads(q)
    requesttime = self.request.get('requesttime')
    filepattern = self.request.get('filepattern')
    fileindex = int(self.request.get('fileindex'))
    reccount = int(self.request.get('reccount'))
    fromapi = self.request.get('fromapi')
    source = self.request.get('source')
    filename = '/%s/%s-%s.%s' % (TEMP_BUCKET, filepattern, fileindex,
                                 FILE_EXTENSION)
    cursor = self.request.get('cursor')
    try:
        total_res_counts = json.loads(self.request.get('res_counts'))
    except (TypeError, ValueError):
        # BUG FIX: narrowed from a bare except — only a missing or
        # malformed res_counts parameter should reset the totals.
        total_res_counts = {}
    if cursor:
        curs = search.Cursor(web_safe_string=cursor)
    else:
        curs = None

    # Write single chunk to file, GCS does not support append.
    records, next_cursor, count, query_version = \
        vnsearch.query(q, SEARCH_CHUNK_SIZE, curs=curs)
    this_record_count = len(records)

    # Build dict for search counts.
    res_counts = vnutil.search_resource_counts(records, total_res_counts)
    # Merge the two dictionaries, summing counts.
    # BUG FIX: the previous merge loop both reassigned `count`
    # (clobbering the query's matching-record count) and relied on a
    # bare except; dict.get() does the same merge safely.
    if total_res_counts is None or len(total_res_counts) == 0:
        total_res_counts = res_counts
    else:
        for r in res_counts:
            total_res_counts[r] = (total_res_counts.get(r, 0)
                                   + res_counts[r])

    # Update the total number of records retrieved.
    reccount = reccount + this_record_count

    # Make a chunk to write to a file.
    chunk = '%s\n' % _get_tsv_chunk(records)
    # Release records and garbage collect to conserve memory.
    records = None
    gc.collect()

    if fileindex == 0 and not next_cursor:
        # This is a query with fewer than SEARCH_CHUNK_SIZE results.
        filename = '/%s/%s.%s' % (TEMP_BUCKET, filepattern, FILE_EXTENSION)

    max_retries = 2
    retry_count = 0
    success = False
    while not success and retry_count < max_retries:
        try:
            with gcs.open(filename, 'w',
                          content_type='text/tab-separated-values',
                          options={'x-goog-acl': 'public-read'}) as f:
                if fileindex == 0:
                    # Only the first chunk carries the header row.
                    f.write('%s\n' % vnutil.download_header())
                f.write(chunk)
                success = True
                # Release the chunk and garbage collect to conserve
                # memory.
                chunk = None
                gc.collect()
        except Exception as e:
            logging.error("Error writing chunk to FILE: %s for\nQUERY: %s \
Error: %s\nVersion: %s" % (filename, q, e, DOWNLOAD_VERSION))
            retry_count += 1
def get(self):
    """Handle a download/count request.

    Streams result sets that fit within SEARCH_CHUNK_SIZE directly (and
    archives a copy to DOWNLOAD_BUCKET); queues anything larger for
    background composition. Requests from the blocked/masked email are
    ignored.
    """
    count, keywords, email, name = map(
        self.request.get, ['count', 'keywords', 'email', 'name'])
    q = ' '.join(json.loads(keywords))
    latlon = self.request.headers.get('X-AppEngine-CityLatLong')
    fromapi = self.request.get('api')
    countonly = self.request.get('countonly')

    # Force count to be an integer.
    # count is a limit on the number of records to download.
    count = int(str(count))

    source = 'DownloadPortal'
    if fromapi is not None and len(fromapi) > 0:
        source = 'DownloadAPI'
        # Try to send an indicator to the browser if it came from one.
        if countonly is not None and len(countonly) > 0:
            body = 'Counting results:<br>'
            source = 'CountAPI'
        else:
            body = 'Downloading results:<br>'
        if email is None or len(email) == 0 or email == 'None':
            body += 'ERROR: You must provide an email address.'
        else:
            body += 'File name: %s<br>' % name
            body += 'Email: %s<br>' % email
            body += 'Keywords: %s<br>' % keywords
            body += 'X-AppEngine-CityLatLong: %s<br>' % latlon
            body += 'Source: %s<br>' % source
            body += 'API: %s<br>' % fromapi
            body += 'len(API): %s<br>' % len(fromapi)
            body += 'Request headers: %s<br>' % self.request.headers
        self.response.out.write(body)
        logging.info(
            'API download request. API: %s Source: %s Count: %s Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s'
            % (fromapi, source, count, keywords, email, name, latlon,
               DOWNLOAD_VERSION))
        if email is None or len(email) == 0 or email == '*****@*****.**':
            logging.info(
                'Ignoring download request from email: %s. Version: %s'
                % (email, DOWNLOAD_VERSION))
            return
    else:
        logging.info(
            'Portal download request. API: %s Source: %s Count: %s Keywords: %s Email: %s Name: %s LatLon: %s\nVersion: %s'
            % (fromapi, source, count, keywords, email, name, latlon,
               DOWNLOAD_VERSION))

    if count == 0 or count > SEARCH_CHUNK_SIZE:
        # The results are larger than SEARCH_CHUNK_SIZE: compose a file
        # for download in the background.
        self._queue(q, email, name, latlon, fromapi, source, countonly)
        return

    # The results are smaller than SEARCH_CHUNK_SIZE: download directly
    # and make a copy of the file in the download bucket.
    filename = str('%s.txt' % name)
    self.response.headers['Content-Type'] = "text/tab-separated-values"
    self.response.headers['Content-Disposition'] = \
        "attachment; filename=%s" % filename
    records, cursor, count, query_version = vnsearch.query(q, count)
    record_count = len(records)

    # Build dictionary for search counts.
    res_counts = vnutil.search_resource_counts(records)

    # Write the header for the output file followed by the TSV body.
    data = '%s\n%s' % (vnutil.download_header(), _get_tsv_chunk(records))
    # Release records and garbage collect to conserve memory.
    records = None
    gc.collect()

    # Write the data to the response.
    self.response.out.write(data)

    # Write single chunk to file in DOWNLOAD_BUCKET.
    filepattern = '%s-%s' % (name, uuid.uuid4().hex)
    filename = '/%s/%s.%s' % (DOWNLOAD_BUCKET, filepattern, FILE_EXTENSION)

    # Parameters for the coming apitracker taskqueue.
    apitracker_params = dict(
        api_version=fromapi, count=record_count, download=filename,
        downloader=email, error=None, latlon=latlon,
        matching_records=record_count, query=q,
        query_version=query_version, request_source=source,
        response_records=record_count, res_counts=json.dumps(res_counts),
        type='download')

    max_retries = 2
    retry_count = 0
    success = False
    while not success and retry_count < max_retries:
        try:
            with gcs.open(filename, 'w',
                          content_type='text/tab-separated-values',
                          options={'x-goog-acl': 'public-read'}) as f:
                f.write(data)
                success = True
                # Release data and garbage collect to conserve memory.
                data = None
                gc.collect()
                taskqueue.add(url='/apitracker',
                              params=apitracker_params,
                              queue_name="apitracker")
        except Exception as e:
            logging.error(
                "Error writing small result set to %s.\nError: %s \n\
Version: %s" % (filename, e, DOWNLOAD_VERSION))
            retry_count += 1
def get(self, message):
    """Fetch the single record matching message.id as a RecordPayload."""
    recs, _cursor, _count, _version = vnsearch.query(
        'id:%s' % message.id, 1)
    return RecordPayload(id=message.id, json=json.dumps(recs[0]))