def delete(self, src_id):
    '''
    Deletes specified source.

    This will permanently remove this source from the system. USE CAREFULLY!

    Removes, in order: per-matrix results on disk and in the results
    collection, the opal-managed artifacts, the source's mongo record, and
    finally the source's directory tree (which also contains the matrix
    subtrees).
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        src = find_source(col, src_id)
    except IndexError:
        return 'No resource at that URL.', 404

    # Clean up results generated from this source's matrices, if any.
    try:
        matrices = src['matrices']
    except KeyError:
        logging.info('No Matrices for source %s on delete', src_id)
    else:
        # NOTE(review): this previously opened DATALOADER_COL_NAME, but the
        # matrix-delete endpoint removes the same records from
        # RESULTS_COL_NAME — using the results collection here as well.
        rescol = db_collection(client, DATALOADER_DB_NAME, RESULTS_COL_NAME)
        for mat in matrices:
            mat_id = mat['id']
            # the matrix subtree itself is deleted when
            # DATALOADER_PATH/src_id gets removed later
            try:
                logging.info('going to remove %s/%s', RESPATH, mat_id)
                shutil.rmtree(os.path.join(RESPATH, mat_id))
                rescol.remove({'src_id': mat_id})
            except Exception as ex:
                logging.error('could not remove matrix results %s while deleting source %s exception:%s', mat_id, src_id, ex)

    # uses the dataloader opal deletion function
    try:
        utils.delete(src)
    except Exception as ex:
        err = 'Dataloader opal failed to delete source: %s Exception: %s' % (src, ex)
        logging.error(err)
        return err, 500

    try:
        col.remove({'src_id': src_id})
    except Exception:
        return 'Failed to remove source from database', 500

    try:
        shutil.rmtree(os.path.join(DATALOADER_PATH, src_id))
    except OSError:
        return 'Failed to delete source from disk', 500

    return 'Deleted Source: %s' % src_id, 204
def get(self, src_id, mat_id):
    '''
    Returns features for the specified matrix.

    Features are the names for the columns within the matrix, read one per
    line from <matrix rootdir>/features.txt. Returns [] when the features
    file is absent.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        matrix = find_matrix(col, src_id, mat_id)
    except IndexError:
        return 'No resource at that URL.', 404
    except AssertionError:
        return 'Bad Mongo Query', 500

    features_filepath = matrix['rootdir'] + 'features.txt'
    try:
        with open(features_filepath) as features_file:
            # splitlines() strips the trailing newline without dropping the
            # last feature; the old split('\n') + pop() lost a real feature
            # whenever the file did not end with a newline
            response = features_file.read().splitlines()
    except IOError:
        response = []
    return response
def post(self, src_id):
    '''
    Generate a matrix from the source stored at that ID.

    Returns metadata for that matrix (201), 404 for an unknown source, or
    406 with details when ingestion fails.
    '''
    try:
        posted_data = request.get_json(force=True)
        client = db_client()
        col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
        try:
            src = find_source(col, src_id)
        except IndexError:
            return 'No resource at that URL.', 404
        error, matricesNew = utils.ingest(posted_data, src)
        if error:
            return 'Unable to create matrix.', 406
        # append the new matrices to whatever the source already has;
        # .get() tolerates sources that have no 'matrices' key yet
        matrices = list(src.get('matrices', []))
        matrices.extend(matricesNew)
        col.update({'src_id': src_id}, {'$set': {'matrices': matrices}})
    except Exception:
        # surface the traceback to the caller for debugging ingest failures
        tb = traceback.format_exc()
        return tb, 406
    return matricesNew, 201
def get(self):
    '''
    Returns a list of generated matrices.
    '''
    mongo = db_client()
    collection = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    # strip mongo's internal _id from every record
    return [record for record in collection.find({}, {"_id": 0})]
def get(self, group_name):
    '''
    Returns a list of sources within a particular group.
    '''
    mongo = db_client()
    collection = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    # match on the group name, dropping mongo's internal _id field
    matching = collection.find({'group_name': group_name}, {"_id": 0})
    return [record for record in matching]
def get(self, src_id):
    '''
    Returns a list of schemas for a particular source.

    Delegates to the source's ingest module, passing it the source files
    directory and the registered filters.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    # find_source raises IndexError elsewhere in this module for an unknown
    # id; handle both that and a None return for consistency
    try:
        src = find_source(col, src_id)
    except IndexError:
        return 'No resource at that URL.', 404
    if not src:
        return 'No resource at that URL.', 404
    filepath = src['rootdir'] + '/source/'
    # get filters so the ingest module can describe filterable fields
    f_col = db_collection(client, DATALOADER_DB_NAME, FILTERS_COL_NAME)
    filters = f_col.find()
    return utils.explore(src['ingest_id'], filepath, filters)
def get(self, src_id, param1):
    '''
    Forwards a custom single-parameter GET to the source's ingest module,
    passing along the request's query arguments.
    '''
    mongo = db_client()
    sources = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        source = find_source(sources, src_id)
    except IndexError:
        return 'No resource at that URL.', 404
    return utils.custom(source['ingest_id'], source['rootdir'],
                        param1=param1, request=request.args)
def post(self, src_id, param1=None, param2=None, param3=None):
    '''
    Forwards a custom POST (up to three path parameters plus a JSON payload)
    to the source's ingest module.
    '''
    mongo = db_client()
    sources = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        source = find_source(sources, src_id)
    except IndexError:
        return 'No resource at that URL.', 404
    return utils.custom(source['ingest_id'], source['rootdir'],
                        param1=param1, param2=param2, param3=param3,
                        payload=request.get_json())
def get(self):
    '''
    Returns a list of groups available.
    '''
    mongo = db_client()
    sources = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    # collapse sources down to their distinct group names
    grouped = sources.aggregate([{"$group": {"_id": "$group_name"}}])
    names = []
    for entry in grouped:
        names.append(entry["_id"])
    return names
def get(self):
    '''
    Returns a list of available sources.

    All sources registered in the system will be returned.
    '''
    mongo = db_client()
    collection = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    # omit mongo's _id and the (potentially large) stash field
    cursor = collection.find({}, {"_id": 0, "stash": 0})
    return [record for record in cursor]
def get(self):
    '''
    Returns a list of available ingest modules.

    All ingest modules registered in the system will be returned. If you
    believe there is an ingest module that exists in the system but is not
    present here, it is probably not registered in the MongoDB database.
    '''
    mongo = db_client()
    collection = db_collection(mongo, DATALOADER_DB_NAME, INGEST_COL_NAME)
    return [record for record in collection.find({}, {"_id": 0})]
def post(self, src_id):
    '''
    For streaming, start or end the streaming service.

    No payload is sent for this request.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        src = find_source(col, src_id)
    except IndexError:
        return 'No resource at that URL.', 404
    # NOTE(review): a second db_client() and a filters lookup used to live
    # here but were never passed to utils.stream — removed as dead code
    return utils.stream(src['ingest_id'], src['rootdir'])
def get(self):
    '''
    Returns a list of available filters.

    All filters registered in the system will be returned. If you believe
    there is a filter that exists in the system but is not present here, it
    is probably not registered in the MongoDB database.
    '''
    mongo = db_client()
    collection = db_collection(mongo, DATALOADER_DB_NAME, FILTERS_COL_NAME)
    return [record for record in collection.find({}, {"_id": 0})]
def patch(self, src_id):
    '''
    For streaming, toggles streaming on or off.

    This request is used in conjunction with the POST request to this same
    endpoint. Returns no content.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        src = find_source(col, src_id)
    except IndexError:
        return 'No resource at that URL.', 404
    # NOTE(review): a second db_client() and a filters lookup used to live
    # here but were never passed to utils.update — removed as dead code
    utils.update(src['ingest_id'], src['rootdir'])
    return
def get(self, ingest_id):
    '''
    Returns the ingest-module record registered under ingest_id.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, INGEST_COL_NAME)
    src = col.find_one({'ingest_id': ingest_id}, {"_id": 0})
    # find_one returns None for a missing record — it never raises the
    # IndexError the old code tried to catch, which made that error branch
    # dead and returned null with HTTP 200 for unknown ids
    if src is None:
        return 'No resource at that URL', 401
    return src
def get(self):
    '''
    Returns a list of all available visualizations.

    All visualizations registered in the system will be returned. If you
    believe there is a visualization that exists in the system but is not
    present here, it is probably not registered in the MongoDB database.
    '''
    mongo = db_client()
    collection = db_collection(mongo, VIS_DB_NAME, VIS_COL_NAME)
    options = []
    for record in collection.find():
        # drop mongo's internal _id before returning the record
        cleaned = dict(record)
        cleaned.pop('_id', None)
        options.append(cleaned)
    return options
def delete(self):
    '''
    Deletes all stored sources.

    This will permanently remove all sources from the system. USE CAREFULLY!
    '''
    mongo = db_client()
    collection = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    # wipe every record from mongo first ...
    collection.remove({})
    # ... then delete the backing directories from disk
    for entry in os.listdir(DATALOADER_PATH):
        shutil.rmtree(os.path.join(DATALOADER_PATH, entry))
    return '', 204
def get(self, src_id, matrix_id, output_file, file_download_name):
    '''
    Downloads the specified matrix file.

    Streams the requested file from the matrix's root directory as an
    attachment named file_download_name. matrix_id may be the matrix's id
    or its name.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        matrices = find_source(col, src_id)['matrices']
    except (IndexError, KeyError):
        # unknown source, or a source with no matrices yet
        return 'No resource at that URL.', 404
    for matrix in matrices:
        if matrix['id'] == matrix_id or matrix['name'] == matrix_id:
            return send_from_directory(matrix['rootdir'], output_file,
                                       as_attachment=True,
                                       attachment_filename=file_download_name)
    # previously fell through returning None (a Flask 500); report 404
    return 'No resource at that URL.', 404
def get(self, src_id):
    '''
    Returns metadata and a list of matrices available for a particular
    source. src_id can be the UUID or name of the source.
    '''
    mongo = db_client()
    collection = db_collection(mongo, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        source = find_source(collection, src_id)
    except Exception as e:
        return 'Unexpected error %s' % e, 500
    if source is None:
        return 'No resource at that URL', 404
    return source
def get(self, src_id, mat_id):
    '''
    Returns metadata for the matrix specified.

    mat_id may be the matrix's id or its name.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        matrices = find_source(col, src_id)['matrices']
    except (IndexError, KeyError):
        # KeyError: the source exists but has no matrices yet — previously
        # this leaked out as a 500 instead of a 404
        return 'No resource at that URL.', 404
    for matrix in matrices:
        if matrix['id'] == mat_id or matrix['name'] == mat_id:
            return matrix
    return 'No resource at that URL.', 404
def post(self):
    '''
    Returns a list of applicable available visualizations.

    Not all visualizations are applicable for every dataset. This request
    requires a list of inputs and will return the visualization options
    available based on those inputs.
    '''
    data = request.get_json()
    vis_options = []
    client = db_client()
    col = db_collection(client, VIS_DB_NAME, VIS_COL_NAME)
    cur = col.find()

    # gather the output names produced by the submitted result set(s)
    if len(data) != 1:
        outputsPersist = []
        for res in data:
            outputsPersist.extend(res['outputs'])
    else:
        outputsPersist = data[0]['outputs']
        if 'selected_features' in data[0]:
            outputsPersist.append('selected_features')
        outputsPersist.append('names')

    for vis in cur:
        # removed leftover debug `print vis` statement
        contains = False
        # work on a copy: each visualization input consumes one output
        outputs = outputsPersist[:]
        for i in vis['inputs']:
            if i in outputs:
                contains = True
                outputs.remove(i)
            else:
                contains = False
                break
        if contains:
            response = {key: value for key, value in vis.items() if key != '_id'}
            vis_options.append(response)
    return vis_options
def get(self, src_id, mat_id):
    '''
    Returns the README content for the specified matrix.

    Reads <matrix rootdir>/output.txt and returns its text, or 404 when the
    matrix or its output document does not exist.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        matrix = find_matrix(col, src_id, mat_id)
    except IndexError:
        return 'No resource at that URL.', 404
    except AssertionError:
        return 'Bad mongo query', 500

    output_path = matrix['rootdir'] + 'output.txt'
    try:
        with open(output_path) as output:
            return output.read()
    except IOError:
        # narrowed from a bare except: only a missing/unreadable file is
        # expected here; the old unreachable trailing return was removed
        return 'No Output document for %s/%s' % (src_id, mat_id), 404
def delete(self, src_id, mat_id):
    '''
    Deletes specified matrix.

    This will permanently remove this matrix and any results generated from
    it from the system. USE CAREFULLY!
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)
    try:
        matrices = find_source(col, src_id)['matrices']
    except (IndexError, KeyError):
        return 'No resource at that URL.', 404

    # keep every matrix except the one being deleted (match by id or name)
    matrices_new = [m for m in matrices
                    if m['id'] != mat_id and m['name'] != mat_id]
    if len(matrices_new) == len(matrices):
        return 'No resource at that URL.', 404
    col.update({'src_id': src_id}, {'$set': {'matrices': matrices_new}})

    shutil.rmtree(os.path.join(DATALOADER_PATH, src_id, mat_id))

    # best-effort cleanup of results generated from this matrix; failures
    # here (e.g. no results were ever produced) are non-fatal
    rescol = client[DATALOADER_DB_NAME][RESULTS_COL_NAME]
    try:
        rescol.remove({'src_id': mat_id})
        shutil.rmtree(os.path.join(RESPATH, mat_id))
    except Exception:
        pass
    # previously the 204 sat in the try's else-branch, so a failed results
    # cleanup made the endpoint return None (a Flask 500) after a
    # successful delete — always acknowledge with 204 now
    return '', 204
def put(self, name, ingest_id, group_name=""):
    '''
    Saves a new resource with a ID.

    Payload can be either a file or JSON structured configuration data.
    Returns the metadata for the new source (201), the existing source with
    an error flag when the name is already taken, or a traceback with 406
    on failure.
    '''
    client = db_client()
    col = db_collection(client, DATALOADER_DB_NAME, DATALOADER_COL_NAME)

    # If group_name == 'overwrite' then overwrite with same src_id
    overwrite = False
    if group_name == 'overwrite':
        group_name = ""
        overwrite = True

    # Check for an existing source with the same name.
    # Do not overwrite unless specified.
    existing_source = find_source(col, name)
    if existing_source is not None and not overwrite:
        logging.warning("Source Already Exists: %s", existing_source['src_id'])
        existing_source['error'] = 1
        existing_source['msg'] = "Source Already Exists"
        return existing_source

    try:
        if existing_source:
            src_id = existing_source['src_id']
            if overwrite:
                # purge the stale record and its files before re-ingesting
                col.delete_one({"src_id": src_id})
                file_path = '/'.join([DATALOADER_PATH, src_id])
                shutil.rmtree(file_path)
        else:
            src_id = utils.getNewId()
        t = utils.getCurrentTime()

        conn_info = request.get_json()
        filepath = None
        if conn_info is None:
            # payload is an uploaded file ('upload' avoids shadowing the
            # builtin 'file')
            upload = request.files['file']
            ext = re.split(r'\.', upload.filename)[1]
            if ext not in ALLOWED_EXTENSIONS:
                # fixed: the old message mixed %s with str.format, so the
                # filename was never interpolated
                print("WARN: File submitted {} is not of a supported filetype".format(upload.filename))
                # return ('This filetype is not supported.', 415)
            if 'zip' in upload.filename:
                src_type = 'zip'
            else:
                src_type = 'file'
            rootpath, filepath = write_source_file(DATALOADER_PATH, src_id, upload)
        else:
            # payload is JSON configuration data
            src_type = 'conf'
            rootpath, filepath = write_source_config(DATALOADER_PATH, src_id, conn_info)

        rootpath = DATALOADER_PATH + src_id + '/'
        source = Source(name, rootpath, src_id, src_type, t, ingest_id,
                        group_name, filepath=filepath)
        source_insert_response = col.insert_one(source.dict())
        # NOTE(review): insert_one returns an InsertOneResult and signals
        # failure by raising, so this guard is effectively dead — kept as a
        # belt-and-braces check
        if not source_insert_response:
            logging.error("Source Insert Failed")
            tb = traceback.format_exc()
            return tb, 406
        response = col.find_one({'_id': source_insert_response.inserted_id}, {"_id": 0})
    except Exception:
        tb = traceback.format_exc()
        return tb, 406
    return response, 201