def get_by_id_untrusted(session, user_id, project_string_id, file_id, directory_id=None):
    """
    Fetch a File by id, verifying the file is actually linked into the
    given directory.

    Even if we trust the directory (ie from project default),
    still have to check the file is in it!

    TODO make user_id and project_string_id optional (ie if directory is supplied?)

    This needs work but is used in a ton of places
    so review carefully!
    Plus want to keep as "one function" so we don't
    have a bunch of random checks
    """
    from shared.database.project import Project
    from shared.database.source_control.working_dir import WorkingDirFileLink

    if not directory_id:
        # No directory supplied: fall back to the project's default directory.
        default_dir = Project.get(session, project_string_id).directory_default
        directory_id = default_dir.id

    # Subquery of all file links in the directory; joining against it
    # guarantees the requested file belongs to that directory.
    link_sub_query = session.query(WorkingDirFileLink).filter(
        WorkingDirFileLink.working_dir_id == directory_id).subquery('working_dir_sub_query')

    return session.query(File).filter(
        File.id == file_id,
        File.id == link_sub_query.c.file_id).first()
def register_user(user_data, session):
    """
    Create a new User from registration data and, when a
    project_string_id is supplied, attach the user to that project.

    :param user_data:
        Example user_data={
            'username': '******',
            'email': '*****@*****.**',
            'password': '******',
            'project_string_id': 'myproject'
        }
    :return: the new User object (added to session, not committed)
    """
    # User registration
    password_hash = hashing_functions.make_password_hash(
        user_data['email'],
        user_data['password'])

    # TODO could insert logic here to attach a user to a project based on say the sigup code
    # ie signup_code.something?

    new_user = User(
        email = user_data['email'],
        password_hash = password_hash,
        username = user_data['username'],
        api_enabled_builder = True,
        api_enabled_trainer = True,
        security_email_verified = True,
        last_builder_or_trainer_mode = 'builder',
        permissions_general = {'general': ['normal_user']}
    )
    new_user.permissions_projects = {}

    # I don't like having this here but alternative of committing object seems worse
    session.add(new_user)

    if 'project_string_id' in user_data:
        project_string_id = user_data['project_string_id']
        new_user.current_project_string_id = project_string_id
        project = Project.get(session, project_string_id)
        if project is not None:
            new_user.projects.append(project)
            WorkingDir.new_user_working_dir(
                session,
                None,
                project,
                new_user
            )
            permission_result, permission_error = Project_permissions.add(
                project_string_id,
                new_user,
                project_string_id)

    return new_user
def has_project_permissions_for_export(export, project_string_id, session):
    """
    Verify that an export belongs to the project the caller has access to.

    Theory is that if a user has access to project
    they have access to download from project.

    :param export: Export row to check
    :param project_string_id: string id of the project the user is acting in
    :param session: DB session
    :return: log dict; log['error']['project_permissions'] is set on failure
    """
    log = regular_log.default()
    project = Project.get(session, project_string_id)

    # Bug fix: the error must fire when the export does NOT belong to the
    # project. Previously the comparison was `==`, which flagged every
    # legitimate (matching) export and let mismatched ones through.
    if export.project_id != project.id:
        log['error'][
            'project_permissions'] = 'Permission error, invalid project export match'

    return log
def generate_mask_by_project_id(project_string_id):
    """
    Run semantic-segmentation mask generation for an entire project.

    :param project_string_id: string id of the project to process
    :return: Flask-style ("ok", 200, headers) tuple
    """
    # TODO use a thread, this is a long running process
    data_prep = Semantic_segmentation_data_prep()

    # "joint" masks; the alternative mode is "binary"
    mask_type = "joint"

    with sessionMaker.session_scope() as session:
        project = Project.get(session, project_string_id)
        data_prep.generate_mask_core(session, project, mask_type)

    return "ok", 200, {'ContentType': 'application/json'}
def tag_view_by_project(project_string_id):
    """
    Public endpoint: return the serialized tag list for a project.

    :param project_string_id: string id of the project
    :return: (json, status, headers) tuple; 400 when project not found
    """
    with sessionMaker.session_scope() as session:
        project = Project.get_project(session, project_string_id)

        # Unknown project: reject the request.
        if project is None:
            return jsonify(success=False), 400, {'ContentType': 'application/json'}

        serialized_tags = project.serialize_tag_list_PUBLIC()
        return jsonify(success=True, tag_list=serialized_tags), 200, {'ContentType': 'application/json'}
def create_label_file(label_file_data, session):
    """
    Create a 'label' type File row and link it into the
    project's default directory.

    :param label_file_data: dict with 'label' (object), 'project_id'
        and optional 'state' (defaults to 'added')
    :param session: DB session
    :return: the newly created File
    """
    label = label_file_data.get('label')

    label_file = File()
    label_file.label = label
    label_file.label_id = label.id
    label_file.project_id = label_file_data['project_id']
    label_file.state = label_file_data.get('state', 'added')
    label_file.type = 'label'

    session.add(label_file)
    regular_methods.commit_with_rollback(session)

    project = Project.get_by_id(session, label_file.project_id)
    if project:
        # Make the new label visible in the default directory and
        # refresh the project's cached label dictionary.
        WorkingDirFileLink.add(session, project.directory_default_id, label_file)
        project.refresh_label_dict(session)

    session.add(label_file)
    regular_methods.commit_with_rollback(session)
    return label_file
def by_project_core(project_string_id: str, Roles: list, apis_project_list: list = [], apis_user_list: list = []):
    """
    Core permission gate for project-scoped routes.

    Two authorization paths:
      1. API credentials (request.authorization present): checked via
         API_Permissions.by_project plus project-level API flags.
      2. Logged-in user: delegated to Project_permissions.check_permissions.

    :return: True when access is granted.
    :raises Forbidden: on any denial.

    NOTE(review): the mutable list defaults are only read here, never
    mutated, so sharing them across calls is harmless — but confirm
    downstream callees do not mutate them.
    """
    with sessionMaker.session_scope() as session:

        # Reject ids some frontends serialize literally as "null"/"undefined".
        if not project_string_id or project_string_id == "null" or project_string_id == "undefined":
            raise Forbidden(default_denied_message)

        if request.authorization is not None:
            # API-token path.
            result = API_Permissions.by_project(session=session,
                                                project_string_id=project_string_id,
                                                Roles=Roles)
            if result is not True:
                raise Forbidden(default_denied_message + " API Permissions")

            # At the moment auth doesn't actually
            # get project as it has all results stored...
            # not clear that we would need a None check here
            # given its checked in auth?
            project = Project.get(session, project_string_id)
            if project is None:
                raise Forbidden(default_denied_message + " Can't find project")

            # Project APIs, maybe should role this into API_Permissions
            check_all_apis(project=project,
                           apis_required_list=apis_project_list)
            return True

        # Logged-in-user path (raises Forbidden internally on most denials).
        result = Project_permissions.check_permissions(
            session=session,
            apis_project_list=apis_project_list,
            apis_user_list=apis_user_list,
            project_string_id=project_string_id,
            Roles=Roles)

        if result is True:
            return True
        else:
            raise Forbidden(default_denied_message)
def web_build_name_to_file_id_dict(project_string_id):
    """
    Given we know a label_name, and where we are working,
    return the label_file_id

    Arguments:
        project_string_id, integer
        working_dir_id, integer (read from the 'directory_id' request header)

    Returns:
        dict of label files or None / failure case
    """
    log = {"success": False, "errors": []}

    directory_id = request.headers.get('directory_id', None)
    if directory_id is None:
        log["errors"].append("'directory_id' not supplied")
        return jsonify(log), 200

    with sessionMaker.session_scope() as session:

        project = Project.get(session, project_string_id)

        # The directory must belong to the project the caller is scoped to.
        verify_result = WorkingDir.verify_directory_in_project(
            session, project, directory_id)
        if verify_result is False:
            log["errors"].append("Invalid directory id")
            log["success"] = False
            return jsonify(log=log), 200

        name_to_file_id, result = build_name_to_file_id_dict(
            session=session,
            directory_id=directory_id)

        if result == True:
            log["success"] = True
            return jsonify(log=log, name_to_file_id=name_to_file_id), 200

        # Bug fix: previously this fell through and implicitly returned
        # None (a 500 in Flask) when build_name_to_file_id_dict failed.
        log["errors"].append("Could not build name to file id dict")
        return jsonify(log=log), 200
def filter_by_project(session, project_string_id, query):
    """
    Restrict a Job query to the project identified by project_string_id.

    :param session: DB session
    :param project_string_id: string id of the project
    :param query: SQLAlchemy query over Job
    :return: the filtered query
    """
    project = Project.get(session=session, project_string_id=project_string_id)
    return query.filter(Job.project == project)
def create_project_with_context(context_data, session):
    """
    This function will create mock data for a project and all the necessary
    context for any unit testing to be performed.
    context_data will be a dictionary with all the context for creating the
    project. The idea is that you'll be able to specify users, labels,
    tasks, etc... and the function will make sure all the data is mocked
    properly.
    Example: For my test I need a project with 2 users, 1 admin and another
    with view permissions. I also need 3 labels. The context_data should
    look something like this:
    {
        'project_name': 'My test project',
        'users': [
            {'name': 'john', permissions: 'admin'}
            {'name': 'maria', permissions: 'view'}
        },
        'labels': [
            {'name': 'catlabel', 'type': 'box'}
            {'name': 'dogabel2', 'type': 'box'}
        ]
    The function will return a similar data structure with the ID's on the
    test database for further querying inside the test cases.
    :param context_data:
    :return: dict with 'project' and 'users' keys
    """
    random_name = get_random_string(8)
    project_string_id = context_data.get('project_string_id', random_name)
    project_name = context_data.get('project_name', random_name)

    # Owner user for the mock project.
    user = register_user(
        {'username': '******'.format(project_string_id),
         'email': 'test{}@test.com'.format(project_string_id),
         'password': '******'},
        session
    )
    member = Member(kind = 'human')
    session.add(member)
    session.flush()
    user.member = member

    project = Project.new(
        session = session,
        name = project_name,
        project_string_id = project_string_id,
        goal = 'Test stuff',
        member_created = None,
        user = user
    )

    user_list = []
    # Bug fix: 'users' may be absent from context_data — default to an
    # empty list instead of raising KeyError. Also renamed the loop
    # variable so it no longer shadows the owner `user` above.
    for user_spec in context_data.get('users', []):
        if user_spec.get('project_string_id') is None:
            user_spec['project_string_id'] = random_name
        new_user = register_user(user_spec, session)
        member = Member(kind = 'human')
        session.add(member)
        session.flush()
        new_user.member = member
        new_user.member_id = member.id
        session.add(new_user)
        user_list.append(new_user)

    regular_methods.commit_with_rollback(session)
    return {
        'project': project,
        'users': user_list
    }
def __fetch_object(self, opts):
    """
    Upload a file to Diffgram from an Azure Blob

    :param opts: Dictionary with parameters for object fetching
        ('bucket_name', 'path', 'event_data', plus optional job/batch keys).
    :return: created Input on success, None on invalid file type,
        or {'log': ...} on input-validation failure.
    """
    # Validate the untrusted opts payload before touching Azure.
    spec_list = [{'bucket_name': str, 'path': str}]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input = opts,
                                                spec_list = spec_list,
                                                log = log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    # Build a SAS URL so the ingest worker can download the blob directly.
    shared_access_signature = BlobSharedAccessSignature(
        account_name = self.connection_client.account_name,
        account_key = self.connection_client.credential.account_key
    )

    # NOTE(review): 40368000 seconds is roughly 467 days — confirm this
    # very long expiry window is intentional.
    expiration_offset = 40368000

    blob_name = opts['path']
    container = opts['bucket_name']

    added_seconds = datetime.timedelta(0, expiration_offset)
    expiry_time = datetime.datetime.utcnow() + added_seconds
    filename = blob_name.split("/")[-1]
    sas = shared_access_signature.generate_blob(
        container_name = container,
        blob_name = blob_name,
        start = datetime.datetime.utcnow(),
        expiry = expiry_time,
        permission = BlobSasPermissions(read = True),
        content_disposition = 'attachment; filename=' + filename,
    )
    sas_url = 'https://{}.blob.core.windows.net/{}/{}?{}'.format(
        self.connection_client.account_name,
        container,
        blob_name,
        sas
    )

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()

        # Deduct Media Type:
        extension = Path(opts['path']).suffix
        extension = extension.lower()
        media_type = None
        if extension in images_allowed_file_names:
            media_type = 'image'
        elif extension in videos_allowed_file_names:
            media_type = 'video'
        else:
            # TODO: Decide, do we want to raise an exception? or just do nothing?
            # Unsupported extension: record a warning Event and skip the file.
            log = regular_log.default()
            log['error']['invalid_type'] = 'File must type of: {} {}'.format(str(images_allowed_file_names),
                                                                             str(videos_allowed_file_names))
            log['error']['file_name'] = opts['path']
            log['opts'] = opts
            Event.new(
                session = session,
                member_id = opts['event_data']['request_user'],
                kind = 'microsoft_azure_new_import_warning',
                description = 'Skipped import for {}, invalid file type.'.format(opts['path']),
                error_log = log,
                project_id = project.id,
                member = member,
                success = False
            )
            return None

        # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)

        # Hand off to the ingest pipeline; it downloads from the SAS URL.
        created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                              session = session,
                                              media_url = sas_url,
                                              media_type = media_type,
                                              job_id = opts.get('job_id'),
                                              batch_id = opts.get('batch_id'),
                                              file_name = opts.get('path'),
                                              video_split_duration = opts.get('video_split_duration'),
                                              directory_id = opts.get('directory_id'),
                                              extract_labels_from_batch = True)
        log = regular_log.default()
        log['opts'] = opts
        Event.new(
            session = session,
            member_id = opts['event_data']['request_user'],
            kind = 'microsoft_azure_new_import_success',
            description = 'New cloud import for {}'.format(opts['path']),
            error_log = opts,
            project_id = project.id,
            member = member,
            success = True
        )
        return created_input
def __send_export(self, opts):
    """
    Generate an annotation export and upload it to an Azure blob.

    :param opts: dict with 'path' (folder, may be ''), 'format'
        ('JSON' or 'YAML'), 'export_id', 'bucket_name' and
        'event_data' (with 'request_user').
    :return: {'result': True} on success, or a log dict on error.
    """
    # config_data must name the project the export belongs to.
    # Bug fix: project_string_id is a string everywhere it is used below,
    # so the spec type was wrong (`dict`).
    spec_list = [{'project_string_id': str}]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input = self.config_data,
                                                spec_list = spec_list,
                                                log = log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    spec_list = [
        {'path': str},
        {"format": {
            'default': 'JSON',
            'kind': str,
            'valid_values_list': ['JSON', 'YAML']
        }},
        {'export_id': str},
        {'bucket_name': str},
    ]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input = opts,
                                                spec_list = spec_list,
                                                log = log,
                                                string_len_not_zero = False)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    # The destination must be a folder prefix (or bucket root).
    if not opts['path'].endswith('/') and opts['path'] != '':
        log['error']['path'] = 'Path on bucket must be a folder, not a filename.'
        return log

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(session, self.config_data['project_string_id'])
        member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()
        export = session.query(Export).filter(Export.id == opts['export_id']).first()

        # Check perms and export status.
        # Bug fix: a single error key already means the check failed;
        # the previous `> 1` comparison let requests with exactly one
        # error (the common case) proceed.
        export_check_result = check_export_permissions_and_status(export,
                                                                  self.config_data['project_string_id'],
                                                                  session)
        if len(export_check_result['error'].keys()) >= 1:
            return export_check_result

        result = export_view_core(
            export = export,
            format = opts['format'],
            return_type = 'bytes')
        filename = generate_file_name_from_export(export, session)

        # "<folder>/<file>.<ext>" or "<file>.<ext>" at the bucket root.
        if opts['path'] != '':
            key = '{}{}.{}'.format(opts['path'], filename, opts['format'].lower())
        else:
            key = '{}.{}'.format(filename, opts['format'].lower())

        file = io.BytesIO(result)
        blob_client = self.connection_client.get_blob_client(container = opts['bucket_name'], blob = key)
        content_type = mimetypes.guess_type(filename)[0]
        my_content_settings = ContentSettings(content_type = content_type)
        blob_client.upload_blob(file, content_settings = my_content_settings)

        log = regular_log.default()
        log['opts'] = opts
        Event.new(
            session = session,
            member_id = opts['event_data']['request_user'],
            kind = 'microsoft_azure_new_export_success',
            description = 'New cloud export for {}{}'.format(opts['path'], filename),
            error_log = opts,
            member = member,
            project_id = project.id,
            success = True
        )
        return {'result': True}
def update_tags(project_string_id):
    """
    Replace the project's tag list with the tag_list posted by the client.

    Existing Tag rows with matching names are reused; unknown names get
    new Tag rows. Assigning project.tag_list drops links to tags that
    are no longer present.
    """
    with sessionMaker.session_scope() as session:

        payload = request.get_json(force=True)
        # tag_list could be none, ie deleted all tags...
        tag_list = payload.get('tag_list', None)

        project = Project.get(session, project_string_id)
        print(tag_list)

        rebuilt = []
        for raw_tag in tag_list:
            if not valid_tag(raw_tag):
                continue
            name = raw_tag.lower()

            # Reuse an existing tag with the same name when one exists.
            tag_db = session.query(Tag).filter(Tag.name == name).first()
            if tag_db is None:
                tag_db = Tag()
                tag_db.name = name
                tag_db.is_public = project.is_public
            session.add(tag_db)

            # TODO handle counts properly ie on tag being removed etc.
            # tag_db.count += 1
            rebuilt.append(tag_db)

        session.add(project)
        # This handles removing link to tag that's no longer in project
        project.tag_list = rebuilt

    return json.dumps({'success': True}), 200, {'ContentType': 'application/json'}
def job_launch_list_core(session, metadata_proposed, output_mode="serialize"):
    """
    Get the job_launch objects based on filters in metadata_proposed.

    :param session: DB session
    :param metadata_proposed: raw filter dict; normalized via default_metadata()
    :param output_mode: "serialize" fills the returned list with
        serialize_for_list_view() dicts; any other value returns an
        empty list (only meta is populated).
    :return: (output_job_launch_list, meta)
    :raises Forbidden: on an invalid mode or user/mode mismatch.
    """
    meta = default_metadata(metadata_proposed)
    output_job_launch_list = []
    limit_counter = 0

    # CAUTION
    # Multiple "modes", for output and trainer builder, maybe more in future
    builder_or_trainer_mode = meta['builder_or_trainer']['mode']

    # It doesn't really make sense to have this here
    # Should be part of some other meta data checking or something.
    if builder_or_trainer_mode not in ["builder", "trainer"]:
        raise Forbidden("Invalid builder_or_trainer_mode mode.")

    query = session.query(JobLaunch).join(Job)

    # The requesting user's current mode must match the requested mode.
    user = User.get(session)
    if user.last_builder_or_trainer_mode != builder_or_trainer_mode:
        raise Forbidden(
            "Invalid user relation to builder_or_trainer_mode mode.")

    ### START FILTERS ###
    if meta["status"]:
        if meta["status"] != "All":
            query = query.filter(JobLaunch.status == meta["status"])

    if meta["date_from"]:
        date_from = datetime.datetime.strptime(meta["date_from"], "%Y-%m-%d")
        date_from = date_from.replace(hour=0, minute=0, second=0, microsecond=0)
        query = query.filter(JobLaunch.time_created >= date_from)

    if meta["date_to"]:
        # NOTE(review): truncating to midnight makes date_to EXCLUSIVE of
        # launches during that day — confirm this is the intended bound.
        date_to = datetime.datetime.strptime(meta["date_to"], "%Y-%m-%d")
        date_to = date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        query = query.filter(JobLaunch.time_created <= date_to)

    if meta["job_ids"]:
        query = query.filter(Job.id.in_(meta["job_ids"]))

    # Also assumes org is None.
    # Actually this should be complimentary still
    if meta["project_string_id"]:
        project = Project.get_by_string_id(
            session=session,
            project_string_id=meta["project_string_id"])
        query = query.filter(Job.project_id == project.id)

    #### END FILTERS ###

    # Newest first, then paginate.
    query = query.order_by(Job.time_created.desc())
    query = query.limit(meta["limit"])
    query = query.offset(meta["start_index"])

    job_launch_list = query.all()

    if output_mode == "serialize":
        for job_launch in job_launch_list:
            serialized = job_launch.serialize_for_list_view(session=session)
            output_job_launch_list.append(serialized)
            limit_counter += 1

    meta['end_index'] = meta['start_index'] + len(output_job_launch_list)
    meta['length_current_page'] = len(output_job_launch_list)

    # NOTE(review): limit_counter stays 0 for any non-"serialize"
    # output_mode, so no_results_match_meta is also set in that case.
    if limit_counter == 0:
        meta['no_results_match_meta'] = True

    return output_job_launch_list, meta
def check_permissions(Roles, project_string_id, session, apis_project_list=None, apis_user_list=None):
    """
    Central project permission check for the logged-in-user path.

    TODO this could use better organization
    ie project_string_id is None check could be here,
    but we seem to do it an extra time on line 123

    :param Roles: list of role names; may contain special values
        'allow_anonymous', 'allow_if_project_is_public',
        'allow_any_logged_in_user'.
    :param project_string_id: string id of the project being accessed.
    :param apis_project_list: project-level API flags required, if any.
    :param apis_user_list: user-level API flags required, if any.
    :return: True when access is granted.
    :raises Forbidden: on any denial.
    """
    if project_string_id is None:
        # TODO merge None checks from other thing up top here.
        raise Forbidden("project_string_id is None")

    if 'allow_anonymous' in Roles:
        return True

    project = Project.get(session, project_string_id)
    if project is None:
        raise Forbidden(default_denied_message)

    # Careful here! Just because a project is public
    # doesn't mean public is allowed access to all
    # routes. ie only admins can delete project
    # if a project is public we don't need to check a user's apis right?
    if 'allow_if_project_is_public' in Roles:
        if project:
            if project.is_public is True:
                return True

    # TODO merge LoggedIn() with getUserID() similar internal logic
    if LoggedIn() != True:
        raise Forbidden(default_denied_message)

    if 'allow_any_logged_in_user' in Roles:
        # TODO not happy with this name, want more clarity on how this effects other permissions like apis / project etc.
        return True

    user = session.query(User).filter(User.id == getUserID()).one()

    if user.is_super_admin == True:
        return True

    # Both of these raise Forbidden internally when a required API is missing.
    if apis_project_list:
        check_all_apis(project=project,
                       apis_required_list=apis_project_list)

    if apis_user_list:
        User_Permissions.general(user=user,
                                 apis_user_list=apis_user_list)

    # This could be slow if a user has a lot of projects?
    # NOTE(review): this loop rebinds `project` (a Project object above)
    # to the string keys of permissions_projects — intentional but easy
    # to misread.
    for project, Permissions in user.permissions_projects.items():
        if Permissions is None:
            continue
        if project_string_id == project:
            check_role_result = check_roles(Roles, Permissions)
            if check_role_result is True:
                return True

    # Default
    # Good to have this here so we can call
    # this function as one line and don't have to worry
    # about returning False (must have returned True earlier)
    raise Forbidden(default_denied_message)
def __fetch_folder(self, opts):
    """
    Import every blob under the given path prefix(es) from a Google
    Cloud Storage bucket into the project.

    :param opts: dict with 'bucket_name', 'path' (string or list of
        prefixes), 'event_data' (with 'request_user') and optional
        job/batch/directory keys.
    :return: list of created Input objects, or {'result': 'error'}
        when no project is configured.
    """
    result = []
    if self.config_data.get('project_string_id') is None:
        return {'result': 'error'}

    paths = opts['path']
    if type(paths) != list:
        paths = [paths]

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(
            session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(
            Member.user_id == opts['event_data']['request_user']).first()

        for path in paths:
            blobs = self.connection_client.list_blobs(opts['bucket_name'], prefix=path)
            for blob in blobs:
                # Skip "folder" placeholder objects.
                if blob.name.endswith('/'):
                    continue

                blob_expiry = int(time.time() + (60 * 60 * 24 * 30))  # 30 days
                signed_url = blob.generate_signed_url(expiration=blob_expiry)

                # Deduce media type from the extension.
                # Bug fix: use blob.name (the object key) instead of
                # blob.path (the URL-encoded API path), and normalize
                # case like the other fetch methods do.
                extension = Path(blob.name).suffix.lower()
                media_type = None
                if extension in images_allowed_file_names:
                    media_type = 'image'
                elif extension in videos_allowed_file_names:
                    media_type = 'video'
                else:
                    # logging.warn is a deprecated alias of warning.
                    logging.warning('File: {} must type of: {} {}'.format(
                        blob.name,
                        str(images_allowed_file_names),
                        str(videos_allowed_file_names)))
                    log = regular_log.default()
                    log['error'][
                        'invalid_type'] = 'File must type of: {} {}'.format(
                            str(images_allowed_file_names),
                            str(videos_allowed_file_names))
                    log['error']['file_name'] = path
                    log['opts'] = opts
                    Event.new(
                        session=session,
                        member_id=opts['event_data']['request_user'],
                        kind='google_cloud_new_import_warning',
                        description=
                        'Skipped import for {}, invalid file type.'.format(
                            blob.name),
                        error_log=log,
                        project_id=project.id,
                        member=member,
                        success=False)
                    continue

                # TODO: check Input() table for duplicate file?
                created_input = packet.enqueue_packet(
                    self.config_data['project_string_id'],
                    session=session,
                    media_url=signed_url,
                    media_type=media_type,
                    job_id=opts.get('job_id'),
                    batch_id=opts.get('batch_id'),
                    file_name=path,
                    video_split_duration=opts.get('video_split_duration'),
                    directory_id=opts.get('directory_id'),
                    extract_labels_from_batch=True)
                log = regular_log.default()
                log['opts'] = opts
                Event.new(session=session,
                          member_id=opts['event_data']['request_user'],
                          kind='google_cloud_new_import_success',
                          description='New cloud import for {}'.format(
                              blob.name),
                          error_log=opts,
                          project_id=project.id,
                          member=member,
                          success=True)
                # Bug fix: `result` was reset to [] inside this loop,
                # so only the last blob's input was ever returned.
                result.append(created_input)

    return result
def __fetch_object(self, opts):
    """Upload a file to diffgram from an S3 bucket

    :param opts: dict with 'bucket_name', 'path' and
        'event_data' (with 'request_user'), plus optional job/directory keys.
    :return: created Input on success, None on invalid file type,
        or {'log': ...} on input-validation failure.
    """
    # Validate the untrusted opts payload before calling AWS.
    spec_list = [{'bucket_name': str, 'path': str}]
    log = regular_log.default()
    log, input = regular_input.input_check_many(untrusted_input=opts,
                                                spec_list=spec_list,
                                                log=log)
    if len(log["error"].keys()) >= 1:
        return {'log': log}

    # This might be an issue. Currently not supporting urls with no expiration. Biggest time is 1 week.
    # 3600 * 24 * 6 seconds = 6 days (just under the 1-week presign maximum).
    signed_url = self.connection_client.generate_presigned_url('get_object',
                                                               Params={'Bucket': opts['bucket_name'],
                                                                       'Key': opts['path']},
                                                               ExpiresIn=3600 * 24 * 6)

    with sessionMaker.session_scope() as session:
        project = Project.get_by_string_id(session, self.config_data.get('project_string_id'))
        member = session.query(Member).filter(Member.user_id == opts['event_data']['request_user']).first()

        # Deduct Media Type:
        extension = Path(opts['path']).suffix
        extension = extension.lower()
        media_type = None
        if extension in images_allowed_file_names:
            media_type = 'image'
        elif extension in videos_allowed_file_names:
            media_type = 'video'
        else:
            # TODO: Decide, do we want to raise an exception? or just do nothing?
            # Unsupported extension: record a warning Event and skip the file.
            log = regular_log.default()
            log['error']['invalid_type'] = 'File must type of: {} {}'.format(str(images_allowed_file_names),
                                                                             str(videos_allowed_file_names))
            log['error']['file_name'] = opts['path']
            log['opts'] = opts
            Event.new(
                session=session,
                member_id=opts['event_data']['request_user'],
                kind='aws_s3_new_import_warning',
                description='Skipped import for {}, invalid file type.'.format(opts['path']),
                error_log=log,
                project_id=project.id,
                member=member,
                success=False
            )
            return None

        # print('AAAAA', opts, opts.get('job_id'))
        # metadata = self.connection_client.head_object(Bucket=self.config_data['bucket_name'], Key=path)

        # NOTE(review): unlike the Azure variant, this call omits batch_id,
        # file_name and extract_labels_from_batch — confirm whether intended.
        created_input = packet.enqueue_packet(self.config_data['project_string_id'],
                                              session=session,
                                              media_url=signed_url,
                                              media_type=media_type,
                                              job_id=opts.get('job_id'),
                                              video_split_duration=opts.get('video_split_duration'),
                                              directory_id=opts.get('directory_id'))
        log = regular_log.default()
        log['opts'] = opts
        Event.new(
            session=session,
            member_id=opts['event_data']['request_user'],
            kind='aws_s3_new_import_success',
            description='New cloud import for {}'.format(opts['path']),
            error_log=opts,
            project_id=project.id,
            member=member,
            success=True
        )
        return created_input