class OptimizedSurfaceReconstructionMeta(models.Model):
    method = models.CharField(max_length=255)  # TODO: Should be choices
    reconstruction_params = S3FileField()
    template_reconstruction = S3FileField()
    shape_model = models.ForeignKey(
        OptimizedShapeModel,
        on_delete=models.CASCADE,
        related_name='+',
    )

class OptimizedPCAModel(models.Model):
    shape_model = models.OneToOneField(
        OptimizedShapeModel,
        on_delete=models.CASCADE,
        related_name='pca_model',
        primary_key=True,
    )
    mean_particles = S3FileField()
    pca_modes = S3FileField()
    eigen_spectrum = S3FileField()

class Project(TimeStampedModel, models.Model):
    file = S3FileField()
    keywords = models.CharField(max_length=255, blank=True, default='')
    description = models.TextField(blank=True, default='')
    dataset = models.ForeignKey(Dataset, on_delete=models.CASCADE, related_name='projects')

class OptimizedSurfaceReconstruction(models.Model):
    particles = models.OneToOneField(
        OptimizedParticles,
        on_delete=models.CASCADE,
        related_name='surface_reconstruction',
        primary_key=True,
    )
    sample_reconstruction = S3FileField()

class Approach(models.Model):
    class Meta:
        verbose_name_plural = 'approaches'
        constraints = [
            models.UniqueConstraint(fields=['name', 'task', 'team'], name='unique_approaches')
        ]

    class ReviewState(models.TextChoices):
        ACCEPTED = 'accepted', _('Accepted')
        REJECTED = 'rejected', _('Rejected')

    class RejectReason(models.TextChoices):
        BLANK_OR_CORRUPT_MANUSCRIPT = (
            'blank_or_corrupt_manuscript',
            _('Blank or corrupt manuscript'),
        )
        LOW_QUALITY_MANUSCRIPT = 'low_quality_manuscript', _('Low quality manuscript')
        RULE_VIOLATION = 'rule_violation', _('Violation of rules')

    created = models.DateTimeField(default=timezone.now)
    name = models.CharField(max_length=100)
    description = models.TextField(blank=True)
    docker_tag = models.CharField(blank=True, max_length=120)
    uses_external_data = models.BooleanField(default=False, choices=((True, 'Yes'), (False, 'No')))
    manuscript = S3FileField(
        validators=[FileExtensionValidator(allowed_extensions=['pdf'])], blank=True
    )
    review_assignee = models.ForeignKey(
        get_user_model(), null=True, blank=True, on_delete=models.DO_NOTHING
    )
    review_state = models.CharField(
        max_length=8, blank=True, default='', choices=ReviewState.choices
    )
    reject_reason = models.CharField(
        max_length=27, blank=True, default='', choices=RejectReason.choices
    )
    task = models.ForeignKey(Task, on_delete=models.CASCADE)
    team = models.ForeignKey(Team, on_delete=models.CASCADE)

    objects = models.Manager()
    successful = SuccessfulApproachesManager()

    def __str__(self):
        return self.name

    @property
    def latest_submission(self):
        return Submission.objects.filter(approach=self).order_by('-created').first()

    @property
    def latest_successful_submission(self) -> Optional[Submission]:
        return (
            Submission.objects.filter(approach=self, status=Submission.Status.SUCCEEDED)
            .order_by('-created')
            .first()
        )

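# A minimal usage sketch for the dual managers above. `SuccessfulApproachesManager`
# is assumed (its definition is not shown here) to restrict the queryset to
# approaches with at least one succeeded submission; `some_task` is a hypothetical
# Task instance.
for approach in Approach.successful.filter(task=some_task):
    latest = approach.latest_successful_submission
    if latest is not None:
        print(approach.name, latest.overall_score)
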
class GroomedMesh(models.Model):
    # The contents of the nrrd file
    file = S3FileField()

    # represent these in raw form?
    pre_cropping = S3FileField(null=True)
    pre_alignment = S3FileField(null=True)

    mesh = models.OneToOneField(
        Mesh,
        on_delete=models.CASCADE,
        primary_key=True,
        related_name='groomed',
    )
    project = models.ForeignKey(Project, on_delete=models.CASCADE, related_name='groomed_meshes')

class Upload(Task):
    """An object to track uploads."""

    class DataType(models.TextChoices):
        CSV = 'CSV'
        D3_JSON = 'D3_JSON'
        NESTED_JSON = 'NESTED_JSON'
        NEWICK = 'NEWICK'

    blob = S3FileField()
    data_type = models.CharField(max_length=20, choices=DataType.choices)

class RegisteredImage(TimeStampedModel):
    blob = S3FileField()
    source_image = models.ForeignKey(
        Image,
        on_delete=models.CASCADE,
        related_name='registered_images',
        db_index=True,
    )
    atlas = models.ForeignKey(Atlas, on_delete=models.PROTECT, related_name='registered_images')
    registration_type = models.CharField(max_length=100, default='affine')

class JacobianImage(TimeStampedModel):
    blob = S3FileField()
    source_image = models.ForeignKey(
        Image,
        on_delete=models.CASCADE,
        related_name='jacobian_images',
        db_index=True,
    )
    atlas = models.ForeignKey(Atlas, on_delete=models.PROTECT, related_name='jacobian_images')

class FeatureImage(TimeStampedModel):
    blob = S3FileField()
    source_image = models.ForeignKey(
        Image,
        on_delete=models.CASCADE,
        related_name='feature_images',
        db_index=True,
    )
    atlas = models.ForeignKey(Atlas, on_delete=models.PROTECT, related_name='feature_images')
    downsample_factor = models.FloatField()

class OptimizedParticles(models.Model):
    world = S3FileField()
    local = S3FileField()
    transform = S3FileField()
    shape_model = models.ForeignKey(
        OptimizedShapeModel,
        on_delete=models.CASCADE,
        related_name='particles',
    )
    groomed_segmentation = models.ForeignKey(
        GroomedSegmentation,
        on_delete=models.CASCADE,
        related_name='+',
        blank=True,
        null=True,
    )
    groomed_mesh = models.ForeignKey(
        GroomedMesh,
        on_delete=models.CASCADE,
        related_name='+',
        blank=True,
        null=True,
    )

class Dataset(TimeStampedModel, models.Model):
    name = models.CharField(max_length=255, unique=True)
    file = S3FileField(null=True)

    # FK to another table?
    license = models.TextField()
    description = models.TextField()
    acknowledgement = models.TextField()
    keywords = models.CharField(max_length=255, blank=True, default='')

    # FK to another table?
    contributors = models.TextField(blank=True, default='')

    # FK to another table?
    publications = models.TextField(blank=True, default='')

class Frame(TimeStampedModel, models.Model):
    class Meta:
        indexes = [models.Index(fields=['scan', 'frame_number'])]
        ordering = ['scan', 'frame_number']

    id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
    scan = models.ForeignKey('Scan', related_name='frames', on_delete=models.CASCADE)
    content = S3FileField(null=True)
    raw_path = models.CharField(max_length=500, blank=False)
    frame_number = models.IntegerField(default=0)

    @property
    def path(self) -> Path:
        return Path(self.raw_path)

    @property
    def zarr_path(self: Frame) -> Path:
        return convert_to_store_path(self.path)

    @property
    def size(self) -> int:
        return self.path.stat().st_size

    @property
    def experiment(self) -> Experiment:
        return self.scan.experiment

    @property
    def storage_mode(self) -> StorageMode:
        # Note: this returns a StorageMode member, not a bool
        if settings.S3_SUPPORT:
            if self.content:
                return StorageMode.CONTENT_STORAGE
            elif self.raw_path.startswith('s3://'):
                return StorageMode.S3_PATH
        return StorageMode.LOCAL_PATH

    @property
    def s3_download_url(self) -> Optional[str]:
        if self.storage_mode == StorageMode.S3_PATH:
            # Strip the 's3://' prefix and split into bucket and key
            bucket, key = self.raw_path.strip()[5:].split('/', maxsplit=1)
            client = boto3.client('s3')
            return client.generate_presigned_url(
                'get_object', Params={'Bucket': bucket, 'Key': key}
            )
        return None

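# Illustrative only: how the storage modes above resolve for an S3-backed frame,
# assuming settings.S3_SUPPORT is enabled and a StorageMode enum with
# CONTENT_STORAGE, S3_PATH, and LOCAL_PATH members. The path is hypothetical, and
# generate_presigned_url requires AWS credentials to be configured.
frame = Frame(raw_path='s3://my-bucket/scans/0001/frame_0.nii.gz')
assert frame.storage_mode == StorageMode.S3_PATH
url = frame.s3_download_url  # time-limited presigned GET URL
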
class Image(TimeStampedModel, models.Model):
    name = models.CharField(max_length=255)
    blob = S3FileField()
    checksum = models.CharField(max_length=128, blank=True, null=True)
    owner = models.ForeignKey(User, on_delete=models.CASCADE)
    # TimeStampedModel also provides "created" and "modified" fields

    @property
    def short_checksum(self) -> Optional[str]:
        return self.checksum[:10] if self.checksum else None

    def compute_checksum(self) -> None:
        hasher = sha512()
        with self.blob.open() as blob:
            for chunk in blob.chunks():
                hasher.update(chunk)
        self.checksum = hasher.hexdigest()

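# A minimal sketch of computing and persisting the checksum after upload;
# `some_pk` is a hypothetical primary key of an already-saved Image.
image = Image.objects.get(pk=some_pk)
image.compute_checksum()
image.save(update_fields=['checksum'])
print(image.short_checksum)  # first 10 hex characters of the SHA-512 digest
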
class Image(TimeStampedModel, models.Model):
    class Meta:
        indexes = [models.Index(fields=['dataset'])]
        ordering = ['name']

    name = models.CharField(max_length=255)
    type = models.CharField(max_length=100, default=ImageType.structural_mri)
    blob = S3FileField()
    dataset = models.ForeignKey(Dataset, on_delete=models.CASCADE, related_name='images')
    patient = models.ForeignKey(Patient, on_delete=models.CASCADE, related_name='images')
    metadata = MetadataField()

    @property
    def size(self) -> int:
        return self.blob.size

class Submission(models.Model):
    class Meta:
        ordering = ['-created']

    class Status(models.TextChoices):
        QUEUED = 'queued', _('Queued for scoring')
        SCORING = 'scoring', _('Scoring')
        INTERNAL_FAILURE = 'internal_failure', _('Internal failure')
        FAILED = 'failed', _('Failed')
        SUCCEEDED = 'succeeded', _('Succeeded')

    created = models.DateTimeField(default=timezone.now)
    creator = models.ForeignKey(get_user_model(), on_delete=models.CASCADE)
    creator_fingerprint_id = models.CharField(max_length=32, null=True, blank=True)
    creator_ip = models.GenericIPAddressField(null=True, blank=True)
    approach = models.ForeignKey('Approach', on_delete=models.CASCADE)
    accepted_terms = models.BooleanField(default=False)
    test_prediction_file = S3FileField()
    status = models.CharField(max_length=20, default=Status.QUEUED, choices=Status.choices)
    score = models.JSONField(blank=True, null=True)
    overall_score = models.FloatField(blank=True, null=True)
    validation_score = models.FloatField(blank=True, null=True)
    fail_reason = models.TextField(blank=True)

    objects = DeferredFieldsManager('score')

    def __str__(self):
        return f'{self.id}'

    def get_absolute_url(self):
        return reverse('submission-detail', args=[self.id])

    def reset_scores(self):
        self.score = None
        self.overall_score = None
        self.validation_score = None
        return self

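# Sketch of clearing scores before re-scoring a submission; `reset_scores`
# returns self, so it chains with save(). DeferredFieldsManager('score') is
# assumed to defer loading the potentially large `score` JSON by default.
# `some_pk` is a hypothetical primary key.
submission = Submission.objects.get(pk=some_pk)
submission.reset_scores().save(update_fields=['score', 'overall_score', 'validation_score'])
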
class Task(models.Model):
    class Meta:
        ordering = ['id']

    class Type(models.TextChoices):
        SEGMENTATION = 'segmentation', _('Segmentation')
        CLASSIFICATION = 'classification', _('Classification')

    class MetricField(models.TextChoices):
        BALANCED_ACCURACY = 'balanced_accuracy', _('Balanced Accuracy')
        AVERAGE_PRECISION = 'ap', _('Average Precision')
        AUC = 'auc', _('AUC')

    type = models.CharField(max_length=20, choices=Type.choices)
    metric_field = models.CharField(
        max_length=100,
        choices=MetricField.choices,
        help_text='Which metric to use for the overall score',
    )
    created = models.DateTimeField(default=timezone.now)
    challenge = models.ForeignKey(Challenge, on_delete=models.DO_NOTHING, related_name='tasks')
    name = models.CharField(max_length=100)
    description = models.TextField()
    short_description = models.TextField()
    locked = models.BooleanField(
        default=True,
        help_text='Whether users are blocked from making and editing approaches and submissions.',
    )
    hidden = models.BooleanField(
        default=True, help_text='Whether the GUI exposes this task to users.'
    )
    scores_published = models.BooleanField(
        default=False,
        help_text='Whether final scores are visible to submitters and the leaderboard is open.',
    )
    max_approaches = models.PositiveSmallIntegerField(
        verbose_name='Maximum approaches',
        default=3,
        help_text=(
            'The maximum number of approaches a team can make on this task. Set to 0 to disable.'
        ),
    )
    max_submissions_per_week = models.PositiveSmallIntegerField(
        verbose_name='Maximum submissions per week',
        default=10,
        help_text=(
            'The maximum number of submissions a team can make to this task per week. '
            'Set to 0 to disable.'
        ),
    )
    requires_manuscript = models.BooleanField(
        verbose_name='Requires a manuscript',
        default=True,
        help_text='Whether approaches should require a manuscript.',
    )
    test_ground_truth_file = S3FileField()

    # Define custom "objects" first, so it will be the "_default_manager", which is more
    # efficient for many automatically generated queries
    objects = SelectRelatedManager('challenge')

    def __str__(self):
        return f'{self.challenge.name}: {self.name}'

    @property
    def allowed_submission_extension(self):
        return {
            self.Type.SEGMENTATION: 'zip',
            self.Type.CLASSIFICATION: 'csv',
        }[self.type]

    def get_absolute_url(self):
        return reverse('task-detail', args=[self.id])

    def pending_or_succeeded_submissions(self, team_or_user) -> QuerySet[Submission]:
        filters = {
            'status__in': [
                Submission.Status.QUEUED,
                Submission.Status.SCORING,
                Submission.Status.SUCCEEDED,
            ],
            'approach__task': self,
        }

        if isinstance(team_or_user, Team):
            filters['approach__team'] = team_or_user
        elif isinstance(team_or_user, User):
            filters['creator'] = team_or_user

        return Submission.objects.filter(**filters)

    def next_available_submission(self, team) -> Optional[datetime]:
        """
        Return a datetime of when the next submission can be made.

        Returns None if the submission can be made now.
        """
        if self.max_submissions_per_week == 0:
            return None

        one_week_ago = timezone.now() - timedelta(weeks=1)
        submissions_in_last_week = (
            self.pending_or_succeeded_submissions(team)
            .filter(created__gte=one_week_ago)
            .order_by('created')
        )

        if len(submissions_in_last_week) >= self.max_submissions_per_week:
            oldest_submission_in_last_week = cast(Submission, submissions_in_last_week.first())
            return oldest_submission_in_last_week.created + timedelta(weeks=1)
        else:
            return None

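# Sketch of enforcing the weekly submission cap before creating a Submission;
# `task` and `team` are hypothetical existing instances.
from django.core.exceptions import ValidationError

next_allowed = task.next_available_submission(team)
if next_allowed is not None:
    raise ValidationError(f'Next submission allowed at {next_allowed:%Y-%m-%d %H:%M}.')
# Otherwise the cap has not been reached and the submission can proceed.
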
class ChecksumFile(TimeStampedModel, TaskEventMixin, PermissionPathMixin):
    """The main class for user-uploaded files.

    This has support for manually uploading files or specifying a URL to a file
    (for example in an existing S3 bucket). This broadly supports ``http<s>://``
    URLs to file resources as well as ``s3://`` as long as the node the app is
    running on is provisioned to access that S3 bucket.
    """

    name = models.CharField(max_length=1000, blank=True)
    description = models.TextField(null=True, blank=True)
    checksum = models.CharField(max_length=128)  # sha512
    # A flag to validate the checksum against the saved checksum
    validate_checksum = models.BooleanField(default=False)
    last_validation = models.BooleanField(default=True)
    collection = models.ForeignKey(
        Collection,
        on_delete=models.SET_NULL,
        related_name='%(class)ss',
        related_query_name='%(class)ss',
        null=True,
        blank=True,
    )
    type = models.IntegerField(choices=FileSourceType.choices, default=FileSourceType.FILE_FIELD)
    file = S3FileField(null=True, blank=True, upload_to=uuid_prefix_filename)
    url = models.TextField(null=True, blank=True)

    task_funcs = (
        # tasks.task_checksum_file_post_save,
    )
    permissions_paths = [('collection', Collection)]

    class Meta:
        constraints = [
            models.CheckConstraint(
                name='%(app_label)s_%(class)s_file_source_value_matches_type',
                check=(
                    models.Q(
                        models.Q(type=FileSourceType.FILE_FIELD, file__regex=r'.+')
                        & models.Q(models.Q(url__in=['', None]) | models.Q(url__isnull=True))
                    )
                    | models.Q(
                        models.Q(type=FileSourceType.URL)
                        & models.Q(models.Q(url__isnull=False) & models.Q(url__regex=r'.+'))
                        & models.Q(models.Q(file__in=['', None]) | models.Q(file__isnull=True))
                    )
                ),
            )
        ]

    def get_checksum(self):
        """Compute a new checksum without saving it."""
        if self.type == FileSourceType.FILE_FIELD:
            return compute_checksum_file(self.file)
        elif self.type == FileSourceType.URL:
            return compute_checksum_url(self.url)
        else:
            raise NotImplementedError(f'Type ({self.type}) not supported.')

    def update_checksum(self):
        self.checksum = self.get_checksum()
        # Simple update save - not full save
        super().save(update_fields=['checksum'])

    def validate(self):
        previous = self.checksum
        self.update_checksum()
        self.last_validation = self.checksum == previous
        # Simple update save - not full save
        super().save(update_fields=['last_validation'])
        return self.last_validation

    def post_save_job(self):
        if not self.checksum or self.validate_checksum:
            if self.validate_checksum:
                self.validate()
            else:
                self.update_checksum()
        # Reset the user flags
        self.validate_checksum = False
        # Simple update save - not full save
        self.save(update_fields=['checksum', 'last_validation', 'validate_checksum'])

    def save(self, *args, **kwargs):
        if not self.name:
            if self.type == FileSourceType.FILE_FIELD and self.file.name:
                self.name = os.path.basename(self.file.name)
            elif self.type == FileSourceType.URL:
                try:
                    with safe_urlopen(self.url) as r:
                        self.name = r.info().get_filename()
                except (AttributeError, ValueError, URLError):
                    pass
                if not self.name:
                    # Fallback
                    self.name = os.path.basename(urlparse(self.url).path)
        # Must save the model with the file before accessing it for the checksum
        super().save(*args, **kwargs)

    def yield_local_path(self, vsi=False):
        """Create a local path for the file to be accessed.

        This will first attempt to use httpfs to FUSE mount the file's URL.
        If FUSE is unavailable, this will fall back to a Virtual File Systems
        URL (``vsicurl``) if the ``vsi`` option is set. Otherwise, this will
        download the entire file to local storage.

        Parameters
        ----------
        vsi : bool
            If FUSE fails, fall back to a Virtual File Systems URL. See
            ``get_vsi_path``. This is especially useful if the file is being
            utilized by GDAL and FUSE is not set up.
        """
        if self.type == FileSourceType.URL and precheck_fuse(self.get_url()):
            return url_file_to_fuse_path(self.get_url(internal=True))
        elif vsi and self.type != FileSourceType.FILE_FIELD:
            logger.info('`yield_local_path` falling back to Virtual File System URL.')
            return self.yield_vsi_path(internal=True)
        # Fallback to loading entire file locally
        logger.info('`yield_local_path` falling back to downloading entire file to local storage.')
        if self.type == FileSourceType.FILE_FIELD:
            return field_file_to_local_path(self.file)
        elif self.type == FileSourceType.URL:
            return url_file_to_local_path(self.url)

    def get_url(self, internal=False):
        """Get the URL of the stored resource.

        Parameters
        ----------
        internal : bool
            In most cases this URL will be accessible from anywhere. In some
            cases, this URL will only be accessible from within the container.
            This flag is for use with internal processes to make sure the host
            is correctly set to ``minio`` when needed. See
            ``patch_internal_presign`` for more details.
        """
        if self.type == FileSourceType.FILE_FIELD:
            if internal:
                with patch_internal_presign(self.file):
                    return self.file.url
            else:
                return self.file.url
        elif self.type == FileSourceType.URL:
            return self.url

    def data_link(self):
        return _link_url(self, 'get_url')

    data_link.allow_tags = True

    def get_vsi_path(self, internal=False) -> str:
        """Return the GDAL Virtual File Systems [0] URL.

        This currently formulates the ``/vsicurl/...`` URL [1] for internal and
        external files. This is assuming that both are read-only. External
        files can still be from private S3 buckets as long as ``self.url``
        redirects to a presigned S3 URL [1]:

            Starting with GDAL 2.1, ``/vsicurl/`` will try to query directly
            redirected URLs to Amazon S3 signed URLs during their validity
            period, so as to minimize round-trips.

        This URL can be used for both GDAL and Rasterio [3]:

            To help developers switch [from GDAL], Rasterio will accept [vsi]
            identifiers and other format-specific connection strings, too, and
            dispatch them to the proper format drivers and protocols.

        ``/vsis3/`` could be used for...

        * read/write access
        * directory listing (for sibling files)

        ...but is a bit more of a challenge to set up. [2]

        [0] https://gdal.org/user/virtual_file_systems.html
        [1] https://gdal.org/user/virtual_file_systems.html#vsicurl-http-https-ftp-files-random-access
        [2] https://gdal.org/user/virtual_file_systems.html#vsis3-aws-s3-files
        [3] https://rasterio.readthedocs.io/en/latest/topics/switch.html?highlight=vsis3#dataset-identifiers
        """
        url = self.get_url(internal=internal)
        if url.startswith('s3://'):
            s3_path = url.replace('s3://', '')
            vsi = f'/vsis3/{s3_path}'
        else:
            gdal_options = {
                'url': url,
                'use_head': 'no',
                'list_dir': 'no',
            }
            vsi = f'/vsicurl?{urlencode(gdal_options)}'
        logger.info(f'vsi URL: {vsi}')
        return vsi

    @contextlib.contextmanager
    def yield_vsi_path(self, internal=False):
        """Wrap ``get_vsi_path`` in a context manager."""
        yield self.get_vsi_path(internal=internal)

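# A minimal sketch of reading a URL-backed ChecksumFile with rasterio through the
# GDAL VSI path; `checksum_file` is a hypothetical instance with
# type == FileSourceType.URL pointing at a Cloud Optimized GeoTIFF.
import rasterio

with checksum_file.yield_vsi_path() as vsi_path:
    with rasterio.open(vsi_path) as src:
        print(src.profile)  # raster metadata, read via HTTP range requests
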
class Atlas(models.Model):
    class Meta:
        verbose_name_plural = 'atlases'

    blob = S3FileField()
    name = models.CharField(max_length=255)

class File(TimeStampedModel, models.Model):
    class Meta:
        indexes = [
            models.Index(fields=['folder', 'name']),
            models.Index(
                fields=['legacy_file_id'],
                condition=~models.Q(legacy_file_id=''),
                name='file_legacy_file_id_idx',
            ),
            models.Index(
                fields=['legacy_item_id'],
                condition=~models.Q(legacy_item_id=''),
                name='file_legacy_item_id_idx',
            ),
        ]
        ordering = ['name']
        constraints = [
            models.UniqueConstraint(fields=['folder', 'name'], name='file_siblings_name_unique'),
        ]

    name = models.CharField(
        max_length=255,
        validators=[
            validators.RegexValidator(
                regex='/',
                inverse_match=True,
                message='Name may not contain forward slashes.',
            )
        ],
    )
    description = models.TextField(max_length=3000, blank=True)
    size = models.PositiveBigIntegerField()
    content_type = models.CharField(max_length=255, default='application/octet-stream')
    blob = S3FileField(blank=True)
    sha512 = models.CharField(max_length=128, blank=True, default='', db_index=True, editable=False)
    user_metadata = JSONObjectField()
    folder = models.ForeignKey(Folder, on_delete=models.CASCADE, related_name='files')
    # Prevent deletion of User if it has Files referencing it
    creator = models.ForeignKey(User, on_delete=models.PROTECT)
    legacy_file_id = models.CharField(max_length=24, default='', blank=True)
    legacy_item_id = models.CharField(max_length=24, default='', blank=True)

    @property
    def abs_path(self) -> str:
        """Get a string representation of this File's absolute path."""
        return f'{self.folder.abs_path}{self.name}'

    @property
    def public(self) -> bool:
        return self.folder.tree.public

    @property
    def short_checksum(self) -> Optional[str]:
        return self.sha512[:10] if self.sha512 else None

    def compute_sha512(self) -> None:
        hasher = hashlib.sha512()
        with self.blob.open() as blob:
            for chunk in blob.chunks():
                hasher.update(chunk)
        self.sha512 = hasher.hexdigest()

    def clean(self) -> None:
        if self.folder.child_folders.filter(name=self.name).exists():
            raise ValidationError({'name': 'A folder with that name already exists here.'})
        super().clean()

    @classmethod
    def filter_by_permission(
        cls, user: User, permission: Permission, queryset: models.QuerySet['File']
    ) -> models.QuerySet['File']:
        """Filter a queryset according to a user's access.

        This method uses the tree's filter_by_permission method to create a
        queryset containing *all* trees with the appropriate permission level.
        This queryset is used as a subquery to filter the provided queryset by
        traversing through the folder->tree relationship.
        """
        tree_query = Tree.filter_by_permission(user, permission, Tree.objects).values('pk')
        return queryset.filter(folder__tree__in=models.Subquery(tree_query))

    def has_permission(self, user: User, permission: Permission) -> bool:
        """Return whether the given user has a specific permission for the file."""
        return self.folder.tree.has_permission(user, permission)

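# Hypothetical sketch of listing the files a user can read; `Permission` is
# assumed to be an enum with a READ member matching the Tree-level permission
# model referenced above, and `request.user` a typical authenticated user.
readable = File.filter_by_permission(request.user, Permission.READ, File.objects.all())
for f in readable.select_related('folder'):
    print(f.abs_path, f.short_checksum)
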
class Image(models.Model):
    file = S3FileField()
    modality = models.CharField(max_length=255)  # choices?
    subject = models.ForeignKey(Subject, on_delete=models.CASCADE, related_name='images')

class Mesh(models.Model):
    file = S3FileField()
    anatomy_type = models.CharField(max_length=255)  # choices?
    subject = models.ForeignKey(Subject, on_delete=models.CASCADE, related_name='meshes')