Exemplo n.º 1
0
class Samples(db.Document):
    """One imaging sample: a single tiff stack or image file.

    Samples are kept in their own collection so that searches can run
    across samples.
    """

    # --- acquisition details ---
    session = db.IntField(required=True)
    position = db.IntField(required=True)
    imaging_params = db.EmbeddedDocumentField(ImagingParameters)
    dimensions = db.EmbeddedDocumentField(Dimensions)
    # Step sizes are stored as text (at most 255 characters), not as numbers.
    time_step = db.StringField(max_length=255)
    z_step = db.StringField(max_length=255)

    # --- specimen and modality description ---
    species = db.StringField(max_length=1000)
    specimen = db.StringField(max_length=1000)
    modality = db.EmbeddedDocumentField(ModalityInformation)

    # --- location in the ontology ---
    kinetics = db.StringField(choices=('static', 'dynamic'), required=True)
    spatial_dim = db.StringField(choices=('2d', '3d'), required=True)

    # Each sample belongs to exactly one Experiment.
    # NOTE(review): the field is required, yet NULLIFY clears it when the
    # referenced experiment is deleted -- confirm whether CASCADE or DENY
    # was intended.
    experiment = db.ReferenceField(
        Experiments,
        required=True,
        reverse_delete_rule=db.NULLIFY,
    )
Exemplo n.º 2
0
class Experiments(db.Document):
    """Experiment-level metadata; samples reference a document of this type."""

    created_by = db.ReferenceField(Users)

    # TODO: decide whether a name/ID field is needed in addition to the DOI.
    doi = db.StringField(max_length=1000)

    # Date on the microscope, stored as a string (the date added is
    # auto-saved by mongo).
    date_collected = db.StringField()

    # Each experiment should have the same methods.
    methods = db.EmbeddedDocumentField(Methods)

    copyright = db.StringField()
Exemplo n.º 3
0
class Subsection(db.EmbeddedDocument):
    """Embedded record holding coordinates, dimensions and status flags.

    NOTE(review): presumably describes the region of a sample that was sent
    out for annotation (it is embedded by ``Crowdsourcing``) -- confirm.
    """

    # Integer coordinates along x, y, z and t.
    # NOTE(review): presumably the origin of the subsection within the
    # original image -- confirm.
    coordinate_x = db.IntField()
    coordinate_y = db.IntField()
    coordinate_z = db.IntField()
    coordinate_t = db.IntField()

    dimensions = db.EmbeddedDocumentField(Dimensions)

    # Status flags; none are required and none declare a default.
    queued = db.BooleanField()
    annotated = db.BooleanField()
    curated = db.BooleanField()  # TODO: could this also be called "QC'd"?
Exemplo n.º 4
0
class Crowdsourcing(db.Document):
    """Record of samples sent to crowdsourcing companies.

    This should describe which samples have been sent to which crowdsourcing
    companies. It should also note what dimensions were used and what area
    of the original raw image it came from (we sometimes crop out areas
    because they're at the edge of the dish, etc.).

    Open question: should we state/force standard dimensions here?
    """

    # TODO: should be connected to individual samples.

    platform = db.StringField(
        choices=('appen', 'anolytics', 'mturk'),
        required=True,
    )
    submitted_by = db.ReferenceField(Users)

    # NOTE(review): the name is plural but this holds a single Subsection --
    # confirm whether a list of subsections was intended.
    subsections = db.EmbeddedDocumentField(Subsection)

    # Seed fed into caliban-toolbox to create the train/val/test split.
    split_seed = db.IntField()
Exemplo n.º 5
0
class Training_Data(db.Document):
    """A collection of pointers to each npz of paired training data.

    Each npz contains paired X (raw) and y (annotations) data.
    """

    # Location in the ontology (the annotation could be different from the
    # raw data, e.g. movies vs. independent images).
    kinetics = db.StringField(choices=('static', 'dynamic'), required=True)
    spatial_dim = db.StringField(choices=('2d', '3d'), required=True)
    # e.g. whole cell, cyto, nuc, AM, tracking, dots?
    annotation_type = db.StringField()

    # ID information
    # The DOI may differ from the raw data's (compilation of multiple).
    doi = db.StringField(max_length=1000)
    # Human-readable, for display purposes (e.g. "smith et al. nuclear study").
    title = db.StringField()
    copyright = db.StringField()

    # Samples contained, or link to crowdsourcing (individual annotated
    # pieces of samples)?
    #
    # The delete rule must be declared on the inner ReferenceField: MongoEngine
    # reads it from the list's element field and ignores one passed to the
    # ListField itself. PULL removes only the deleted sample's reference from
    # the list, whereas NULLIFY would unset the whole field.
    samples_contained = db.ListField(
        db.ReferenceField(Samples, reverse_delete_rule=db.PULL)
    )
    # TODO: Should have coordinates that follow samples and subsamples
    # TODO: Is samples_contained sufficient? Should keys like tissue/platform
    #       list be stored here?
    # TODO: Does channel_list violate our data ontology? Shouldn't it be 1-to-1?
    # TODO: Should this be DNA/Membrane or dsDNA or DAPI or nuc?
    raw_channel_list = db.ListField(db.StringField())
    # TODO: Which samples/platforms exist with which batch? Do we need a
    #       one-to-one like that?
    padding = db.BooleanField()
    # TODO: Include size of padding (x and y)

    ann_version = db.StringField()  # TODO: Link this to DVC
    last_modified = db.StringField()
    ann_stats = db.EmbeddedDocumentField(annotation_stats)

    # Percentage of the total data in each of the train/val/test splits.
    split_train = db.FloatField()
    split_val = db.FloatField()
    split_test = db.FloatField()

    raw_dtype = db.StringField()  # TODO: Enumerate as choices
    ann_dtype = db.StringField()

    nas_filepath = db.StringField()  # path to the npz on madrox
    cloud_storage_loc = db.URLField()  # aws address
Exemplo n.º 6
0
class annotation_stats(db.EmbeddedDocument):
    """Embedded summary counts for a set of annotations.

    Embedded by ``Training_Data`` as its ``ann_stats`` field.
    """

    # TODO: include total number of annotations/trajectories/children
    num_batches = db.IntField()  # for 2d this is num imgs, for 3d num movies, etc
    dimensions = db.EmbeddedDocumentField(Dimensions)
    # NOTE(review): presumably the number of annotations and the number of
    # divisions, respectively -- confirm.
    num_ann = db.IntField()
    num_div = db.IntField()