class DataFile(db.Model):
    """Base mapped class for rows of the ``datafiles`` table.

    Rows are polymorphic on the ``type`` column; this base class registers the
    identity ``"abstract"``. The ``typedcheck`` constraint only admits the
    type values ``'virtual'`` (with a non-null ``underlying_data_file_id``),
    ``'s3'`` and ``'gcs'``. File names are unique within a dataset version.
    """

    __tablename__ = "datafiles"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    name: str = db.Column(db.String(80))
    # Discriminator column used by ``polymorphic_on`` below.
    type: str = db.Column(db.String(20))

    dataset_version_id: str = db.Column(
        GUID, db.ForeignKey("dataset_versions.id")
    )
    # The backref exposes a ``datafiles`` collection on DatasetVersion; the
    # cascade is configured on that collection side.
    dataset_version: "DatasetVersion" = db.relationship(
        "DatasetVersion",
        foreign_keys=[dataset_version_id],
        backref=db.backref(__tablename__, cascade="all, delete-orphan"),
    )

    # These two columns really belong to VirtualDataFile, but SQLAlchemy seems
    # to have some problems with self-referencing FKs. Moved here to get it to
    # work, but assume these will both be None unless the type is
    # VirtualDataFile. Also underlying_data_file_id is always None. Use
    # underlying_data_file.id instead if you want the ID.
    underlying_data_file_id = db.Column(GUID, db.ForeignKey("datafiles.id"))
    underlying_data_file = db.relationship("DataFile", remote_side=[id])

    __table_args__ = (
        UniqueConstraint("dataset_version_id", "name"),
        CheckConstraint(
            "(type = 'virtual' and underlying_data_file_id is not null) or (type = 's3') or (type = 'gcs')",
            name="typedcheck",
        ),
    )

    __mapper_args__ = {
        "polymorphic_on": type,
        "polymorphic_identity": "abstract",
    }
class Activity(db.Model):
    """Mapped class for the ``activities`` table.

    Each row ties a user and a dataset to a typed event, with a timestamp and
    free-text comments. The mapper is polymorphic on ``type``.
    """

    class ActivityType(enum.Enum):
        """Kinds of activity that can be recorded."""

        created = "Created"
        changed_name = "Changed name"
        changed_description = "Changed Description"
        added_version = "Added version"
        started_log = "Log started"

    __tablename__ = "activities"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    user: User = db.relationship("User", backref=__tablename__)

    dataset_id: str = db.Column(GUID, db.ForeignKey("datasets.id"))
    dataset: Dataset = db.relationship("Dataset", backref=__tablename__)

    # We would want the type of change and the comments associated
    type: ActivityType = db.Column(db.Enum(ActivityType), nullable=False)

    # The callable itself is passed as the default, so it is invoked per row
    # rather than once at import time. ``utcnow`` yields a naive datetime.
    timestamp: datetime.datetime = db.Column(
        db.DateTime, default=datetime.datetime.utcnow
    )

    comments: str = db.Column(db.Text)

    __mapper_args__ = {"polymorphic_on": type}
class User(db.Model):
    """Mapped class for the ``users`` table.

    A user has a unique name, a unique token, an optional Figshare personal
    token, and two foreign keys into ``folders``: a home folder and a trash
    folder.
    """

    __tablename__ = "users"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    name: str = db.Column(db.String(80), unique=True)
    email: str = db.Column(db.TEXT)
    token: str = db.Column(db.String(50), unique=True, default=generate_str_uuid)

    # Two FKs point at ``folders``, so each relationship names its own
    # ``foreign_keys`` to disambiguate the join.
    home_folder_id: str = db.Column(GUID, db.ForeignKey("folders.id"))
    home_folder: "Folder" = db.relationship(
        "Folder", foreign_keys="User.home_folder_id", backref="home_user"
    )

    trash_folder_id: str = db.Column(GUID, db.ForeignKey("folders.id"))
    trash_folder: "Folder" = db.relationship(
        "Folder", foreign_keys="User.trash_folder_id", backref="trash_user"
    )

    figshare_personal_token: str = db.Column(db.String(128), nullable=True)

    def __str__(self):
        """Return a short summary with the user's name and folder names.

        Both folder foreign keys are nullable, so either relationship may be
        ``None``; in that case the corresponding folder name is rendered as
        ``None`` instead of raising ``AttributeError``.
        """
        home_name = getattr(self.home_folder, "name", None)
        trash_name = getattr(self.trash_folder, "name", None)
        return "name: {}, home_folder: {}, trash_folder: {}".format(
            self.name, home_name, trash_name
        )
class DatasetSubscription(db.Model):
    """Mapped class for ``dataset_subscriptions``: links one user to one dataset."""

    __tablename__ = "dataset_subscriptions"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    # Each relationship also installs a ``dataset_subscriptions`` backref on
    # the target class.
    user: User = db.relationship("User", backref=__tablename__)
    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))

    dataset: Dataset = db.relationship("Dataset", backref=__tablename__)
    dataset_id: str = db.Column(GUID, db.ForeignKey("datasets.id"))
class Entry(db.Model):
    """Polymorphic base mapped class for rows of the ``entries`` table.

    Holds the fields shared by every entry: name, creation date, creator and
    description. The ``type`` column is the polymorphic discriminator.
    """

    __tablename__ = "entries"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    name: str = db.Column(db.Text, nullable=False)
    # Discriminator column used by ``polymorphic_on`` below.
    type: str = db.Column(db.String(50))
    creation_date: datetime.datetime = db.Column(
        db.DateTime, default=datetime.datetime.utcnow
    )

    creator_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    creator: User = db.relationship(
        "User", foreign_keys="Entry.creator_id", backref=__tablename__
    )

    description: str = db.Column(db.Text)

    __mapper_args__ = {
        # NOTE(review): ``classmethod.__class__`` is the builtin ``type``, so
        # this expression evaluates to the string "type" (not "Entry"). It is
        # left untouched because changing it would change the discriminator
        # value used for plain Entry rows.
        "polymorphic_identity": classmethod.__class__.__name__,
        "polymorphic_on": type,
        "with_polymorphic": "*",
    }

    def __str__(self):
        """Return a short label containing the entry's name."""
        return "Entry name: {}".format(self.name)
class UploadSession(db.Model):
    """Mapped class for ``upload_sessions``: an upload session owned by a user."""

    __tablename__ = "upload_sessions"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    # Also installs an ``upload_sessions`` backref on User.
    user: User = db.relationship("User", backref=__tablename__)
class FigshareDatasetVersionLink(ThirdPartyDatasetVersionLink):
    """Third-party link subclass with polymorphic identity ``"figshare"``.

    Stored in ``figshare_dataset_version_links``, whose primary key is also a
    foreign key to ``third_party_dataset_version_links.id``. Records the
    Figshare article id/version and the dataset version it is attached to.
    """

    __tablename__ = "figshare_dataset_version_links"

    id: str = db.Column(
        GUID,
        db.ForeignKey("third_party_dataset_version_links.id"),
        primary_key=True,
    )

    figshare_article_id: int = db.Column(db.Integer, nullable=False)
    figshare_article_version: int = db.Column(db.Integer, nullable=False)

    dataset_version_id: str = db.Column(
        GUID, db.ForeignKey("dataset_versions.id")
    )
    # ``uselist=False`` makes the backref on DatasetVersion a scalar rather
    # than a collection.
    dataset_version: DatasetVersion = db.relationship(
        "DatasetVersion",
        backref=backref("figshare_dataset_version_link", uselist=False),
    )

    figshare_datafile_links: List["FigshareDataFileLink"] = db.relationship(
        "FigshareDataFileLink"
    )

    __mapper_args__ = {"polymorphic_identity": "figshare"}
class UserLog(db.Model):
    """Mapped class for ``user_logs``: last access time of a user to an entry."""

    __tablename__ = "user_logs"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    user_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    # NOTE(review): the backref is named "user", so the reverse attribute on
    # User is ``User.user`` -- looks unintended, but renaming it would change
    # the attribute other code may rely on.
    user: User = db.relationship(
        "User", foreign_keys="UserLog.user_id", backref="user"
    )

    entry_id: str = db.Column(GUID, db.ForeignKey("entries.id"))
    # NOTE(review): same pattern as above; the reverse attribute on Entry is
    # ``Entry.entry``.
    entry: "Entry" = db.relationship(
        "Entry", foreign_keys="UserLog.entry_id", backref="entry"
    )

    # TODO: Setup the constraint of only having one (user, entry) row
    last_access: datetime.datetime = db.Column(
        db.DateTime, default=datetime.datetime.utcnow
    )
class ProvenanceEdge(db.Model):
    """Mapped class for ``provenance_edges``: a labelled edge between two nodes."""

    __tablename__ = "provenance_edges"

    edge_id = db.Column(GUID, primary_key=True, default=generate_uuid)

    # Both endpoints reference ``provenance_nodes``, so each relationship
    # names its own ``foreign_keys`` to disambiguate the join.
    from_node_id = db.Column(GUID, db.ForeignKey("provenance_nodes.node_id"))
    from_node = db.relationship(
        "ProvenanceNode",
        foreign_keys="ProvenanceEdge.from_node_id",
        backref="from_edges",
    )

    to_node_id = db.Column(GUID, db.ForeignKey("provenance_nodes.node_id"))
    to_node = db.relationship(
        "ProvenanceNode",
        foreign_keys="ProvenanceEdge.to_node_id",
        backref="to_edges",
    )

    label = db.Column(db.Text)
class ProvenanceNode(db.Model):
    """Mapped class for ``provenance_nodes``: a typed node of a provenance graph.

    A node belongs to a graph and may optionally reference a data file.
    """

    __tablename__ = "provenance_nodes"

    class NodeType(enum.Enum):
        """Allowed values for the ``type`` column."""

        Dataset = "dataset"
        External = "external"
        Process = "process"

    node_id = db.Column(GUID, primary_key=True, default=generate_uuid)

    graph_id = db.Column(GUID, db.ForeignKey("provenance_graphs.graph_id"))
    graph = db.relationship("ProvenanceGraph", backref=__tablename__)

    datafile_id = db.Column(GUID, db.ForeignKey("datafiles.id"), nullable=True)
    datafile = db.relationship("DataFile", backref=__tablename__)

    label = db.Column(db.Text)
    type = db.Column(db.Enum(NodeType))
class UploadSessionFile(db.Model):
    """Mapped class for ``upload_session_files``: one file of an upload session.

    Carries the submitted filename and encoding, the initial/converted file
    types, storage locations (S3 keys and bucket, GCS path), summaries,
    checksums, and an optional link to a ``DataFile``.
    """

    __tablename__ = "upload_session_files"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)

    session_id: str = db.Column(GUID, db.ForeignKey("upload_sessions.id"))
    session: UploadSession = db.relationship(
        "UploadSession", backref=__tablename__
    )

    # filename submitted by user
    filename: str = db.Column(db.Text)
    encoding: str = db.Column(db.Text)

    initial_filetype: InitialFileType = db.Column(db.Enum(InitialFileType))
    initial_s3_key: str = db.Column(db.Text)

    converted_filetype: S3DataFile.DataFileFormat = db.Column(
        db.Enum(S3DataFile.DataFileFormat)
    )
    converted_s3_key: str = db.Column(db.Text)

    compressed_s3_key: str = db.Column(db.Text)

    s3_bucket: str = db.Column(db.Text)

    gcs_path: str = db.Column(db.Text)
    generation_id: str = db.Column(db.Text)

    short_summary: str = db.Column(db.Text)
    long_summary: str = db.Column(db.Text)
    column_types_as_json: Dict[str, str] = db.Column(db.JSON)

    original_file_sha256: str = db.Column(db.Text)
    original_file_md5: str = db.Column(db.Text)

    data_file_id: str = db.Column(GUID, db.ForeignKey("datafiles.id"))
    data_file: DataFile = db.relationship(
        "DataFile",
        uselist=False,
        foreign_keys="UploadSessionFile.data_file_id",
    )
class Group(db.Model):
    """Mapped class for the ``groups`` table: a named collection of users.

    Membership is many-to-many through ``group_user_association_table``; the
    relationship also installs a ``groups`` backref on ``User``.
    """

    __tablename__ = "groups"

    # The column is an auto-incrementing integer, so the annotation is ``int``
    # (the other models use GUID primary keys annotated as ``str``).
    id: int = db.Column(db.INTEGER, primary_key=True, autoincrement=True)

    name: str = db.Column(db.String(80))

    users: List[User] = db.relationship(
        User.__name__,
        secondary=group_user_association_table,
        backref=__tablename__,
    )

    def __repr__(self):
        """Return ``"Group <name>"``."""
        return "Group {}".format(self.name)
class Dataset(Entry):
    """Entry subclass stored in ``datasets`` with polymorphic identity ``"Dataset"``.

    The primary key is also a foreign key to ``entries.id``.
    """

    __tablename__ = "datasets"

    id = db.Column(GUID, db.ForeignKey("entries.id"), primary_key=True)

    # TODO: Use the name/key of the dataset and add behind the uuid?
    permanames: List["DatasetPermaname"] = db.relationship("DatasetPermaname")

    @property
    def permaname(self) -> str:
        """Return the permaname with the greatest ``creation_date``.

        Returns an empty string when the dataset has no permanames.
        """
        candidates = self.permanames
        if not len(candidates) > 0:
            return ""
        newest = max(candidates, key=lambda candidate: candidate.creation_date)
        return newest.permaname

    __mapper_args__ = {"polymorphic_identity": "Dataset"}
class ProvenanceGraph(db.Model):
    """Mapped class for ``provenance_graphs``.

    A graph has a unique permaname, a display name, an optional creating
    user and a creation timestamp.
    """

    __tablename__ = "provenance_graphs"

    graph_id = db.Column(GUID, primary_key=True, default=generate_uuid)

    # Stored as a GUID column with its own generated default, independent of
    # ``graph_id``.
    permaname = db.Column(GUID, unique=True, default=generate_uuid)

    name = db.Column(db.Text)

    created_by_user_id = db.Column(
        GUID, db.ForeignKey("users.id"), nullable=True
    )
    user = db.relationship("User", backref=__tablename__)

    created_timestamp = db.Column(
        db.DateTime, default=datetime.datetime.utcnow
    )
class ThirdPartyDatasetVersionLink(db.Model):
    """Polymorphic base mapped class for ``third_party_dataset_version_links``.

    Rows are discriminated on ``type``; this base class registers the
    identity ``"abstract"``. Each link records the user who created it.
    """

    __tablename__ = "third_party_dataset_version_links"

    id: str = db.Column(GUID, primary_key=True, default=generate_uuid)
    # Discriminator column used by ``polymorphic_on`` below.
    type: str = db.Column(db.String(50))

    creator_id: str = db.Column(GUID, db.ForeignKey("users.id"))
    creator: User = db.relationship(
        "User",
        foreign_keys="ThirdPartyDatasetVersionLink.creator_id",
        backref=__tablename__,
    )

    __mapper_args__ = {
        "polymorphic_on": type,
        "polymorphic_identity": "abstract",
    }
class Folder(Entry):
    """Entry subclass stored in ``folders`` with polymorphic identity ``"Folder"``.

    A folder has a ``folder_type`` and contains other entries through
    ``folder_entry_association_table``; contained entries get a ``parents``
    backref.
    """

    # Enum Folder types
    # TODO: Could be a good idea to transform these enums into Classes. So
    # they can have different behaviors if needed
    class FolderType(enum.Enum):
        """Allowed values for the ``folder_type`` column."""

        home = "home"
        trash = "trash"
        folder = "folder"

    __tablename__ = "folders"

    # TODO: Instead of using a string 'entry.id', can we use Entry.id?
    id: str = db.Column(GUID, db.ForeignKey("entries.id"), primary_key=True)

    folder_type: FolderType = db.Column(db.Enum(FolderType))

    # TODO: This should be a set, not a list.
    entries: List[Entry] = db.relationship(
        "Entry",
        secondary=folder_entry_association_table,
        backref="parents",
    )

    __mapper_args__ = {"polymorphic_identity": "Folder"}

    def __repr__(self):
        """Return a label containing the folder's name and id."""
        return "Folder name: {} and id: {}".format(self.name, self.id)
class DatasetVersion(Entry):
    """Entry subclass stored in ``dataset_versions``.

    Registered with polymorphic identity ``"DatasetVersion"``. A version
    belongs to a dataset, carries an integer version number that is unique
    per dataset, and has a state with an optional reason.
    """

    class DatasetVersionState(enum.Enum):
        """Allowed values for the ``state`` column."""

        approved = "Approved"
        deprecated = "Deprecated"
        deleted = "Deleted"

    # TODO: Missing the permaname of the DatasetVersion
    __tablename__ = "dataset_versions"

    id: str = db.Column(GUID, db.ForeignKey("entries.id"), primary_key=True)

    dataset_id: str = db.Column(GUID, db.ForeignKey("datasets.id"))

    # NOTE(review): the cascade is configured on this (version -> dataset)
    # side rather than on the backref, which would propagate deletes from a
    # version to its dataset -- confirm this is intended.
    dataset: Dataset = db.relationship(
        "Dataset",
        foreign_keys=[dataset_id],
        backref=db.backref(__tablename__),
        single_parent=True,
        cascade="all, delete-orphan",
    )

    # Filled out by the server
    version: int = db.Column(db.Integer)

    # State of the version
    state: DatasetVersionState = db.Column(
        db.Enum(DatasetVersionState), default=DatasetVersionState.approved
    )

    # Reason for the state of the version. Should be empty if approved
    reason_state: str = db.Column(db.Text)

    changes_description: str = db.Column(db.Text)

    __table_args__ = (UniqueConstraint("dataset_id", "version"),)

    # TODO: See how to manage the status (persist.py)

    __mapper_args__ = {"polymorphic_identity": "DatasetVersion"}