Exemplo n.º 1
0
class SBOLOutput(SuperGSLFunction):
    """Generate an SBOL document containing the assemblies.

    `execute` creates a fresh SBOL `Document`, adds one `ComponentDefinition`
    per assembly (built from the assembly's parts) and writes the document to
    disk.
    """

    name = 'sbol'

    def get_arguments(self):
        """Return the `(name, type)` pairs of the arguments this function declares."""
        return [('filename', str), ('assemblies', list)]

    def get_return_type(self):
        """Return the type of the function's result (`NoneType`: nothing is returned)."""
        return type(None)

    def sanitize_identifier(self, identifier):
        """Sanitize SuperGSL identifiers to conform to identifiers in the SBOL spec.

        Each of the characters ``[``, ``]``, ``~`` and ``:`` is replaced by an
        underscore; everything else is left untouched.
        """
        bad_chars = '[]~:'
        for c in bad_chars:
            identifier = identifier.replace(c, '_')
        return identifier

    def handle_assembly(self, assembly):
        """Add one assembly to the SBOL document.

        Args:
            assembly: The assembly to export. It is read through its `label`
                and `parts` attributes; each entry of `parts` must expose the
                actual part as `.part`.
                NOTE(review): attribute names are taken from the accesses in
                this method - confirm against the assembly type.
        """
        self.assembly_count += 1

        # Fall back to a generated, zero-padded label for unlabeled assemblies.
        label = assembly.label
        if not label:
            label = 'Assembly%05d' % self.assembly_count

        # Keep the SBOL definition in its own name so the input assembly is
        # not shadowed while its parts are still being read.
        assembly_definition = ComponentDefinition(label)
        self.sbol_doc.addComponentDefinition(assembly_definition)

        part_components = []
        for part_node in assembly.parts:
            part = part_node.part

            sanitized_ident = self.sanitize_identifier(part.identifier)

            part_component = ComponentDefinition(sanitized_ident)
            part_component.roles = part.roles
            part_component.sequence = Sequence(sanitized_ident,
                                               str(part.get_sequence().seq))

            part_components.append(part_component)

        assembly_definition.assemblePrimaryStructure(part_components)
        assembly_definition.compile()

    def execute(self, params: dict):
        """Build the SBOL document for the given assemblies and write it out.

        Args:
            params: The call arguments. The assemblies are taken from
                ``params['assemblies']``; if that lookup is not possible the
                first positional entry (``params[0]``) is used instead. When
                `params` is a dict with a non-empty ``'filename'`` entry, the
                document is written there; otherwise it is written to
                ``output_sbol.xml``.
        """
        setHomespace('http://sbols.org/SuperGSL_Example/')
        Config.setOption('sbol_compliant_uris', True)
        Config.setOption('sbol_typed_uris', True)

        self.sbol_doc = Document()
        self.assembly_count = 0

        # Prefer the declared argument name; keep the historical positional
        # lookup as a fallback so existing callers continue to work.
        try:
            assembly_list: AssemblyResultSet = params['assemblies']
        except (KeyError, IndexError, TypeError):
            assembly_list = params[0]

        filename = 'output_sbol.xml'
        if isinstance(params, dict):
            filename = params.get('filename') or filename

        for assembly in assembly_list:
            self.handle_assembly(assembly)

        self.sbol_doc.write(filename)
Exemplo n.º 2
0
def write_sbol(filled_library,
               filled_library_metadata,
               filled_description,
               molecule_type=BIOPAX_DNA,
               part_column="Part Name",
               sequence_column="Sequence",
               description_column="Description (Optional)",
               role_column="Role",
               length_column="length (bp)"):
    """
    Convert a parts-library dataframe into an SBOL document.

    One ComponentDefinition and one Sequence are created per row of
    ``filled_library``. The finished document is also written to
    ``SBOL_testcollection.xml`` in the current working directory.

    Parameters
    ----------
    filled_library : DATAFRAME
        Dataframe containing the library part data, one part per row
    filled_library_metadata : DATAFRAME
        Dataframe containing the metadata; the cell at row 0, column 1 is
        used as the document name
    filled_description : DATAFRAME
        Dataframe containing the description data; ``str()`` of its values
        becomes the document description
    molecule_type : STRING, default = BIOPAX_DNA
        Molecule type passed to every ComponentDefinition
    part_column : STRING, default = "Part Name"
        Name of the dataframe column that contains the part names
    sequence_column : STRING, default = "Sequence"
        Name of the dataframe column that contains the sequence
    description_column : STRING, default = "Description (Optional)"
        Name of the dataframe column that contains the part description;
        null cells leave the component description unset
    role_column : STRING, default = "Role"
        Name of the dataframe column that contains the Role; the value is
        looked up in the module-level ``ontology`` mapping
    length_column : STRING, default = "length (bp)"
        Name of the dataframe column that contains the sequence length


    Returns
    -------
     doc: SBOL Document
         Document containing all components and sequences

     Example
     -------
     Doc = write_sbol(filled_library, filled_library_metadata, filled_description)

    """
    doc = Document()
    Config.setOption('sbol_typed_uris', False)

    # Parts library. The keyword arguments are honored as passed: they are
    # not reset to their defaults here, so callers can choose the molecule
    # type and the column names.
    for index, row in filled_library.iterrows():
        part_name = row[part_column]

        component = ComponentDefinition(part_name, molecule_type)
        component.roles = ontology[row[role_column]]
        component.name = part_name
        if not pd.isnull(row[description_column]):
            component.description = row[description_column]
        doc.addComponentDefinition(component)

        # Normalize the sequence: drop all whitespace, strip stray byte-order
        # marks and lower-case it before comparing against the stated length.
        sequence_text = "".join(row[sequence_column].split())
        sequence_text = sequence_text.replace(u"\ufeff", "").lower()
        if len(sequence_text) != row[length_column]:
            logging.warning(
                f"The length of the sequence {part_name} does not coincide with the length in the corresponding column '{length_column}'"
            )

        sequence = Sequence(f"{part_name}_sequence",
                            sequence_text, SBOL_ENCODING_IUPAC)
        doc.addSequence(sequence)
        component.sequences = sequence

    # Metadata
    doc.description = str(filled_description.values)
    doc.name = filled_library_metadata.iloc[0, 1]

    doc.write('SBOL_testcollection.xml')

    return doc