Exemplo n.º 1
0
def main():
    """Render the Trisakti Bioinformatics Streamlit application.

    A sidebar select box switches between four pages:

    * "Intro"        - static introduction text and images.
    * "DNA Sequence" - analysis of a single uploaded FASTA record
      (nucleotide frequency, GC/AT content, protein synthesis views).
    * "DotPlot"      - dot plot and global-alignment similarity for two
      uploaded FASTA records.
    * "About"        - project and contact information.
    """
    image = Image.open('b2.png')
    st.image(image, width=200)
    st.title("Trisakti Bioinformatics Application")
    st.title("Powered by Python")

    menu = ["Intro", "DNA Sequence", "DotPlot", "About"]
    choice = st.sidebar.selectbox("Select Activity", menu)

    if choice == "Intro":
        st.subheader("Intro to BioInformatics")
        image = Image.open('dna.png')
        st.image(image, width=800)

        st.subheader("Bioinformatics")
        st.subheader(
            "Bioinformatika adalah ilmu yang mempelajari penerapan teknik komputasional untuk mengelola dan menganalisis informasi biologis. Bidang ini mencakup penerapan metode matematika, statistika, dan informatika untuk memecahkan masalah-masalah biologis, terutama dengan menggunakan sekuens DNA dan asam amino serta informasi yang berkaitan dengannya. Contoh topik utama bidang ini meliputi basis data untuk mengelola informasi biologis, penyejajaran sekuens (sequence alignment), prediksi struktur untuk meramalkan bentuk struktur protein maupun struktur sekunder RNA, analisis filogenetik, dan analisis ekspresi gen"
        )

        st.subheader("DNA")
        st.subheader(
            "Asam deoksiribonukleat, lebih dikenal dengan singkatan DNA (bahasa Inggris: deoxyribonucleic acid), adalah sejenis biomolekul yang menyimpan dan menyandi instruksi-instruksi genetika setiap organisme dan banyak jenis virus. Instruksi-instruksi genetika ini berperan penting dalam pertumbuhan, perkembangan, dan fungsi organisme dan virus. DNA merupakan asam nukleat; bersamaan dengan protein dan karbohidrat, asam nukleat adalah makromolekul esensial bagi seluruh makhluk hidup yang diketahui. Kebanyakan molekul DNA terdiri dari dua unting biopolimer yang berpilin satu sama lainnya membentuk heliks ganda. Dua unting DNA ini dikenal sebagai polinukleotida karena keduanya terdiri dari satuan-satuan molekul yang disebut nukleotida. Tiap-tiap nukleotida terdiri atas salah satu jenis basa nitrogen (guanina (G), adenina (A), timina (T), atau sitosina (C)), gula monosakarida yang disebut deoksiribosa, dan gugus fosfat. Nukleotida-nukelotida ini kemudian tersambung dalam satu rantai ikatan kovalen antara gula satu nukleotida dengan fosfat nukelotida lainnya. Hasilnya adalah rantai punggung gula-fosfat yang berselang-seling. Menurut kaidah pasangan basa (A dengan T dan C dengan G), ikatan hidrogen mengikat basa-basa dari kedua unting polinukleotida membentuk DNA unting ganda."
        )

    elif choice == "DNA Sequence":
        st.subheader("DNA Sequence Analysis")

        seq_file = st.file_uploader("Upload FASTA File",
                                    type=["fasta", "fa", "txt"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record.description)
            elif details == "Sequence":
                st.write(dna_record.seq)

            # Nucleotide frequency
            st.subheader("Nucleotide Frequency")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            thymine_color = st.beta_color_picker("Thymine Color")
            guanine_color = st.beta_color_picker("Guanine Color")
            cytosil_color = st.beta_color_picker("Cytosil Color")

            if st.button("Plot Frequency"):
                barlist = plt.bar(list(dna_freq.keys()),
                                  list(dna_freq.values()))
                # Counter keys keep first-occurrence order of the uploaded
                # sequence, so fixed bar indices would colour the wrong bars
                # (or raise IndexError when fewer than four symbols occur).
                # Colour each bar by the nucleotide it actually represents;
                # other symbols (e.g. N) keep the default colour.
                nucleotide_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosil_color,
                }
                for nucleotide, bar in zip(dna_freq, barlist):
                    bar_color = nucleotide_colors.get(str(nucleotide).upper())
                    if bar_color is not None:
                        bar.set_color(bar_color)

                st.pyplot()

            st.subheader("DNA Composition")
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({"GC Content": gc_score, "AT Content": at_score})

            # Count occurrences of the text typed by the user
            nt_count = st.text_input("Enter Nucleotide",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} Nucleotide is ::{}".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein Synthesis
            st.subheader("Protein Synthesis")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            # NOTE: because this is an if/elif chain, only the first ticked
            # option is shown and the checkboxes after it are not rendered.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())

            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())

            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())

            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)

            elif st.checkbox("Plot AA Frequency"):
                aa_color = st.beta_color_picker("Pick An Amino Acid Color")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()

            elif st.checkbox("Full Amino Acid Name"):
                # Stop codons ("*") are dropped before the name conversion.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("=========================")
                st.write(aa3)

                st.write("=========================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "DotPlot":
        st.subheader("Generate Dot Plot For Two Sequences")
        seq_file1 = st.file_uploader("Upload 1st FASTA File",
                                     type=["fasta", "fa"])
        seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                     type=["fasta", "fa"])

        # Both uploads are required. The previous `a and b is not None`
        # parsed as `a and (b is not None)` and only tested the second file
        # explicitly.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq
            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record1.description)
                st.write("=====================")
                st.write(dna_record2.description)
            elif details == "Sequence":
                st.write(dna_record1.seq)
                st.write("=====================")
                st.write(dna_record2.seq)
            cus_limit = st.number_input("Select Max number of Nucleotide", 100,
                                        40000, 10000)
            if st.button("Dot Plot"):
                st.write(
                    "Comparing the first {} Nucleotide of the Two Sequences".
                    format(cus_limit))
                dotplotx(dna_seq1[0:cus_limit], dna_seq2[0:cus_limit])

                st.pyplot()
            elif st.button("Similarity"):
                st.write(
                    "Similarity of Comparing the first {} Nucleotide of the Two Sequences"
                    .format(cus_limit))
                seq1_head = dna_seq1[0:cus_limit]
                seq2_head = dna_seq2[0:cus_limit]
                r = pairwise2.align.globalxx(seq1_head,
                                             seq2_head,
                                             one_alignment_only=True,
                                             score_only=True)
                # Write the percentage explicitly: a bare expression is only
                # displayed when Streamlit's "magic" feature is active, and
                # is silently discarded otherwise.
                if len(seq1_head) > 0:
                    st.write(r / len(seq1_head) * 100)
                else:
                    st.write("Similarity cannot be computed for an empty sequence")

    elif choice == "About":
        st.subheader(
            "Website ini dalam tahap pengembangan & digunakan untuk project penelitian."
        )
        st.subheader("contact : hafiz065001600009.trisakti.ac.id")
Exemplo n.º 2
0
def test_gc_content():
    """Check that gc_content reports the expected percentage for a short sequence.

    The sequence has 11 bases of which 4 are G or C, i.e. 4 / 11 * 100.
    """
    import math

    seq1 = nt.Sequence('ATGCTATGCTT')
    result = gc_content(seq1)
    # Compare with a tolerance: exact float equality breaks as soon as the
    # implementation changes its order of arithmetic operations.
    assert math.isclose(result, 36.36363636363637)
Exemplo n.º 3
0
def main():
    """Render the multi-page "Bioinformatics App" Streamlit application.

    A sidebar select box switches between six pages: an introduction, DNA
    sequence analysis of one FASTA record, a dot plot of two FASTA records,
    a protein-related paper search, a 3D molecule viewer and a chemical
    compound lookup.
    """
    from urllib.parse import quote

    st.title("Bioinformatics App")
    st.set_option('deprecation.showfileUploaderEncoding', False)

    activity = [
        'Intro', 'SequenceAnalysis', 'DotPlot', 'ProteinSearch',
        "MoleculeVisualizer", "ChemicalSearch"
    ]
    choice = st.sidebar.selectbox("Select Activity", activity)
    if choice == 'Intro':
        st.subheader("Intro")
        st.write(
            """ This is a bioinformatics web app made with Python and Streamlit. Use the left panel dropdown to choose the various features to use."""
        )
        image = Image.open("overviewpicture.png")
        st.image(image, use_column_width=True)

    elif choice == "SequenceAnalysis":
        st.subheader("DNA Sequence Analysis")

        seq_file = st.file_uploader("Upload FASTA File", type=["fasta", "fa"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record.description)
            elif details == "Sequence":
                st.write(dna_record.seq)

            # Nucleotide Frequencies
            st.subheader("Nucleotide Frequency")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            thymine_color = st.beta_color_picker("thymine Color")
            guanine_color = st.beta_color_picker("Guanine Color")
            cytosil_color = st.beta_color_picker("cytosil Color")

            if st.button("Plot Freq"):
                barlist = plt.bar(list(dna_freq.keys()),
                                  list(dna_freq.values()))
                # Counter keys keep first-occurrence order of the uploaded
                # sequence, so fixed bar indices would colour the wrong bars
                # (or raise IndexError when fewer than four symbols occur).
                # Colour each bar by the nucleotide it actually represents;
                # other symbols (e.g. N) keep the default colour.
                nucleotide_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosil_color,
                }
                for nucleotide, bar in zip(dna_freq, barlist):
                    bar_color = nucleotide_colors.get(str(nucleotide).upper())
                    if bar_color is not None:
                        bar.set_color(bar_color)

                st.pyplot()

            st.subheader("DNA Composition")
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({"GC Content": gc_score, "AT Content": at_score})

            # Count occurrences of the text typed by the user
            nt_count = st.text_input("Enter Nucleotide Here",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} Nucleotide is ::{}".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein Synthesis
            st.subheader("Protein Synthesis")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            # NOTE: because this is an if/elif chain, only the first ticked
            # option is shown and the checkboxes after it are not rendered.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())

            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())

            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())

            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)

            elif st.checkbox("Plot AA Frequency"):
                aa_color = st.beta_color_picker("Pick An Amino Acid Color")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()

            elif st.checkbox("Full Amino Acid Name"):
                # Stop codons ("*") are dropped before the name conversion.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("=====================")
                st.write(aa3)

                st.write("=====================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "ProteinSearch":
        st.subheader("Search for Papers Related to a Protein")
        st.write(""" Try entering ACE2 and coronavirus!""")

        ace2 = st.text_input("Query Protein")
        disease = st.text_input(
            "Query Specifier (more specific thing to narrow down papers with)")

        # Only the gene name is effectively required: an empty specifier
        # matches every title.
        if ace2 and disease is not None:
            # Quote the user-supplied gene so spaces, "&" or "#" cannot
            # corrupt the query string. The organism filter is
            # "homo sapiens"; the previous literal was mangled to "h**o".
            protein = req.get(
                'https://www.ebi.ac.uk/proteins/api/proteins?offset=0&size=10&gene='
                + quote(ace2, safe='') + '&organism=homo%20sapiens',
                headers={'Accept': "application/json"})
            # Parse the response once instead of on every loop iteration.
            entries = protein.json()
            references = []
            if isinstance(entries, list) and entries:
                references = entries[0].get('references', [])

            # The counter must live outside the loop; resetting it on each
            # iteration meant the cut-off below could never trigger.
            counter = 1
            for reference in references:
                if counter == 10:
                    break
                try:
                    title = reference['citation']['title']
                except (KeyError, TypeError):
                    # Reference without a citation title: skip it.
                    continue
                if isinstance(title, str) and disease in title:
                    st.write(title)
                    counter += 1

    elif choice == "DotPlot":
        st.subheader("Generate Dot Plot For Two Sequences")
        seq_file1 = st.file_uploader("Upload 1st FASTA File",
                                     type=["fasta", "fa"])
        seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                     type=["fasta", "fa"])

        # Both uploads are required. The previous `a and b is not None`
        # parsed as `a and (b is not None)` and only tested the second file
        # explicitly.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record1.description)
                st.write("=====================")
                st.write(dna_record2.description)
            elif details == "Sequence":
                st.write(dna_record1.seq)
                st.write("=====================")
                st.write(dna_record2.seq)

            cus_limit = st.number_input("Select Max number of Nucleotide", 10,
                                        200, 50)
            if st.button("Dot Plot"):
                st.write(
                    "Comparing the first {} Nucleotide of the Two Sequences".
                    format(cus_limit))
                dotplotx(dna_seq1[0:cus_limit], dna_seq2[0:cus_limit])

                st.pyplot()

    elif choice == "MoleculeVisualizer":
        st.subheader(
            "Look at a molecule! Pre-loaded example is the Covid-19 Spike Protein. Thank you to: https://github.com/napoles-uach/streamlit_3dmol"
        )

        component_3dmol()

    elif choice == "ChemicalSearch":
        st.title(
            "Search for chemicals and get info. Pre-loaded example: imatinib")
        user_compound = st.text_input("Enter compound name", 'imatinib')
        # A cleared text box yields "" rather than None, so test truthiness
        # to avoid sending an empty lookup.
        if user_compound:
            results = pcp.get_compounds(user_compound, 'name')
            for compound in results:
                # str() keeps the page rendering if a property is missing
                # (presumably returned as None - verify against the
                # PubChemPy version in use).
                st.write('Compound ID: ' + str(compound.cid))
                st.write('SMILES: ' + str(compound.isomeric_smiles))

                vioxx = Compound.from_cid(compound.cid)
                st.write('Molecular Formula: ' + str(vioxx.molecular_formula))
                st.write('Molecular Weight: ' + str(vioxx.molecular_weight))
                st.write('IUPAC Name: ' + str(vioxx.iupac_name))
                st.write('xlogp value: ' + str(vioxx.xlogp))
def main():
    """Render the "Simple Bioinformatics App" Streamlit application.

    The sidebar offers two pages: analysis of a single uploaded FASTA
    record ("DNA Sequence") and a dot plot comparing two uploaded FASTA
    records ("Dot Plot").
    """
    st.title('Simple Bioinformatics App')
    menu = ["DNA Sequence", "Dot Plot"]
    choice = st.sidebar.selectbox("Select Activity", menu)

    if choice == "DNA Sequence":
        st.subheader("DNA Sequence Analysis")
        seq_file = st.file_uploader("Upload FASTA File", type=["fasta", "fa"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq
            desc = dna_record.description
            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(desc)
            elif details == "Sequence":
                st.write(dna_seq)

            # Nucleotide Frequencies
            st.subheader("Nucleotide Frequencies")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            guanine_color = st.beta_color_picker("Guanine Color")
            thymine_color = st.beta_color_picker("Thymine Color")
            cytosil_color = st.beta_color_picker("Cytosil Color")

            if st.button("Plot Freq"):
                barlist = plt.bar(list(dna_freq.keys()),
                                  list(dna_freq.values()))
                # Counter keys keep first-occurrence order of the uploaded
                # sequence, so fixed bar indices would colour the wrong bars
                # (or raise IndexError when fewer than four symbols occur).
                # Colour each bar by the nucleotide it actually represents;
                # other symbols (e.g. N) keep the default colour.
                nucleotide_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosil_color,
                }
                for nucleotide, bar in zip(dna_freq, barlist):
                    bar_color = nucleotide_colors.get(str(nucleotide).upper())
                    if bar_color is not None:
                        bar.set_color(bar_color)

                st.pyplot()

            st.subheader("DNA Composition")
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({"GC Content ": gc_score, "AT Content ": at_score})

            # Count occurrences of the text typed by the user
            nt_count = st.text_input("Enter Nucleotide Here",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} nucleotide is : {} ".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein Synthesis
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            st.subheader("Protein Synthesis")
            # NOTE: because this is an if/elif chain, only the first ticked
            # option is shown and the checkboxes after it are not rendered.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())
            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())
            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())
            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)
            elif st.checkbox("AA Plot Frequency"):
                plt.bar(aa_freq.keys(), aa_freq.values())
                st.pyplot()
            elif st.checkbox("Full Amino Acid Name"):
                # Stop codons ("*") are dropped before the name conversion.
                aa_name = str(p1).replace("*", "")
                st.write(aa_name)
                st.write("--------------------------")
                st.write(utils.convert_1to3(aa_name))

    elif choice == "Dot Plot":
        st.subheader("Generate Dot Plot For Two Sequences")
        seq_file = st.file_uploader("Upload 1st FASTA File",
                                    type=["fasta", "fa"])
        seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                     type=["fasta", "fa"])

        # Both uploads are required. The previous `a and b is not None`
        # parsed as `a and (b is not None)` and only tested the second file
        # explicitly.
        if seq_file is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")

            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            desc1 = dna_record1.description
            desc2 = dna_record2.description

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(desc1)
                st.write("----------")
                st.write(desc2)
            elif details == "Sequence":
                st.write(dna_seq1)
                st.write("----------")
                st.write(dna_seq2)

            custom_limit = st.number_input("Select max number of Nucleotide ",
                                           10, 200, 25)
            if st.button("Dot Plot"):
                st.write("Comparing the first {} Nucleotide of Two Sequences ".
                         format(custom_limit))
                dotplotx(dna_seq1[0:custom_limit], dna_seq2[0:custom_limit])
                st.pyplot()
Exemplo n.º 5
0
def main():
    """Render the DNA genome analysis Streamlit application.

    The sidebar switches between an introduction page, analysis of a single
    uploaded FASTA record, a dot plot comparing two uploaded FASTA records,
    and an "About us" page.
    """

    st.title(
        "DNA Genome analysis and Cosine Similarity Analysis web application")
    menu = [
        "Introduction", "DNA sequence Analysis", "Dotplot Analysis", "About us"
    ]
    choice = st.sidebar.selectbox("Select Option", menu)

    if choice == "Introduction":
        st.subheader("Welcome to our Sequence Analysis Application :)")
    elif choice == "DNA sequence Analysis":
        st.subheader("DNA sequence Analysis will be done here.")
        seq_file = st.file_uploader(
            "Upload the .FASTA file for any DNA analysis of the considered Genome.",
            type=["fasta", "fa"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq

            details = st.radio(
                "Details of the DNA as provided by NCBI database:",
                ("DNA Record description", "Sequence"))
            if details == "DNA Record description":
                st.write(dna_record.description)
            elif details == "Sequence":
                st.write(dna_record.seq)

            # Nucleotide frequency
            st.subheader("Nucleotide Frequency :")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Toggle the Adenine Colour ")
            guanine_color = st.beta_color_picker("Toggle the Guanine Colour ")
            thymine_color = st.beta_color_picker("Toggle the Thymine Colour ")
            cytosine_color = st.beta_color_picker(
                "Toggle the Cytosine Colour ")

            if st.button("Plot frequency"):
                barlist = plt.bar(list(dna_freq.keys()),
                                  list(dna_freq.values()))
                # Counter keys keep first-occurrence order of the uploaded
                # sequence, so fixed bar indices would colour the wrong bars
                # (or raise IndexError when fewer than four symbols occur).
                # Colour each bar by the nucleotide it actually represents;
                # other symbols (e.g. N) keep the default colour.
                nucleotide_colors = {
                    "A": adenine_color,
                    "G": guanine_color,
                    "T": thymine_color,
                    "C": cytosine_color,
                }
                for nucleotide, bar in zip(dna_freq, barlist):
                    bar_color = nucleotide_colors.get(str(nucleotide).upper())
                    if bar_color is not None:
                        bar.set_color(bar_color)
                st.pyplot()

            st.subheader("DNA complete Composition")

            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({
                "GC Content(for heat stability)": gc_score,
                "AT Content": at_score
            })

            # Protein synthesis
            st.subheader("Protein Synthesis operations on the DNA :")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))
            # NOTE: because this is an if/elif chain, only the first ticked
            # option is shown and the checkboxes after it are not rendered.
            if st.checkbox("Transcription :"):
                st.write(dna_seq.transcribe())
            elif st.checkbox("Translation :"):
                st.write(dna_seq.translate())
            elif st.checkbox("Complement :"):
                st.write(dna_seq.complement())
            elif st.checkbox("Amino Acid frequency :"):
                st.write(aa_freq)

            elif st.checkbox("Plot the Amino Acid frequency :"):
                aa_color = st.beta_color_picker("Pick the Amino acid color:")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()

            elif st.checkbox("The complete Amino acid name is given as"):
                # Stop codons ("*") are dropped before the name conversion.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("========================")
                st.write(aa3)

                st.write("========================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "Dotplot Analysis":
        st.subheader(
            "Generate Dotplot for the comparision between two DNA sequences here."
        )
        seq_file1 = st.file_uploader(
            "Upload the first .FASTA file for any DNA analysis of the considered Genome.",
            type=["fasta", "fa"])
        seq_file2 = st.file_uploader(
            "Upload the second .FASTA file for any DNA analysis of the considered Genome.",
            type=["fasta", "fa"])

        # Both uploads are required. The previous `a and b is not None`
        # parsed as `a and (b is not None)` and only tested the second file
        # explicitly.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            details = st.radio(
                "Details of the DNA as provided by NCBI database:",
                ("Record details from the NCBI database", "Gene Sequence"))
            if details == "Record details from the NCBI database":
                st.write(dna_record1.description)
                st.write("===And the other Record is decribed as :===")
                st.write(dna_record2.description)

            elif details == "Gene Sequence":
                st.write(dna_record1.seq)
                st.write("===And the other sequence can be given as: ===")
                st.write(dna_record2.seq)

            display_limit = st.number_input(
                "Select maximum number of Nucleotides", 10, 200, 50)
            if st.button("Push here for Dotplot :)"):
                st.write(
                    "Comparing the first {} nucleotide of the two sequences".
                    format(display_limit))
                dotplotx(dna_seq1[0:display_limit], dna_seq2[0:display_limit])
                st.pyplot()

    elif choice == "About us":
        st.subheader("About the application and about us :)")
Exemplo n.º 6
0
def main():
    """Render the RSarbiter Streamlit application.

    The sidebar switches between an introduction page, analysis of a single
    uploaded FASTA record ("DNA Sequence"), a dot plot comparing two
    uploaded FASTA records ("Dot Plot"), and an "About" page.
    """
    st.title(
        "RSarbiter: a machine learning supported expert in prediction and analysis of recombination spots in S. cerevisiae"
    )
    menu = ["Intro", "DNA Sequence", "Dot Plot", "About"]
    choice = st.sidebar.selectbox("Select Activity", menu)
    if choice == "Intro":
        st.subheader("Intro to BioInformatics")
    elif choice == "DNA Sequence":
        st.subheader("DNA Sequence Analysis")
        seq_file = st.file_uploader("Upload FASTA File",
                                    type=["fasta", "fa", "txt"])
        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq
            st.write(dna_record)
            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record.description)
            else:
                st.write(dna_record.seq)
            # Nucleotide Frequency
            st.subheader("Nucleotide Frequency")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            thymine_color = st.beta_color_picker("thymine Color")
            cytosine_color = st.beta_color_picker("cytosine Color")
            guanine_color = st.beta_color_picker("guanine Color")
            if st.button("Plot Freq"):
                barlist = plt.bar(list(dna_freq.keys()),
                                  list(dna_freq.values()))
                # Counter keys keep first-occurrence order of the uploaded
                # sequence, so fixed bar indices would colour the wrong bars
                # (or raise IndexError when fewer than four symbols occur).
                # Colour each bar by the nucleotide it actually represents;
                # other symbols (e.g. N) keep the default colour.
                nucleotide_colors = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosine_color,
                }
                for nucleotide, bar in zip(dna_freq, barlist):
                    bar_color = nucleotide_colors.get(str(nucleotide).upper())
                    if bar_color is not None:
                        bar.set_color(bar_color)
                st.pyplot()
            st.subheader("DNA composition")
            gc_content = utils.gc_content(dna_seq)
            at_content = utils.at_content(dna_seq)
            st.write({
                "GC Content": gc_content,
                "AT Content": at_content
            })

            # Count occurrences of the text typed by the user
            nt_count = st.text_input("Enter Nucleotide Here",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} Nucleotide is {}".format(
                (nt_count),
                str(dna_seq).count(nt_count)))

            # Protein Synthesis
            st.subheader("Protein Synthesis")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))
            # NOTE: because this is an if/elif chain, only the first ticked
            # option is shown and the checkboxes after it are not rendered.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())
            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())
            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())
            elif st.checkbox("AA Frequency"):
                st.write(aa_freq)
            elif st.checkbox("Plot AA Frequency"):
                aa_color = st.beta_color_picker("Pick An Amino Acid Color")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()
            elif st.checkbox("Full Amino Acid Name"):
                # Stop codons ("*") are dropped before the name conversion.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("======================")
                st.write(aa3)
                st.write("======================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "Dot Plot":
        st.subheader("Generate a Dot Plot For Two Sequences")

        seq_file_1 = st.file_uploader("Upload 1st FASTA File",
                                      type=["fasta", "fa", "txt"])
        seq_file_2 = st.file_uploader("Upload 2nd FASTA File",
                                      type=["fasta", "fa", "txt"])

        # Both uploads are required. The previous `a and b is not None`
        # parsed as `a and (b is not None)` and only tested the second file
        # explicitly.
        if seq_file_1 is not None and seq_file_2 is not None:
            dna_record_1 = SeqIO.read(seq_file_1, "fasta")
            dna_record_2 = SeqIO.read(seq_file_2, "fasta")
            dna_seq_1 = dna_record_1.seq
            dna_seq_2 = dna_record_2.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record_1.description)
                st.write("======================")
                st.write(dna_record_2.description)
            elif details == "Sequence":
                st.write(dna_record_1.seq)
                st.write("==============")
                st.write(dna_record_2.seq)
            cus_limit = st.number_input("Select Max Number of Nucleotide", 10,
                                        200, 50)
            if st.button("Dot Plot"):
                st.write(
                    "Comparing The First {} Nucleotide of The Two Sequence".
                    format(cus_limit))
                dotplotx(dna_seq_1[0:cus_limit], dna_seq_2[0:cus_limit])
                st.pyplot()

    elif choice == "About":
        st.subheader("About")
Exemplo n.º 7
0
def main():
    """Render the Trisakti bioinformatics Streamlit app.

    Draws the header, then one of four pages chosen in the sidebar:
    "Intro" and "About" (static text and images), "DNA Sequence"
    (analysis of a single uploaded FASTA record) and "DotPlot"
    (side-by-side comparison of two uploaded FASTA records).

    All page text is emitted through explicit ``st.write`` calls instead
    of bare string statements, so the output does not depend on
    Streamlit's "magic" rewriting of standalone expressions.
    """
    image = Image.open('b2.png')
    st.image(image, width=200)
    st.title("Trisakti Bioinformatics Application")
    st.title("Powered by Python")

    menu = ["Intro", "About", "DNA Sequence", "DotPlot"]
    choice = st.sidebar.selectbox("Select Activity", menu)

    if choice == "Intro":
        st.subheader("Intro to BioInformatics")
        image = Image.open('dna.png')
        st.image(image, width=800)

        st.subheader("Bioinformatics")
        st.write(
            "Bioinformatika merupakan cabang ilmu dari biologi yang mengkombinasikan penggunaan komputerisasi dengan karakterisik molekuler biologi. Kombinasi ini disebabkan karena perkembangan teknologi informasi yang sangat pesat, sehingga memudahkan untuk dilakukannya penelitian, serta dapat memberikan informasi yang akurat berdasarkan pengelolaan data. Bioinformatika mempelajari interpretasi biologis data, dan evolusi berbagai bentuk kehidupan dari pendekatan komputasi."
        )

        st.subheader("DNA")
        st.write(
            "DNA adalah singkatan dari asam deoksiribonukleat, yang merupakan molekul yang menyimpan informasi genetik utama dalam sel. Nukleotida terdiri dari tiga bagian: gugus fosfat, gula pentosa (gula ribosa), dan basa. Basisnya terdiri dari empat jenis: adenin (A), guanin (G), sitosin (C), dan timin (T). A dan G adalah purin dengan dua cincin menyatu. C dan T adalah pirimidin dengan satu cincin tunggal. Selain DNA, ada jenis nukleotida lain yang disebut RNA atau asam ribonukleat."
        )

        st.subheader("Protein/Amino Acid")
        st.write(
            "Asam amino adalah senyawa organik yang memiliki gugus fungsional karboksil (-COOH) dan amina (biasanya -NH2). Dalam biokimia sering kali pengertiannya dipersempit: keduanya terikat pada satu atom karbon (C) yang sama (disebut atom C alfa atau α). Gugus karboksil memberikan sifat asam dan gugus amina memberikan sifat basa. Dalam bentuk larutan, asam amino bersifat amfoterik: cenderung menjadi asam pada larutan basa dan menjadi basa pada larutan asam."
        )

        st.subheader("Biopython")
        st.write(
            "Biopython adalah seperangkat alat yang tersedia secara gratis untuk komputasi biologis yang ditulis dengan Python oleh tim pengembang internasional. Aplikasi ini dibuat dan dikembangkan dengan bahasa pemrograman python yang mana menggunakan library biopython untuk proses eksplorasi dan ekstraksi data. Dalam eksplorasi dan ekstraksi data, biopython dapat melakukan proses comparing sequences DNA dan transtlation Nucleotide DNA ke Amino Acid penyusun protein. Berikut ini merupakan tabel Codon transtlation dari Nucleotide ke Amino Acid."
        )
        image = Image.open('protein.png')
        st.image(image, width=800)

    elif choice == "About":

        st.subheader("Sejarah Awal Coronavirus")
        st.write(
            "Coronavirus pertama kali ditemukan pada pertengahan tahun 1960 dengan jenis HCoV-229E. Virus ini bermutasi selama 56 tahun sampai pada tahun 2020 tercatat ada tujuh dari banyaknya jenis spesies virus corona yang menginfeksi manusia muali dari Alpha Coronavirus, Beta Coronavirus, SARS, dan juga MERS. Evolusi dari jenis spesies virus corona dapat terlihat pada gambar di bawah ini."
        )
        image = Image.open('mutasi.PNG')
        st.image(image, width=800)

        st.subheader("Corona Virus Disease 2019")
        st.write(
            "Pandemi koronavirus 2019 (bahasa Inggris: coronavirus disease 2019, disingkat COVID-19) adalah penyakit menular yang disebabkan oleh SARS-CoV-2, salah satu jenis koronavirus. Penyakit ini mengakibatkan pandemi koronavirus 2019–2020.Penderita COVID-19 dapat mengalami demam, batuk kering, dan kesulitan bernapas.Sakit tenggorokan, pilek, atau bersin-bersin lebih jarang ditemukan.Pada penderita yang paling rentan, penyakit ini dapat berujung pada pneumonia dan kegagalan multiorgan.Infeksi menyebar dari satu orang ke orang lain melalui percikan (droplet) dari saluran pernapasan yang sering dihasilkan saat batuk atau bersin. Waktu dari paparan virus hingga timbulnya gejala klinis berkisar antara 1–14 hari dengan rata-rata 5 hari. Metode standar diagnosis adalah uji reaksi berantai polimerase transkripsi-balik (rRT-PCR) dari usap nasofaring atau sampel dahak dengan hasil dalam beberapa jam hingga 2 hari. Pemeriksaan antibodi dari sampel serum darah juga dapat digunakan dengan hasil dalam beberapa hari. Infeksi juga dapat didiagnosis dari kombinasi gejala, faktor risiko, dan pemindaian tomografi terkomputasi pada dada yang menunjukkan gejala pneumonia."
        )

        st.subheader("Tujuan Pembuatan Aplikasi")
        st.write(
            "Tujuan pembuatan dan pengembangan aplikasi ini adalah agar dapat membantu para peneliti dalam menganalisis informasi yang ada pada data DNA dengan bentuk visualisasi diagram plot. Juga hasil informasi dari ekstraksi data DNA menghasilkan pola/patern transtlation protein dari sample DNA Coronavirus. Aplikasi ini dapat memberi hasil persentase similaritas sequencing alignment DNA, sehingga terlihat adanya mutasi genetik yang terjadi."
        )

        st.subheader(
            "Website ini dalam tahap pengembangan & digunakan untuk project penelitian."
        )
        st.subheader("contact : hafiz065001600009.trisakti.ac.id")

    elif choice == "DNA Sequence":
        st.subheader("DNA Sequence Analysis")

        seq_file = st.file_uploader("Upload FASTA File",
                                    type=["fasta", "fa", "txt"])

        if seq_file is not None:
            dna_record = SeqIO.read(seq_file, "fasta")
            dna_seq = dna_record.seq

            details = st.radio("Details", ("Description", "Sequence"))
            if details == "Description":
                st.write(dna_record.description)
            elif details == "Sequence":
                st.write(dna_record.seq)

            # Nucleotide frequency
            st.subheader("Nucleotide Frequency")
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_color = st.beta_color_picker("Adenine Color")
            thymine_color = st.beta_color_picker("Thymine Color")
            guanine_color = st.beta_color_picker("Guanine Color")
            cytosine_color = st.beta_color_picker("Cytosil Color")

            if st.button("Plot Frequency"):
                # Colour each bar by the nucleotide it represents. Counter
                # keeps keys in first-seen order, so fixed bar positions
                # would mislabel most files and fail outright when fewer
                # than four distinct symbols are present.
                palette = {
                    "A": adenine_color,
                    "T": thymine_color,
                    "G": guanine_color,
                    "C": cytosine_color,
                }
                labels = list(dna_freq.keys())
                bars = plt.bar(labels, list(dna_freq.values()))
                for bar, label in zip(bars, labels):
                    picked = palette.get(str(label).upper())
                    # Other symbols (e.g. ambiguity codes) keep the
                    # default matplotlib colour.
                    if picked is not None:
                        bar.set_color(picked)

                st.pyplot()

            st.subheader("DNA Composition")
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            st.json({"GC Content": gc_score, "AT Content": at_score})

            # Count occurrences of the text typed by the user
            nt_count = st.text_input("Enter Nucleotide",
                                     "Type Nucleotide Alphabet")
            st.write("Number of {} Nucleotide is ::{}".format(
                nt_count, str(dna_seq).count(nt_count)))

            # Protein synthesis
            st.subheader("Protein Synthesis")
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            # The elif chain means each later checkbox call is only reached
            # while every earlier checkbox is unchecked.
            if st.checkbox("Transcription"):
                st.write(dna_seq.transcribe())

            elif st.checkbox("Translation"):
                st.write(dna_seq.translate())

            elif st.checkbox("Complement"):
                st.write(dna_seq.complement())

            elif st.checkbox("Amino Acid Frequency"):
                st.write(aa_freq)

            elif st.checkbox("Plot Amino Acid Frequency"):
                aa_color = st.beta_color_picker("Pick An Amino Acid Color")
                plt.bar(aa_freq.keys(), aa_freq.values(), color=aa_color)
                st.pyplot()

            elif st.checkbox("Full Amino Acid Name"):
                # Stop codons ("*") are dropped before the 1-to-3 letter
                # conversion.
                aa_name = str(p1).replace("*", "")
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write("=========================")
                st.write(aa3)

                st.write("=========================")
                st.write(utils.get_acid_name(aa3))

    elif choice == "DotPlot":
        st.subheader("Generate Dot Plot For Two Sequences")
        seq_file1 = st.file_uploader("Upload 1st FASTA File",
                                     type=["fasta", "fa"])
        seq_file2 = st.file_uploader("Upload 2nd FASTA File",
                                     type=["fasta", "fa"])

        # Both uploads are tested explicitly: `a and b is not None` parses
        # as `a and (b is not None)` and only truth-tests the first file.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, "fasta")
            dna_record2 = SeqIO.read(seq_file2, "fasta")
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq
            dna_freq1 = Counter(dna_seq1)
            dna_freq2 = Counter(dna_seq2)
            p1 = dna_seq1.translate()
            aa_freq1 = Counter(str(p1))
            p2 = dna_seq2.translate()
            aa_freq2 = Counter(str(p2))
            details = st.radio(
                "Details",
                ("Description", "Sequence", "Nucleotide Frequency",
                 "Nucleotide Plot Frequency", "Amino Acid Frequency",
                 "Amino Acid Plot Frequency"))
            if details == "Description":
                st.write(dna_record1.description)
                st.write("=====================")
                st.write(dna_record2.description)
                st.subheader("DNA Composition")
                gc_score1 = utils.gc_content(str(dna_seq1))
                at_score1 = utils.at_content(str(dna_seq1))
                st.json({"GC Content": gc_score1, "AT Content": at_score1})
                gc_score2 = utils.gc_content(str(dna_seq2))
                at_score2 = utils.at_content(str(dna_seq2))
                st.json({"GC Content": gc_score2, "AT Content": at_score2})
            elif details == "Sequence":
                st.write(dna_record1.seq)
                st.write(
                    "========================================================================="
                )
                st.write(dna_record2.seq)
            elif details == "Nucleotide Frequency":
                st.write(dna_freq1)
                st.write("=====================")
                st.write(dna_freq2)
            elif details == "Nucleotide Plot Frequency":
                plt.bar(dna_freq1.keys(), dna_freq1.values())
                st.pyplot()
                st.write(
                    "=========================================================================="
                )
                plt.bar(dna_freq2.keys(), dna_freq2.values())
                st.pyplot()
            elif details == "Amino Acid Frequency":
                st.write(aa_freq1)
                st.write("=====================")
                st.write(aa_freq2)
            elif details == "Amino Acid Plot Frequency":
                plt.bar(aa_freq1.keys(), aa_freq1.values())
                st.pyplot()
                st.write(
                    "=========================================================================="
                )
                plt.bar(aa_freq2.keys(), aa_freq2.values())
                st.pyplot()
            cus_limit = st.number_input(
                "Select Max number of Nucleotide (Minimum 100)", 100, 40000,
                10000)
            # The "Similarity" button call is only reached on runs where
            # "Dot Plot" was not the button just pressed.
            if st.button("Dot Plot"):
                st.write(
                    "Comparing the first {} Nucleotide of the Two Sequences".
                    format(cus_limit))
                # NOTE(review): dotplotx is defined elsewhere; presumably it
                # draws on the current pyplot figure shown below - confirm.
                dotplotx(dna_seq1[0:cus_limit], dna_seq2[0:cus_limit])

                st.pyplot()
            elif st.button("Similarity"):
                st.write(
                    "Similarity of Comparing the first {} Nucleotide of the Two Sequences"
                    .format(cus_limit))
                head1 = dna_seq1[0:cus_limit]
                head2 = dna_seq2[0:cus_limit]
                if len(head1) == 0:
                    # The percentage is normalised by the first slice's
                    # length, so an empty slice would divide by zero.
                    st.warning(
                        "The first sequence is empty; similarity cannot be computed."
                    )
                else:
                    score = pairwise2.align.globalxx(head1,
                                                     head2,
                                                     one_alignment_only=True,
                                                     score_only=True)
                    # Alignment score as a percentage of the first slice.
                    st.write(score / len(head1) * 100)
Exemplo n.º 8
0
def main():
    """A simple bioinformatics app.

    Renders a Streamlit page with a sidebar menu offering four activities:
    'Intro', 'DNA Sequence' (analysis of one uploaded FASTA record),
    'DotPlot' (dot plot of two uploaded FASTA records) and 'About'.
    """
    st.title("Simple Bioinformatics App")
    menu = ['Intro', 'DNA Sequence', 'DotPlot', 'About']

    choice = st.sidebar.selectbox('Select Activity', menu)
    if choice == 'Intro':
        st.subheader('Intro to BioInformatics')
    elif choice == 'DNA Sequence':
        st.subheader('DNA Sequence Analysis')
        seq_file = st.file_uploader('Upload FASTA file',
                                    type=['fasta', 'fa', 'txt', 'fna'])

        if seq_file is not None:
            # NOTE(review): an earlier note here said newer Streamlit
            # releases stop auto-detecting the upload's encoding and return
            # binary buffers, which would need io.TextIOWrapper(seq_file)
            # before parsing - confirm against the installed version.
            dna_record = SeqIO.read(seq_file, 'fasta')
            dna_seq = dna_record.seq

            details = st.radio('Details', ('Description', 'Sequence'))
            if details == 'Description':
                st.write(dna_record.description)
            elif details == 'Sequence':
                st.write(dna_seq)

            # nucleotide frequencies
            st.subheader('Nucleotide Frequency')
            dna_freq = Counter(dna_seq)
            st.write(dna_freq)
            adenine_colour = st.beta_color_picker('Adenine Colour')
            guanine_colour = st.beta_color_picker('Guanine Colour')
            cytosine_colour = st.beta_color_picker('Cytosine Colour')
            thymine_colour = st.beta_color_picker('Thymine Colour')

            if st.button('Plot Frequency'):
                fig, ax = plt.subplots()
                # Colour each bar by the nucleotide it represents. Counter
                # keeps keys in first-seen order, so fixed bar positions
                # would mislabel most files and fail outright when fewer
                # than four distinct symbols are present.
                palette = {
                    'A': adenine_colour,
                    'G': guanine_colour,
                    'C': cytosine_colour,
                    'T': thymine_colour,
                }
                labels = list(dna_freq.keys())
                bars = ax.bar(labels, list(dna_freq.values()))
                for bar, label in zip(bars, labels):
                    picked = palette.get(str(label).upper())
                    # Other symbols keep the default matplotlib colour.
                    if picked is not None:
                        bar.set_color(picked)
                st.pyplot(fig)

            st.subheader('DNA Composition')
            gc_score = utils.gc_content(str(dna_seq))
            at_score = utils.at_content(str(dna_seq))
            # The composition is shown twice: once via st.write and once
            # via st.json.
            st.write({'GC Content' : gc_score, 'AT Content' : at_score})
            st.json({'GC Content' : gc_score, 'AT Content' : at_score})

            # nucleotide count
            nt_count = st.text_input('Enter Nucleotide Here',
                                     'Type Nucleotide Alphabet')
            st.write('Number of {} Nucleotide is ::{}'.format(
                nt_count, str(dna_seq).count(nt_count)))

            # protein synthesis
            st.subheader('Protein Synthesis')
            p1 = dna_seq.translate()
            aa_freq = Counter(str(p1))

            # The elif chain means each later checkbox call is only reached
            # while every earlier checkbox is unchecked.
            if st.checkbox('Transcription'):
                st.write(dna_seq.transcribe())

            elif st.checkbox('Translate'):
                st.write(p1)

            elif st.checkbox('Complement'):
                st.write(dna_seq.complement())

            elif st.checkbox('AA Frequency'):
                st.write(aa_freq)

            # bar chart of the amino acid counts
            elif st.checkbox('Plot AA Frequency'):
                fig, ax = plt.subplots()
                aa_colour = st.beta_color_picker('Pick an amino acid colour')
                ax.bar(aa_freq.keys(), aa_freq.values(), color=aa_colour)
                st.pyplot(fig)

            elif st.checkbox('Full amino acid name'):
                # Stop codons ('*') are dropped before the 1-to-3 letter
                # conversion.
                aa_name = str(p1).replace('*', '')
                aa3 = utils.convert_1to3(aa_name)
                st.write(aa_name)
                st.write('='*30)
                st.write(aa3)

                st.write('='*30)
                st.write(utils.get_acid_name(aa3))

    elif choice == 'DotPlot':
        st.subheader('Generate Dot Plot for two Sequences')

        seq_file1 = st.file_uploader('Upload 1st FASTA file',
                                     type=['fasta', 'fa', 'txt', 'fna'])

        seq_file2 = st.file_uploader('Upload 2nd FASTA file',
                                     type=['fasta', 'fa', 'txt', 'fna'])
        # Both uploads are tested explicitly: `a and b is not None` parses
        # as `a and (b is not None)` and only truth-tests the first file.
        if seq_file1 is not None and seq_file2 is not None:
            dna_record1 = SeqIO.read(seq_file1, 'fasta')
            dna_record2 = SeqIO.read(seq_file2, 'fasta')
            dna_seq1 = dna_record1.seq
            dna_seq2 = dna_record2.seq

            details = st.radio('Details', ('Description', 'Sequence'))
            if details == 'Description':
                st.write(dna_record1.description)
                st.write('='*50)
                st.write(dna_record2.description)
            elif details == 'Sequence':
                st.write(dna_record1.seq)
                st.write('='*50)
                st.write(dna_record2.seq)

            # (label, min_value, max_value, value)
            custom_limit = st.number_input('Select max number of nucleotides',
                                           10, 400, 50)
            if st.button('Dot Plot'):
                # join_str, assign_numeric_array, generate_comparison_array
                # and plot_dotplot are helpers defined elsewhere.
                seq1 = join_str(dna_seq1, custom_limit)
                seq2 = join_str(dna_seq2, custom_limit)
                numeric_arr1 = assign_numeric_array(seq1)
                numeric_arr2 = assign_numeric_array(seq2)
                compr_array = generate_comparison_array(numeric_arr1,
                                                        numeric_arr2)
                fig, ax = plot_dotplot(compr_array, seq1, seq2)
                st.write('Comparing the first {} nucleotides of the two sequences'.format(custom_limit))
                st.pyplot(fig)


    elif choice == 'About':
        st.subheader('About')