Exemplo n.º 1
0
    def associate_annotations(network_module, line, sql_session):
        """Parse an annotation line and attach its annotations to a module.

        The line must contain exactly two tab-separated fields: a tag and a
        "||"-separated annotation list. Each annotation is either
        "GO_term<>GO_ID" or a lone "GO_term" (stored with an empty ID).
        For every annotation a NetworkModuleAnnotation is built, added to the
        module, and both the annotation and the module are added to the
        SQL session.

        Raises RainetException if the line does not have two fields or if an
        annotation contains more than one "<>" separator.
        """
        fields = line.split("\t")
        if len(fields) != 2:
            raise RainetException(
                "NetworkModuleParser.associate_annotations : Abnormal annotation line : "
                + line + " for network module ID = " + network_module.moduleID)

        for raw_annotation in fields[1].split("||"):
            parts = raw_annotation.split("<>")
            part_count = len(parts)

            # str.split always yields at least one element, so anything
            # other than one or two parts means too many "<>" separators.
            if part_count > 2:
                raise RainetException(
                    "NetworkModuleParser.associate_annotations : Abnormal annotation : "
                    + raw_annotation)

            go_term = parts[0]
            go_id = parts[1] if part_count == 2 else ""
            new_annotation = NetworkModuleAnnotation(go_term, go_id)

            network_module.add_associated_annotation(new_annotation)
            sql_session.add(new_annotation)
            sql_session.add(network_module)
Exemplo n.º 2
0
    def search_similar_interaction(self, protein_a, protein_b, sql_session):
        """Find an already stored interaction between two proteins.

        The A -> B orientation is queried first, then B -> A. The single
        matching PPINetworkInteraction is returned; None is returned when
        neither orientation matches.

        Raises RainetException when an orientation matches several rows.
        """
        # Orientation A -> B
        forward_hits = sql_session.query(PPINetworkInteraction).filter(
            PPINetworkInteraction.interactorA == protein_a,
            PPINetworkInteraction.interactorB == protein_b).all()
        if forward_hits:
            if len(forward_hits) > 1:
                raise RainetException(
                    "PPINetworkInteraction.__init__ : Found several protein interaction for interactor A = "
                    + protein_a.uniprotAC + " /interactor B = " +
                    protein_b.uniprotAC)
            return forward_hits[0]

        # Orientation B -> A
        reverse_hits = sql_session.query(PPINetworkInteraction).filter(
            PPINetworkInteraction.interactorA == protein_b,
            PPINetworkInteraction.interactorB == protein_a).all()
        if reverse_hits:
            if len(reverse_hits) > 1:
                # This message additionally reports the detection method and
                # pubmed ID held by this instance.
                raise RainetException(
                    "PPINetworkInteraction.__init__ : Found several protein interaction for interactor A = "
                    + protein_b.uniprotAC + " /interactor B = " +
                    protein_a.uniprotAC + " /detection method = " +
                    self.detectionMethod + " /pubmedID = " + self.pubmedID)
            return reverse_hits[0]

        return None
Exemplo n.º 3
0
    def query_to_dict(self, keyword, col1, col2):
        """Replace the rows stored under *keyword* by a key -> values dict.

        For every row of self.data[keyword], str(row[col1]) becomes a key and
        str(row[col2]) is appended to that key's list (in row order). The
        resulting dict overwrites self.data[keyword]; nothing is returned.

        Raises RainetException if the keyword is unknown, if a requested
        column index is beyond the end of a row, or if the stored data cannot
        be iterated / indexed as rows.
        """
        if keyword not in self.data:
            raise RainetException(
                "DataManager.query_to_dict : Data keyword does not exist: " +
                keyword)
        data = self.data[keyword]

        outDict = {}
        try:
            for entry in data:
                # Valid indices are 0 .. len(entry) - 1, hence ">=".
                if col1 >= len(entry) or col2 >= len(entry):
                    raise RainetException(
                        "DataManager.query_to_dict : requested columns out of boundaries of query result: "
                        + str(entry))

                outDict.setdefault(str(entry[col1]), []).append(
                    str(entry[col2]))
        except RainetException:
            # Let the specific out-of-boundaries error through instead of
            # relabelling it as "not iterable".
            raise
        except Exception:
            raise RainetException(
                "DataManager.query_to_dict : query result is not iterable: " +
                keyword)

        self.data[keyword] = outDict
Exemplo n.º 4
0
    def check_species(sql_session, species, create=False):
        """Check that the database species matches the requested species.

        The species is read from the DBParameter row whose name is
        OptionConstants.OPTION_SPECIES.

        - If no species is stored and *create* is true, a new DBParameter is
          added and committed.
        - If no species is stored and *create* is false, RainetException is
          raised.
        - If a species is stored, it is compared case-insensitively with
          *species*; a mismatch raises RainetException.

        SQLAlchemy errors are wrapped in RainetException; the message tells
        whether the query or the commit failed.
        """
        # Step 1: read the stored species.
        try:
            db_species = sql_session.query(DBParameter.parameterValue).filter(
                DBParameter.parameterName ==
                OptionConstants.OPTION_SPECIES).first()
        except exc.SQLAlchemyError as sqle:
            # The session is rolled back in create mode, as before.
            if create:
                sql_session.rollback()
            raise RainetException(
                "SQLManager.check_species : An error occurred while querying the species parameter.",
                sqle)

        # Step 2: no species declared yet -> create it or refuse.
        if db_species is None or len(db_species) == 0:
            if not create:
                raise RainetException(
                    "SQLManager.check_species: The database has no species declared and must not be used with that command."
                )
            try:
                species_param = DBParameter(OptionConstants.OPTION_SPECIES,
                                            "string", species)
                sql_session.add(species_param)
                sql_session.commit()
            except exc.SQLAlchemyError as sqle:
                sql_session.rollback()
                raise RainetException(
                    "SQLManager.check_species : An error occurred while committing the species parameter.",
                    sqle)
            return

        # Step 3: a species is declared -> it must match the user's one.
        db_species_string = db_species[0]
        if db_species_string.lower() != species.lower():
            raise RainetException(
                "SQLManager.check_species: The database species is not the same as the user's specified species: "
                + db_species_string + " != " + species)
Exemplo n.º 5
0
    def read_properties(self, file_path):
        """Load every option of every section of an INI file.

        Each option value is stored in self.propertiesDict under the
        lower-cased option name; sections are flattened, so an option that
        appears in several sections keeps the value read last.

        Raises RainetException if an option cannot be read ("Abnormal
        property definition") or if its value is None ("Wrong property
        definition").
        """
        Logger.get_instance().info("Reading properties from file : " +
                                   file_path)
        config_parser = ConfigParser()
        config_parser.read(file_path)

        for section in config_parser.sections():
            for option in config_parser.options(section):
                option = option.lower()

                # Only the parser call is guarded, so the "Wrong property"
                # error raised below is no longer swallowed and relabelled.
                try:
                    value = config_parser.get(section, option)
                except Exception:
                    raise RainetException(
                        "PropertyManager.readProperties : Abnormal property definition for key = "
                        + option + " in section " + section + " of file " +
                        file_path)

                self.propertiesDict[option] = value
                if value is None:
                    raise RainetException(
                        "PropertyManager.readProperties : Wrong property definition for key = "
                        + option + " in section " + section + " of file " +
                        file_path)
Exemplo n.º 6
0
 def __init__(self, iso_ac, protein_ac, is_alternative):
     """Create an isoform and link it to its parent Protein.

     The Protein whose uniprotAC equals *protein_ac* is looked up in the
     database and this isoform is registered on it via add_isoform.

     Raises RainetException if *iso_ac* is None or empty, if *protein_ac*
     is None, if several Proteins match, or if the matching Protein is
     None. Raises NotRequiredInstantiationException if no Protein matches.
     """
     # Basic attributes are assigned before any validation.
     self.isoformUniprotAC = iso_ac
     # NOTE(review): bool() of any non-empty string is True (including "0"
     # or "False") -- confirm what callers pass here.
     self.isAlternative = bool(is_alternative)

     if iso_ac is None or iso_ac == "":
         raise RainetException("ProteinIsoform.__init__: No isoformProteinAC provided: ")

     if protein_ac is None:
         raise RainetException("ProteinIsoform.__init__: abnormal isoform definition : Isoform AC = " + iso_ac + " but original protein uniprotAC is not defined.")

     # Fetch the Protein(s) carrying the given uniprotAC
     sql_session = SQLManager.get_instance().get_session()
     from fr.tagc.rainet.core.data.Protein import Protein
     matches = sql_session.query(Protein).filter(Protein.uniprotAC == protein_ac).all()

     if not matches:
         raise NotRequiredInstantiationException("ProteinIsoform.__init__: No Protein found with uniprotAC= " + protein_ac)

     if len(matches) > 1:
         raise RainetException("ProteinIsoform.__init__: Several Proteins found with uniprotAC= " + protein_ac + " : " + str(len(matches)) + " Proteins")

     parent_protein = matches[0]
     if parent_protein is None:
         raise RainetException("ProteinIsoform.__init__: Found Proteins with uniprotAC= " + protein_ac + " is None.")

     # Exactly one Protein: build the Protein <-> ProteinIsoform relationship
     parent_protein.add_isoform(self)
Exemplo n.º 7
0
    def calculate_protein_degree(self):
        """Compute the degree of every named vertex of self.graph.

        Stores two attributes:
        - self.degreeDict: protein name -> degree of its vertex
        - self.proteinsPerDegreeDict: degree -> set of protein names

        A vertex whose index is missing from self.proteinGraphIDDict is
        skipped when the index is 0 and raises RainetException otherwise.
        A protein name seen twice also raises RainetException.
        """
        degree_by_protein = {}

        # Nothing in this method adds to this set, so the log message below
        # always reports an empty set.
        unresolved_names = set()

        for vertex in self.graph.vs:
            # Translate the graph index into a protein name
            try:
                name = self.proteinGraphIDDict[vertex.index]
            except KeyError:
                if vertex.index != 0:
                    raise RainetException(
                        "NetworkScoreAnalysis.calculate_protein_degree : Could not find protein name for graph index %s"
                        % (vertex.index))
                continue

            if name in degree_by_protein:
                raise RainetException(
                    "NetworkScoreAnalysis.calculate_protein_degree : duplicate uniprotAC %s"
                    % (name))
            degree_by_protein[name] = vertex.degree()

        Logger.get_instance().info(
            "NetworkScoreAnalysis.calculate_protein_degree : Could not find proteins uniprotac for protein names %s. These were discarded."
            % (unresolved_names))

        # Invert the mapping: group the protein names by degree
        proteins_by_degree = {}
        for name in degree_by_protein:
            proteins_by_degree.setdefault(degree_by_protein[name],
                                          set()).add(name)

        self.degreeDict = degree_by_protein
        self.proteinsPerDegreeDict = proteins_by_degree
Exemplo n.º 8
0
 def find_unique_protein_from_uniprotAC_list( uniprot_ac_list ):
     """Return the first Protein found for a list of accession numbers.

     Each accession is tried in order with
     SQLUtil.find_unique_protein_from_uniprotAC; the first non-None result
     is returned.

     Raises RainetException if the list is None or empty, or if none of the
     accessions resolves to a Protein.
     """
     # "not" covers both None and an empty list; len(None) used to fail
     # with a TypeError before the intended exception could be raised.
     if not uniprot_ac_list:
         raise RainetException( "SQLUtil.find_unique_protein_from_uniprotAC_list: Provided uniprotAC list is None or empty." )

     for uniprot_ac in uniprot_ac_list:
         protein = SQLUtil.find_unique_protein_from_uniprotAC( uniprot_ac )
         if protein is not None:
             return protein

     raise RainetException( "SQLUtil.find_unique_protein_from_uniprotAC_list: No Proteins found with uniprotAC list = " + str( uniprot_ac_list ) )
Exemplo n.º 9
0
    def __init__(self, Corum_cluster_id, protein_id):
        """Associate a Protein with a Corum cluster; never yields an instance.

        The CorumCluster whose clusterID equals *Corum_cluster_id* and the
        Protein whose uniprotAC equals *protein_id* are looked up. When both
        are found exactly once, the protein is added to the cluster and both
        objects are added to the SQL session.

        This constructor always ends by raising an exception:
        - NotRequiredInstantiationException when the cluster or the protein
          is not found, and also after a successful association (the
          association row is created through the cluster/protein relation,
          so this object itself must not be inserted).
        - RainetException when several clusters or proteins match, or when a
          matched object is None.
        """
        sql_session = SQLManager.get_instance().get_session()

        #=======================================================================
        # Search for the Corum cluster corresponding to the given Corum cluster ID
        #=======================================================================
        from fr.tagc.rainet.core.data.CorumCluster import CorumCluster
        clusters_list = sql_session.query(CorumCluster).filter(CorumCluster.clusterID == Corum_cluster_id).all()

        # The messages below report the requested ID (Corum_cluster_id).
        # Concatenating the not-yet-assigned local (None) used to raise a
        # TypeError instead of the intended exception.
        if not clusters_list:
            raise NotRequiredInstantiationException("ProteinCorumAnnotation.init : No Corum cluster found for cluster id = " + str(Corum_cluster_id))
        if len(clusters_list) > 1:
            raise RainetException("ProteinCorumAnnotation.init : Abnormal number of Corum clusters found for cluster_id = " + str(Corum_cluster_id) + " : " + str(len(clusters_list)))

        cluster = clusters_list[0]
        if cluster is None:
            raise RainetException("ProteinCorumAnnotation.init : returned cross reference is None for " + str(Corum_cluster_id))

        #=======================================================================
        # Search the protein object. Then build the association between the Protein and the CorumCluster
        #=======================================================================
        protein_list = sql_session.query(Protein).filter(Protein.uniprotAC == protein_id).all()

        if not protein_list:
            raise NotRequiredInstantiationException("ProteinCorumAnnotation.init : No Protein found for uniprotAC = " + str(protein_id))
        if len(protein_list) > 1:
            raise RainetException("ProteinCorumAnnotation.init : Abnormal number of Protein found for = " + str(protein_id) + " : " + str(len(protein_list)))

        protein = protein_list[0]
        if protein is None:
            raise RainetException("ProteinCorumAnnotation.init : returned Protein is None for UniprotAC" + str(protein_id))

        # -- Build the relation between the Corum cluster and the Protein
        cluster.add_annotated_protein(protein)
        sql_session.add(cluster)
        sql_session.add(protein)

        # Raise the Exception to indicate the instance must not be inserted since it is automatically created
        raise NotRequiredInstantiationException("ProteinCorumAnnotation.init : ProteinCorumAnnotation objects do not have to be inserted by __init__ since they are created by CorumCluster to Protein association table.")
Exemplo n.º 10
0
    def delete_data(self, keyword):
        """Remove the entry stored under *keyword* from self.data.

        The keyword is converted with str() before the lookup.

        Raises RainetException if the keyword cannot be converted to a
        string or if no data is stored under it.
        """
        try:
            keyword = str(keyword)
        except Exception:
            # Narrowed from a bare "except:" so that KeyboardInterrupt and
            # SystemExit are not turned into a RainetException.
            raise RainetException(
                "DataManager.delete_data : keyword must be a string.")

        if keyword not in self.data:
            raise RainetException(
                "DataManager.delete_data : Data keyword does not exist: " +
                keyword)

        del self.data[keyword]
Exemplo n.º 11
0
    def get_data(self, keyword):
        """Return the data stored under *keyword* in self.data.

        The keyword is converted with str() before the lookup.

        Raises RainetException if the keyword cannot be converted to a
        string or if no data is stored under it.
        """
        try:
            keyword = str(keyword)
        except Exception:
            # Narrowed from a bare "except:" so that KeyboardInterrupt and
            # SystemExit are not turned into a RainetException.
            raise RainetException(
                "DataManager.get_data : keyword must be a string.")

        if keyword not in self.data:
            raise RainetException(
                "DataManager.get_data : Data keyword does not exist: " +
                keyword)

        return self.data[keyword]
Exemplo n.º 12
0
    def read_rainet_db(self):
        """Load the transcript -> gene mapping from the RNA table.

        Queries (RNA.transcriptID, RNA.geneID) through self.sql_session and
        stores a dict transcript ID -> str(gene ID) in
        self.rnaCrossReference. The numbers of transcripts and of distinct
        genes are logged.

        Raises RainetException if a transcript ID appears more than once.
        """
        rows = self.sql_session.query(RNA.transcriptID, RNA.geneID).all()

        # The mapping must be many-to-one: one gene per transcript, possibly
        # many transcripts per gene.
        gene_by_transcript = {}
        distinct_genes = set()

        for transcript_id, gene_id in rows:
            if transcript_id in gene_by_transcript:
                raise RainetException(
                    "LncRNAGroupOddsRatio.read_rainet_db: duplicate transcript ID "
                )
            gene_by_transcript[transcript_id] = str(gene_id)
            distinct_genes.add(gene_id)

        transcript_count = len(gene_by_transcript)
        gene_count = len(distinct_genes)
        Logger.get_instance().info(
            "LncRNAGroupOddsRatio.read_rainet_db : Number transcripts read %s"
            % (transcript_count))
        Logger.get_instance().info(
            "LncRNAGroupOddsRatio.read_rainet_db : Number genes read %s" %
            (gene_count))

        self.rnaCrossReference = gene_by_transcript
Exemplo n.º 13
0
    def __init__(self, annotationFile, externalFiles, backgroundList,
                 outputFile, useGenes, rainetDB):
        """Store the run arguments and prepare the output location.

        When *useGenes* is true, a RAINET database is required: the SQL
        session is opened on *rainetDB* and read_rainet_db() is called.
        Finally the folder that will contain *outputFile* is created if it
        does not exist yet.

        Raises RainetException if *useGenes* is true while *rainetDB* still
        has its default value.
        """
        self.annotationFile = annotationFile
        self.externalFiles = externalFiles
        self.backgroundList = backgroundList
        self.outputFile = outputFile
        self.useGenes = useGenes
        self.rainetDB = rainetDB

        # check for argument problems, if ID mapping needs to be used, read RAINET DB
        if useGenes:
            if rainetDB == LncRNAGroupOddsRatio.ARGUMENT_RAINET_DB_DEFAULT:
                raise RainetException(
                    "LncRNAGroupOddsRatio.__init__: --rainetDB input needs to be provided for converting transcript IDs to gene IDs."
                )

            # Build a SQL session to DB
            SQLManager.get_instance().set_DBpath(self.rainetDB)
            self.sql_session = SQLManager.get_instance().get_session()

            self.read_rainet_db()

        # Make the output folder. The directory part of the output path is
        # needed here (basename would give the file name), and it must be
        # created rather than executed as a shell command.
        baseFolder = os.path.dirname(self.outputFile)
        # An empty dirname means the current directory: nothing to create.
        if baseFolder and not os.path.exists(baseFolder):
            os.makedirs(baseFolder)
Exemplo n.º 14
0
    def __init__(self, rainetDB, outputFolder, useInteractingProteins,
                 listDatasets, highOverlapStat):
        """Store the run arguments, open the DB session and check datasets.

        *listDatasets* is a comma-separated string; every name must be a key
        of ComplexDatasetOverlap.ANNOTATION_TABLES_DICT and is collected, in
        order, in self.listDatasets. The output folder is created when it
        does not exist.

        Raises RainetException for an unknown dataset name.
        """
        self.rainetDB = rainetDB
        self.outputFolder = outputFolder
        self.useInteractingProteins = useInteractingProteins
        self.highOverlapStat = highOverlapStat

        # Open the SQL session on the requested database
        SQLManager.get_instance().set_DBpath(self.rainetDB)
        self.sql_session = SQLManager.get_instance().get_session()

        # Create the output folder if needed
        folder_exists = os.path.exists(self.outputFolder)
        if not folder_exists:
            os.mkdir(self.outputFolder)

        # Validate and collect the requested datasets
        self.listDatasets = []
        for dataset_name in listDatasets.split(","):
            if dataset_name not in ComplexDatasetOverlap.ANNOTATION_TABLES_DICT:
                raise RainetException("__init__: dataset not found: %s" %
                                      (dataset_name))
            self.listDatasets.append(dataset_name)
Exemplo n.º 15
0
    def __init__(self, RNA_acc, db_source, cross_reference):
        """Create a cross reference and attach it to its RNA.

        The RNA whose transcriptID equals *RNA_acc* is looked up. When
        exactly one is found, this object receives its source database and
        cross reference ID, is registered on the RNA via
        add_cross_reference, and the RNA is added to the SQL session.

        Raises RainetException if several RNAs match, and
        NotRequiredInstantiationException if none matches (the object then
        does not have to be inserted in the database).
        """
        sql_session = SQLManager.get_instance().get_session()

        # Retrieve the RNA instances carrying the provided accession number
        from fr.tagc.rainet.core.data.RNA import RNA
        matches = sql_session.query(RNA).filter(
            or_(RNA.transcriptID == RNA_acc)).all()

        match_count = len(matches)

        if match_count > 1:
            raise RainetException(
                "RNACrossReference.init : Abnormal number of RNAs found for accession number '"
                + RNA_acc + "' : " + str(match_count) + " RNAs found.")

        if match_count != 1:
            raise NotRequiredInstantiationException(
                "RNACrossReference.init : No Corresponding RNA found in Database."
            )

        # Exactly one RNA: fill this cross reference and link it to the RNA
        target_rna = matches[0]
        self.sourceDB = db_source
        self.crossReferenceID = cross_reference
        target_rna.add_cross_reference(self)
        sql_session.add(target_rna)
Exemplo n.º 16
0
 def execute( self ):
     """Run a query read from a file against the database and export it.

     Steps: read the options (DB name, query file, species), open the SQL
     session, check the species with SQLManager.check_species, read the
     query, perform it and export the result.

     Raises RainetException if no species is provided.
     """
     # Collect the option values
     option_manager = OptionManager.get_instance()
     self.DBPath = option_manager.get_option( OptionConstants.OPTION_DB_NAME )
     query_file_path = option_manager.get_option( OptionConstants.OPTION_QUERY_FILE )
     species = option_manager.get_option( OptionConstants.OPTION_SPECIES )

     # A species is mandatory
     if species is None or len( species ) == 0:
         raise RainetException( "InteractiveQueryStrategy.execute: You must specify a species in your command line. Please check help.")

     # Open the SQL session on the database
     sql_manager = SQLManager.get_instance()
     sql_manager.set_DBpath( self.DBPath )
     sql_session = sql_manager.get_session()

     # Compare the DB species with the user's one; the third argument is
     # check_species' "create" flag.
     SQLManager.check_species( sql_session, species, True )

     logger = Logger.get_instance()

     # Read the query from file
     logger.info("InteractiveQueryStrategy.execute : Reading query...")
     query_string = self.read_query( query_file_path )

     # Run the query
     logger.info("InteractiveQueryStrategy.execute : Querying database...")
     query_result = self.perform_query( sql_session, query_string )

     # Export the result
     logger.info("InteractiveQueryStrategy.execute : Exporting result...")
     self.export_query_result( query_result, query_file_path )

     logger.info("InteractiveQueryStrategy.execute : Finished.")
Exemplo n.º 17
0
    def initialize(self):
        """Read the strategy keyword and parse its command line options.

        The strategy is taken from sys.argv[1] and must belong to
        OptionConstants.STRATEGIES_LIST. The options declared for that
        strategy in OptionConstants.OPTION_LIST are registered on an
        OptionParser and the parsed values are stored, as a dict, in
        self.optionDict.

        Raises RainetException for an unknown strategy keyword.
        """
        # The first command line argument selects the strategy
        self.strategy = sys.argv[1]
        Logger.get_instance().info("Chosen strategy = " + self.strategy)

        known_strategies = OptionConstants.STRATEGIES_LIST
        if self.strategy not in known_strategies:
            raise RainetException(
                "OptionManager.initialize() : The main keyword is not correct : '"
                + self.strategy + "'. Should be one of " +
                str(OptionConstants.STRATEGIES_LIST))

        # Register every option declared for the strategy. Each definition
        # is indexed as: 0/1 = option flags, 2 = action, 3 = type,
        # 4 = dest, 5 = default, 6 = help.
        option_parser = OptionParser()
        for option_spec in OptionConstants.OPTION_LIST[self.strategy]:
            flags = (option_spec[0], option_spec[1])
            settings = dict(action=option_spec[2],
                            type=option_spec[3],
                            dest=option_spec[4],
                            default=option_spec[5],
                            help=option_spec[6])
            option_parser.add_option(*flags, **settings)

        # Keep the option values only; positional arguments are ignored
        parsed_options = option_parser.parse_args()[0]
        self.optionDict = vars(parsed_options)
Exemplo n.º 18
0
    def read_expression_file(self):
        """Convert the catRAPID score file into a flagged score table.

        Input (self.catrapidFile), one interaction per line, tab-separated:
            proteinID  transcriptID  score   e.g. Q7RTM1  ENST00000437598  0.013

        Output (<run.outputFolder>/scores.tsv): a header line followed by one
        line per input line holding "transcriptID|proteinID", the score and
        1/0 depending on whether the pair is in self.npinterPairs.

        Returns the dict pair -> score. To bound memory this dict is emptied
        every 1,000,000 lines, so both the returned dict and the duplicate
        detection only cover the lines read since the last reset.

        Raises RainetException if a pair is repeated (within the current
        window, see above).
        """
        interactingPairs = {}  # key -> "transcriptID|proteinID", val -> score
        proteinSet = set()
        countLines = 0

        # NOTE(review): the folder is read from the global "run" object and
        # not from self -- confirm this is intended.
        # Both files are handled by "with" so they are closed even when a
        # line cannot be parsed or a repeated pair aborts the run.
        with open(run.outputFolder + "/scores.tsv", "w") as outFile:
            outFile.write("pairID\tcatrapid_score\tin_validated_set\n")

            with open(self.catrapidFile, "r") as f:
                for line in f:
                    spl = line.split("\t")

                    countLines += 1

                    if countLines % 1000000 == 0:
                        print("Processed %s interactions" % countLines)
                        # Periodic reset to keep memory usage bounded
                        interactingPairs = {}

                    proteinID = spl[0]
                    transcriptID = spl[1]
                    intScore = float(spl[2])

                    pair = transcriptID + "|" + proteinID

                    proteinSet.add(proteinID)

                    # A pair must appear only once
                    if pair in interactingPairs:
                        raise RainetException("Repeated protein-RNA pair: " +
                                              line)
                    interactingPairs[pair] = intScore

                    inValidated = 1 if pair in self.npinterPairs else 0

                    # Written line by line (the file object buffers) instead
                    # of growing one huge string with "+=".
                    outFile.write("%s\t%s\t%s\n" %
                                  (pair, intScore, inValidated))

        # Single-argument print calls behave the same on Python 2 and 3; the
        # double space reproduces the former "print a, b" output.
        print("read_expression_file: Number of proteins:  %s" %
              len(proteinSet))
        print("read_expression_file: Number of protein-RNA pairs in file:  %s"
              % len(interactingPairs))

        return interactingPairs
Exemplo n.º 19
0
    def rollback_session(self):
        """Roll back self.session.

        Raises RainetException, carrying the original error, if the rollback
        raises a SQLAlchemy error.
        """
        current_session = self.session
        try:
            current_session.rollback()
        except exc.SQLAlchemyError as rollback_error:
            message = "SQLManager.rollback : An error occurred while rollbacking the session."
            raise RainetException(message, rollback_error)
Exemplo n.º 20
0
 def find_unique_protein_from_uniprotAC( uniprot_ac ):
     """Return the single Protein matching an accession, or None.

     A Protein matches when its uniprotAC or its uniprotID equals
     *uniprot_ac*. None is returned when nothing matches.

     Raises RainetException if several Proteins match or if the single
     match is None.
     """
     sql_session = SQLManager.get_instance().get_session()

     matches = sql_session.query( Protein ).filter( or_( Protein.uniprotAC == uniprot_ac, Protein.uniprotID == uniprot_ac ) ).all()

     # No match at all
     if not matches:
         return None

     # Ambiguous accession
     if len( matches ) > 1:
         raise RainetException( "SQLUtil.find_unique_protein_from_uniprotAC: Several Proteins found with uniprotAC= " + uniprot_ac + " : " + str( len( matches ) ) + " Proteins" )

     unique_protein = matches[0]
     if unique_protein is None:
         raise RainetException( "SQLUtil.find_unique_protein_from_uniprotAC: Found Proteins with uniprotAC= " + uniprot_ac + " is None." )

     return unique_protein
Exemplo n.º 21
0
    def store_data(self, keyword, data):
        """Store *data* in self.data under str(keyword).

        An existing entry with the same keyword is overwritten.

        Raises RainetException if the keyword cannot be converted to a
        string.
        """
        try:
            keyword = str(keyword)
        except Exception:
            # The message now names this method (it used to say get_data),
            # and the bare "except:" no longer traps KeyboardInterrupt or
            # SystemExit.
            raise RainetException(
                "DataManager.store_data : keyword must be a string.")

        self.data[keyword] = data
Exemplo n.º 22
0
    def associate_proteins(module, line):
        """Associate the proteins listed on a line with a network module.

        The line is split on single spaces. Each token is searched first as
        a Protein uniprotID / uniprotAC, then as a ProteinCrossReference ID.
        A token with exactly one match gets its Protein added to the SQL
        session and to the module; a token without match is only reported
        at debug level.

        Raises RainetException if a token matches several Proteins, if the
        matched Protein is None, or if the token list is empty or None.
        """
        tokens = line.split(" ")

        sql_session = SQLManager.get_instance().get_session()

        if tokens is None or len(tokens) == 0:
            raise RainetException(
                "NetworkModuleParser.associate_proteins : list of proteins is empty or None : "
                + line)

        for token in tokens:
            # First attempt: the token is a Uniprot ID or a Uniprot AC
            matches = sql_session.query(Protein).filter(
                or_(Protein.uniprotID == token,
                    Protein.uniprotAC == token)).all()

            # Second attempt: the token is a cross reference ID
            if not matches:
                matches = sql_session.query(Protein).filter(
                    Protein.uniprotAC == ProteinCrossReference.protein_id,
                    ProteinCrossReference.crossReferenceID == token).all()

            # Unknown token: report it and go on with the next one
            if not matches:
                Logger.get_instance().debug(
                    "NetworkModuleParser.associate_proteins : No Protein found for ID = "
                    + token)
                continue

            # Ambiguous token
            if len(matches) > 1:
                raise RainetException(
                    "NetworkModuleParser.associate_proteins : Abnormal number of Protein found for ID = "
                    + token + " : " + str(len(matches)))

            found_protein = matches[0]
            sql_session.add(found_protein)

            if found_protein is None:
                raise RainetException(
                    "NetworkModuleParser.associate_proteins : Protein found is none for ID = "
                    + token)

            module.add_associated_protein(found_protein)
Exemplo n.º 23
0
    def read_catrapid_file(self):
        """Read the catRAPID file and keep the best score of each pair.

        Input (self.catrapidFile), tab-separated; the third column holds
        "peptideID_transcriptID" and the fourth the interaction score, e.g.
            1       1       ENSP00000269701_ENST00000456726 -266.23 0.986

        The peptide ID is translated through self.xrefDict, whose value must
        be a collection holding exactly one protein ID. Lines whose peptide
        ID is not in self.xrefDict are skipped (and counted).

        Returns a dict "transcriptID|proteinID" -> maximum interaction score.

        Raises RainetException if a peptide ID maps to a number of protein
        IDs different from one.
        """
        interactingPairs = {}  # key -> "transcriptID|proteinID", val -> maximum score
        peptideIDNotFound = set()
        proteinSet = set()
        countLines = 0

        with open(self.catrapidFile, "r") as f:
            for line in f:
                spl = line.split("\t")

                countLines += 1

                if countLines % 10000000 == 0:
                    print("Processed %s interactions" % countLines)

                splIDs = spl[2].split("_")
                peptideID = splIDs[0]
                transcriptID = splIDs[1]
                intScore = float(spl[3])

                if peptideID not in self.xrefDict:
                    peptideIDNotFound.add(peptideID)
                    continue

                proteinIDs = self.xrefDict[peptideID]
                if len(proteinIDs) != 1:
                    raise RainetException(
                        "ENSP should point to a single UniProtID: " + line)
                proteinID, = proteinIDs  # unpack the single element

                pair = transcriptID + "|" + proteinID

                proteinSet.add(proteinID)

                # Keep the maximum interaction score of the pair
                if pair not in interactingPairs:
                    interactingPairs[pair] = float("-inf")
                if intScore > interactingPairs[pair]:
                    interactingPairs[pair] = intScore

        # Single-argument print calls behave the same on Python 2 and 3; the
        # double space reproduces the former "print a, b" output.
        # For the old catRAPID dataset, 243 missing peptide IDs are expected.
        print(
            "read_catrapid_file: Number of peptideIDs not found in RAINET DB:  %s"
            % len(peptideIDNotFound))
        print("read_catrapid_file: Number of proteins:  %s" % len(proteinSet))
        print(
            "read_catrapid_file: Number of protein-RNA pairs in catRAPID:  %s"
            % len(interactingPairs))

        return interactingPairs
Exemplo n.º 24
0
    def create_object_from_tsv(self, parameter_value_list):
        """Instantiate self.className from a list of field values.

        Every value is passed to the class constructor as a string, in list
        order; the new object's add_to_session() is then called.

        Returns the new instance, or None when the constructor raises
        NotRequiredInstantiationException (the object does not have to be
        created, e.g. because it is already in the database).

        Raises RainetException if the list is None or empty, or if anything
        else goes wrong during the creation.
        """
        # Test if the parameter list contains elements
        if parameter_value_list is None or len(parameter_value_list) == 0:
            raise RainetException(
                "DataFactory.create_object_from_tsv: No value provided for the object creation of class "
                + self.className)

        # The values are handed to the constructor as real arguments. They
        # used to be spliced into source text with only '"' escaped, so a
        # backslash in a value was reinterpreted (or broke the expression),
        # and file content could end up being evaluated as code.
        constructor_arguments = [str(value) for value in parameter_value_list]

        new_instance = None
        try:
            Logger.get_instance().debug(
                "command = " + self.className + "(" +
                ", ".join(repr(value) for value in constructor_arguments) +
                ")")
            # NOTE(review): eval() is kept only to resolve the class name in
            # this module's namespace; self.className must never come from
            # untrusted input.
            constructor = eval(self.className)
            new_instance = constructor(*constructor_arguments)
            new_instance.add_to_session()
        # Possibly the object does not have to be created because it is already in database
        # In that case, a NotRequiredInstantiationException is raised by the class constructor
        except NotRequiredInstantiationException:
            pass
        # If any other Exception occurred, something went wrong
        except Exception as excep:
            raise RainetException(
                "DataFactory.create_object_from_tsv : An issue occurred in the object creation.",
                excep)

        return new_instance
Exemplo n.º 25
0
    def open_text_r(path):
        """Open the file at ``path`` in text read mode and return the handle.

        :param path: path of the file to open
        :return: the open file object (the caller is responsible for closing it)
        :raise RainetException: if the file cannot be opened (IOError); the
            failure is also logged at critical level
        """
        try:
            return open(path, 'r')
        except IOError as ioe:
            reason = str(ioe)
            Logger.get_instance().critical(
                "FileUtils.open_text_r : IOError:  Unable to open '" + path + "' : " + reason)
            raise RainetException(
                "Unable to open file '" + path + "' : " + reason, ioe)
Exemplo n.º 26
0
    def launch_insertion_TSV(self,
                             file_path,
                             has_headers,
                             headers,
                             class_name,
                             params,
                             default_values,
                             comment_char,
                             table_extension="",
                             clean_table=True):
        """Parse a TSV file into the database and record the insertion status.

        The insertion is run through ``TSVParser.parse_file`` only when
        ``SQLUtil.insert_data_required`` says so for ``class_name``,
        ``self.forceOverride`` and ``table_extension``. Whenever a status is
        available (returned by the parser, or set to an error constant after
        a failure), the ``TableStatus`` row for ``class_name + table_extension``
        is created or updated and the session is committed.

        :param file_path: path of the TSV file; also stored in a new TableStatus row
        :param has_headers: forwarded to ``TSVParser.parse_file``
        :param headers: forwarded to ``TSVParser.parse_file``
        :param class_name: name of the class to insert; forwarded to the parser
        :param params: forwarded to ``TSVParser.parse_file``
        :param default_values: forwarded to ``TSVParser.parse_file``
        :param comment_char: forwarded to ``TSVParser.parse_file``
        :param table_extension: suffix appended to ``class_name`` to build the
            name used for timing and for the TableStatus row
        :param clean_table: forwarded to ``TSVParser.parse_file``
        :raise RainetException: re-raised as is when raised by the insertion
            code; any other Exception is wrapped in a new RainetException
        """
        composite_table_name = class_name + table_extension

        # Bound before the try so the finally clause can always read it, even
        # when something that is not an Exception subclass (for instance
        # KeyboardInterrupt) interrupts the insertion.
        status = None

        try:
            # Start timing
            Timer.get_instance().step("Inserting " + composite_table_name +
                                      ":")
            # Check if the insertion is required or not, depending on the fact the data
            # were already inserted and the forceOverride option
            if SQLUtil.insert_data_required(class_name, self.forceOverride,
                                            table_extension):
                Logger.get_instance().info("|--Starting insertion...")
                status = TSVParser.parse_file(file_path, has_headers, headers,
                                              class_name, params,
                                              default_values, comment_char,
                                              clean_table)
            else:
                Logger.get_instance().info(
                    "|--Data already inserted: insertion bypassed.")
        # If an exception is raised by controlled code, status of insertion is set to "Rainet Error"
        except RainetException as rainet_error:
            # The status is set first so that it is recorded even if logging fails
            status = Constants.STATUS_RAINET_ERROR
            Logger.get_instance().error(rainet_error.to_string())
            # Bare raise keeps the original traceback
            raise
        # If an exception is raised by uncontrolled code, status of insertion is set to "Error"
        except Exception as e:
            status = Constants.STATUS_ERROR
            # str(e) instead of e.message: the attribute is deprecated and is
            # missing on many exception types
            Logger.get_instance().error(str(e))
            raise RainetException(
                "Abnormal Exception during insertion of " + class_name, e)
        # In all cases insert or update the status of the insertion to TableStatus table in DB
        finally:
            if status is not None:
                sql_session = SQLManager.get_instance().get_session()
                db_status_list = sql_session.query(TableStatus).filter(
                    TableStatus.tableName == composite_table_name).all()
                if not db_status_list:
                    sql_session.add(
                        TableStatus(composite_table_name, status, file_path))
                else:
                    db_status = db_status_list[0]
                    db_status.tableStatus = status
                    sql_session.add(db_status)
                SQLManager.get_instance().commit()
Exemplo n.º 27
0
    def commit(self):
        """Commit ``self.session`` and close it afterwards.

        The session is closed through ``self.close_session()`` whether the
        commit succeeds or not.

        :raise RainetException: if the commit raises a SQLAlchemyError; the
            session is rolled back through ``self.rollback_session()`` first
        """
        try:
            self.session.commit()
        except exc.SQLAlchemyError as commit_error:
            # Undo the pending changes before reporting the failure
            self.rollback_session()
            message = "SQLManager.commit : An error occurred while committing the session."
            raise RainetException(message, commit_error)
        finally:
            self.close_session()
Exemplo n.º 28
0
 def perform_query(self, sql_session, query_string):
     """Evaluate ``query_string`` as an expression on ``sql_session``.

     The text ``'sql_session.' + query_string`` is logged at info level and
     then evaluated; the value of that expression is returned.

     :param sql_session: object the query expression is applied to
     :param query_string: source text appended after ``sql_session.``
     :return: whatever the evaluated expression produces
     :raise RainetException: if the evaluation raises any Exception
     """
     # NOTE(review): query_string is executed with eval, so it must only
     # ever come from a trusted operator.
     full_query = 'sql_session.' + query_string
     Logger.get_instance().info( "InteractiveQueryStrategy.perform_query : query is '" + full_query + "'")

     try:
         # eval resolves the name 'sql_session' from this method's locals
         return eval( full_query)
     except Exception as ex:
         raise RainetException( "InteractiveQueryStrategy.perform_query : Exception occurred during query on DB", ex)
Exemplo n.º 29
0
    def get_property(self, property_name, not_none=False):
        """Return the value stored for ``property_name`` in ``self.propertiesDict``.

        The lookup is done on the lower-cased name.

        :param property_name: name of the property (case-insensitive)
        :param not_none: when True, a missing property raises instead of
            yielding None
        :return: the stored value, or None if the property is missing and
            ``not_none`` is False
        :raise RainetException: if the property is missing and ``not_none``
            is True
        """
        lookup_key = property_name.lower()

        # Property found: hand back whatever is stored for it
        if lookup_key in self.propertiesDict:
            return self.propertiesDict[lookup_key]

        # Property missing and the caller tolerates it
        if not not_none:
            return None

        raise RainetException(
            "PropertyManager.get_property : No property with name : " +
            lookup_key + ". Check your '.ini' file")
Exemplo n.º 30
0
    def prepare_t_test(self, proteinStats):
        """Summarise the LncRNA and MRNA scores of one protein and run ``self.t_test``.

        :param proteinStats: mapping holding the scores under the keys
            "LncRNA" and "MRNA"
        :return: tuple (meanLncRNA, meanMRNA, statistic, pvalue); the two
            means and the statistic are strings formatted with two decimals,
            the p-value is element 1 of the ``self.t_test`` result, unformatted
        :raise RainetException: if either the "LncRNA" or the "MRNA" key is
            missing from ``proteinStats``
        """
        # Both groups of scores are required
        if "LncRNA" not in proteinStats:
            raise RainetException(
                "prepare_t_test : protein has no LncRNA targets %s" %
                (proteinStats))
        if "MRNA" not in proteinStats:
            raise RainetException(
                "prepare_t_test : protein has no MRNA targets %s" %
                (proteinStats))

        lncScores = proteinStats["LncRNA"]
        mrnaScores = proteinStats["MRNA"]
        twoDecimals = "%.2f"

        formattedLncMean = twoDecimals % numpy.mean(lncScores)
        formattedMrnaMean = twoDecimals % numpy.mean(mrnaScores)

        # Element 0 of the result is the statistic, element 1 the p-value
        testResult = self.t_test(lncScores, mrnaScores)

        return (formattedLncMean, formattedMrnaMean,
                twoDecimals % testResult[0], testResult[1])