Exemplo n.º 1
0
 def __init__(self, file_name, significance=None):
     """Set up a ResultFileReader on the result file `file_name`.

     `significance`, when given, is stored in `self.alpha` as the
     p-value threshold: only contents whose p-value is less than it
     are meant to be considered. Without it, 1000.0 is stored instead.
     """
     indexed = IndexedReadOnlyFile(file_name, r"^\w+")
     self.file = indexed
     self.keys = indexed.get_keys()
     # Used to recognise lines whose first character is a digit.
     self.pval_regex = re.compile(r"^\d")
     self.go_names = {}
     self.alpha = 1000.0 if significance is None else significance
Exemplo n.º 2
0
 def __init__(self, file_name, significance=None):
     """ Prepares a ResultFileReader to read results from `file_name`.

         `file_name` is handed to `IndexedReadOnlyFile` together with
         the pattern used to index the file; the keys it reports are
         kept in `self.keys`.

         If we specify a certain `significance`, we will only consider
         those contents with a pvalue less than this `significance`.
         When it is omitted, a threshold of 1000.0 is stored instead.
     """
     # Raw strings: "\w" and "\d" are invalid escape sequences in a
     # normal string literal and trigger warnings on modern Python.
     self.file = IndexedReadOnlyFile(file_name, r"^\w+")
     self.keys = self.file.get_keys()
     self.pval_regex = re.compile(r"^\d")
     self.go_names = dict()
     if significance is not None:
         self.alpha = significance
     else:
         self.alpha = 1000.0
Exemplo n.º 3
0
class ResultFileReader(object):
    """Class implementing a parser of the
    GFam result file, i.e., a text file with the
    following format:

    protein_id
        p-value: GO:XXXXXXX (function)
        p-value: GO:XXXXXXX (function)
        ...

    protein_id2
        p-value: ...
        ...
    """

    # Set to True on an instance once every key has been parsed, i.e.
    # once `go_names` is known to cover the whole file.
    _go_names_complete = False

    def __init__(self, file_name, significance=None):
        """ Prepares a ResultFileReader to read results from `file_name`.

            If we specify a certain `significance`, we will only consider
            those contents with a pvalue less than this `significance`.
            When it is omitted, a threshold of 1000.0 is stored instead.
        """
        # Raw strings: "\w" and "\d" are invalid escape sequences in a
        # normal string literal and trigger warnings on modern Python.
        self.file = IndexedReadOnlyFile(file_name, r"^\w+")
        self.keys = self.file.get_keys()
        self.pval_regex = re.compile(r"^\d")
        self.go_names = dict()
        if significance is not None:
            self.alpha = significance
        else:
            self.alpha = 1000.0

    def get_keys(self):
        """ Return the set of protein-ids of the file
        """
        return self.keys

    def get_result_as_dict(self):
        """ Retrieves the whole dataset as a dictionary mapping each
            key to the list returned by `self[key]`.
            Not recommended if the file is too large.
        """
        return {key: self[key] for key in self.keys}

    def __getitem__(self, key):
        """ Gets the GO terms and p-values for a certain key (which
            should be a real key in the file).

            Returns a list of `(goterm, pvalue)` tuples, one for each
            line whose p-value is strictly less than `self.alpha`.
            As a side effect, the name of every returned GO term is
            recorded in `self.go_names`.

            Only lines starting with a digit are parsed. A matching
            line is expected to look like `p-value: GO:XXXXXXX (name)`.
            NOTE(review): the parsing assumes the lines carry neither
            leading indentation nor a trailing newline -- confirm
            against IndexedReadOnlyFile.
        """
        list_go_terms = []
        for line in self.file[key]:
            if not self.pval_regex.match(line):
                continue
            pvalue, goterm = line.split(' ', 2)[0:2]
            # Drop the last character of the first token (the ":").
            pvalue = float(pvalue[0:-1])
            if pvalue < self.alpha:
                list_go_terms.append((goterm, pvalue))
                if goterm not in self.go_names:
                    # Text after the first "(" minus the last character
                    # of the line (the closing parenthesis).
                    name = line.split('(', 1)[1][0:-1]
                    self.go_names[goterm] = name
        return list_go_terms

    def get_go_names(self):
        """ Returns the dictionary mapping GO terms to their names.

            The first call parses every key of the file so that the
            mapping is complete. A non-empty `go_names` is not taken
            as proof of completeness, because plain item access fills
            it only partially.
        """
        if not self._go_names_complete:
            for key in self.keys:
                self.__getitem__(key)
            self._go_names_complete = True
        return self.go_names
Exemplo n.º 4
0
class ResultFileReader(object):
    """Class implementing a parser of the
    GFam result file, i.e., a text file with the
    following format:

    protein_id
        p-value: GO:XXXXXXX (function)
        p-value: GO:XXXXXXX (function)
        ...

    protein_id2
        p-value: ...
        ...
    """

    # Class-level default; flipped to True on an instance after a full
    # pass over all keys has populated `go_names`.
    _all_names_loaded = False

    def __init__(self, file_name, significance=None):
        """ Prepares a ResultFileReader to read results from `file_name`.

            If we specify a certain `significance`, we will only consider
            those contents with a pvalue less than this `significance`.
            Without it, a threshold of 1000.0 is stored instead.
        """
        self.file = IndexedReadOnlyFile(file_name, r"^\w+")
        self.keys = self.file.get_keys()
        self.pval_regex = re.compile(r"^\d")
        self.go_names = {}
        self.alpha = 1000.0 if significance is None else significance

    def get_keys(self):
        """ Return the set of protein-ids of the file
        """
        return self.keys

    def get_result_as_dict(self):
        """ Retrieves the whole dataset as a dictionary whose values
            are the lists produced by `self[key]`.
            Not recommended if the file is too large.
        """
        return dict((key, self[key]) for key in self.keys)

    def __getitem__(self, key):
        """ Gets the GO terms and p-values for a certain key (which
            should be a real key in the file).

            The result is a list of `(goterm, pvalue)` tuples holding
            only the entries whose p-value is strictly below
            `self.alpha`. The names of those GO terms are cached in
            `self.go_names` along the way.

            Lines that do not start with a digit are skipped.
            NOTE(review): lines are assumed to arrive without leading
            whitespace or trailing newline -- confirm against
            IndexedReadOnlyFile.
        """
        selected = []
        for line in self.file[key]:
            if self.pval_regex.match(line) is None:
                continue
            fields = line.split(' ', 2)[0:2]
            raw_pvalue, goterm = fields
            # The first token ends with ":", which is cut off here.
            pvalue = float(raw_pvalue[0:-1])
            if not pvalue < self.alpha:
                continue
            selected.append((goterm, pvalue))
            if goterm not in self.go_names:
                # Everything after the first "(" except the final
                # character of the line (the closing parenthesis).
                after_paren = line.split('(', 1)[1]
                self.go_names[goterm] = after_paren[0:-1]
        return selected

    def get_go_names(self):
        """ Returns the dictionary of GO term names.

            A full pass over all keys is made the first time this is
            called. Earlier item accesses may have filled `go_names`
            only partly, so its being non-empty is not used as the
            signal that the pass can be skipped.
        """
        if self._all_names_loaded:
            return self.go_names
        for key in self.keys:
            self.__getitem__(key)
        self._all_names_loaded = True
        return self.go_names