Пример #1
0
    def signal_diff(self, scale, feature_ids=False):
        """Return the mutation signal distance for each Georgiev scale.

        Args:
            scale: Currently ignored (see the TODO below); the first
                ``num_scales`` Georgiev scales are always used.
            feature_ids: If true, return the feature ids and names instead
                of the feature values.

        Returns:
            A numpy array holding one ``self.mutation_signal_distance``
            result per scale, or an ``(ids, names)`` tuple of string lists
            when ``feature_ids`` is true.
        """
        # TODO scale
        num_scales = 19

        if feature_ids:
            ids = ['%i' % (i) for i in range(num_scales)]
            names = ['Georgiev %i signal difference' % (i)
                     for i in range(num_scales)]
            return (ids, names)

        # fetch the scales once instead of once per loop iteration, and use
        # a separate name so the (still unused) scale argument is not
        # silently overwritten
        georgiev_scales = sequtil.get_georgiev_scales()
        feat_vec = numpy.zeros(num_scales)
        for index in range(num_scales):
            feat_vec[index] = self.mutation_signal_distance(
                georgiev_scales[index])
        return feat_vec
Пример #2
0
    def _parse_scales(self, scales):

        if (type(scales) == str):
            try:
                scales = int(scales)
            except ValueError:
                pass

        # retrieve the set of Georgiev aa scales
        if (scales == 'gg'):
            scale_list = sequtil.get_georgiev_scales()
            scale_ids = ['gg%i' % (i) for i in xrange(1, len(scale_list) + 1)]
            scale_names = [
                'Georgiev scale %i' % (i)
                for i in xrange(1,
                                len(scale_list) + 1)
            ]

        # retrieve AAIndex scale with index scales
        elif (type(scales) == int):
            scale_list = [sequtil.get_aaindex_scale(scales)]
            scale_ids = ['aai%i' % (scales)]
            scale_names = ['amino acid index %i' % (scales)]

        # retrieve list of pseaac scale
        elif (scales[0] == 'p' and len(scales) > 2):
            scale_indices = [int(i) for i in scales.split('p')[1:]]
            scale_list = scale_indices
            scale_ids = ['pseaac%i' % (i + 1) for i in scale_indices]
            scale_names = ['PseAAC scale %i' % (i + 1) for i in scale_indices]

        # retrieve pseaac scale
        elif (scales[0] == 'p'):
            scale_index = int(scales[1:])
            scale_list = [int(scales[1:])]
            scale_ids = ['pseaac%i' % (scale_index + 1)]
            scale_names = ['PseAAC scale %i' % (scale_index + 1)]

        # retrieve list of AAIndex scales... (still used somewhere?)
        elif (type(scales) == list and all([type[i] == int for i in scales])):
            scale_list = [sequtil.get_aaindex_scale(i) for i in scales]
            scale_ids = ['aai%i' % (i) for i in scales]
            scale_names = ['amino acid index %i' % (i) for i in scales]

        else:
            raise ValueError('Incorrect scale provided: %s\n' % (str(scales)))

        return (scale_list, scale_ids, scale_names)
Пример #3
0
    def msa_signal_diff(self, scale, feature_ids=False):
        """Return the minimal signal distance to the MSA per Georgiev scale.

        Args:
            scale: Currently ignored (see the TODO below); the first
                ``num_scales`` Georgiev scales are always used.
            feature_ids: If true, return the feature ids and names instead
                of the feature values.

        Returns:
            A numpy array holding one ``self.min_signal_dist_to_msa``
            result per scale, or an ``(ids, names)`` tuple of string lists
            when ``feature_ids`` is true.
        """
        # TODO scale
        num_scales = 19

        if feature_ids:
            ids = ['%i' % (i) for i in range(num_scales)]
            names = ['Georgiev %i signal dist. to msa variability' % (i)
                     for i in range(num_scales)]
            return (ids, names)

        # fetch the scales once instead of once per loop iteration, and use
        # a separate name so the (still unused) scale argument is not
        # silently overwritten
        georgiev_scales = sequtil.get_georgiev_scales()
        feat_vec = numpy.zeros(num_scales)
        for index in range(num_scales):
            feat_vec[index] = self.min_signal_dist_to_msa(
                georgiev_scales[index])
        return feat_vec
Пример #4
0
    def _parse_scales(self, scales):

        if(type(scales) == str):
            try:
                scales = int(scales)
            except ValueError:
                pass

        # retrieve the set of Georgiev aa scales
        if(scales == 'gg'):
            scale_list = sequtil.get_georgiev_scales()
            scale_ids = ['gg%i' % (i) for i in xrange(1, len(scale_list) + 1)]
            scale_names = ['Georgiev scale %i' % (i)
                           for i in xrange(1, len(scale_list) + 1)]

        # retrieve list of pseaac scale
        elif(scales[0] == 'p' and len(scales) > 2):
            scale_indices = [int(i) for i in scales.split('p')[1:]]
            scale_list = scale_indices
            scale_ids = ['pseaac%i' % (i + 1) for i in scale_indices]
            scale_names = ['PseAAC scale %i' % (i + 1) for i in scale_indices]

        # retrieve pseaac scale
        elif(scales[0] == 'p'):
            scale_index = int(scales[1:])
            scale_list = [int(scales[1:])]
            scale_ids = ['pseaac%i' % (scale_index + 1)]
            scale_names = ['PseAAC scale %i' % (scale_index + 1)]

        # retrieve AAIndex scale with index scales
        elif(type(scales) == int):
            scale_list = [sequtil.get_aaindex_scale(scales)]
            scale_ids = ['aai%i' % (scales)]
            scale_names = ['amino acid index %i' % (scales)]

        # retrieve list of AAIndex scales... (still used somewhere?)
        elif(type(scales) == list and all([type[i] == int for i in scales])):
            scale_list = [sequtil.get_aaindex_scale(i) for i in scales]
            scale_ids = ['aai%i' % (i) for i in scales]
            scale_names = ['amino acid index %i' % (i) for i in scales]

        else:
            raise ValueError('Incorrect scale provided: %s\n' % (str(scales)))

        return (scale_list, scale_ids, scale_names)
Пример #5
0
    def signal_auc(self, scale, env_window=21, sig_window=9, edge=1.0,
                   threshold=1.5, below_threshold=False,
                   feature_ids=False):
        """Return the transformed environment signal peak area per scale.

        Args:
            scale: Currently ignored (see the TODO below); the first
                ``num_scales`` Georgiev scales are always used.
            env_window: Passed on to ``self.environment_signal_peak_area``
                and included in the feature names.
            sig_window: Passed on to ``self.environment_signal_peak_area``
                and included in the feature names.
            edge: Passed on to ``self.environment_signal_peak_area`` and
                included in the feature names.
            threshold: Passed on to ``self.environment_signal_peak_area``
                and included in the feature names.
            below_threshold: Passed on to
                ``self.environment_signal_peak_area``.
            feature_ids: If true, return the feature ids and names instead
                of the feature values.

        Returns:
            A numpy array with one Anscombe-transformed peak area per scale,
            or an ``(ids, names)`` tuple of string lists when
            ``feature_ids`` is true.
        """
        # TODO scale
        num_scales = 19

        if feature_ids:
            ids = ['%i' % (i) for i in range(num_scales)]
            names = ['Georgiev %i signal ew%i sw%i e%.2f th%.2f' %
                     (i, env_window, sig_window, edge, threshold)
                     for i in range(num_scales)]
            return (ids, names)

        # fetch the scales once instead of once per loop iteration, and use
        # a separate name so the (still unused) scale argument is not
        # silently overwritten
        georgiev_scales = sequtil.get_georgiev_scales()
        feat_vec = numpy.zeros(num_scales)
        for index in range(num_scales):
            auc = self.environment_signal_peak_area(
                env_window, georgiev_scales[index], sig_window, edge,
                threshold, below_threshold)
            # Anscombe transform (~Poisson --> ~normal)
            feat_vec[index] = 2 * numpy.sqrt(auc + (3.0 / 8.0))
        return feat_vec