def signal_diff(self, scale, feature_ids=False):
    """Return the mutation signal distance per Georgiev scale, or the ids.

    Args:
        scale: Currently ignored (see TODO below); the first 19 scales
            returned by ``sequtil.get_georgiev_scales()`` are always used.
        feature_ids: If true, return the feature ids and names instead of
            computing the feature vector.

    Returns:
        When ``feature_ids`` is false, a numpy array of length 19 whose
        entry ``i`` is ``self.mutation_signal_distance(s_i)`` for the i-th
        Georgiev scale. When ``feature_ids`` is true, a tuple
        ``(ids, names)`` of two lists holding 19 strings each.
    """
    # TODO scale: the parameter is accepted but not used yet.
    num_scales = 19

    if feature_ids:
        ids = ['%i' % i for i in range(num_scales)]
        names = ['Georgiev %i signal difference' % i
                 for i in range(num_scales)]
        return (ids, names)

    # Fetch the scales once; the lookup does not depend on the loop index.
    georgiev_scales = sequtil.get_georgiev_scales()
    feat_vec = numpy.zeros(num_scales)
    for index in range(num_scales):
        feat_vec[index] = self.mutation_signal_distance(
            georgiev_scales[index])
    return feat_vec
def _parse_scales(self, scales): if (type(scales) == str): try: scales = int(scales) except ValueError: pass # retrieve the set of Georgiev aa scales if (scales == 'gg'): scale_list = sequtil.get_georgiev_scales() scale_ids = ['gg%i' % (i) for i in xrange(1, len(scale_list) + 1)] scale_names = [ 'Georgiev scale %i' % (i) for i in xrange(1, len(scale_list) + 1) ] # retrieve AAIndex scale with index scales elif (type(scales) == int): scale_list = [sequtil.get_aaindex_scale(scales)] scale_ids = ['aai%i' % (scales)] scale_names = ['amino acid index %i' % (scales)] # retrieve list of pseaac scale elif (scales[0] == 'p' and len(scales) > 2): scale_indices = [int(i) for i in scales.split('p')[1:]] scale_list = scale_indices scale_ids = ['pseaac%i' % (i + 1) for i in scale_indices] scale_names = ['PseAAC scale %i' % (i + 1) for i in scale_indices] # retrieve pseaac scale elif (scales[0] == 'p'): scale_index = int(scales[1:]) scale_list = [int(scales[1:])] scale_ids = ['pseaac%i' % (scale_index + 1)] scale_names = ['PseAAC scale %i' % (scale_index + 1)] # retrieve list of AAIndex scales... (still used somewhere?) elif (type(scales) == list and all([type[i] == int for i in scales])): scale_list = [sequtil.get_aaindex_scale(i) for i in scales] scale_ids = ['aai%i' % (i) for i in scales] scale_names = ['amino acid index %i' % (i) for i in scales] else: raise ValueError('Incorrect scale provided: %s\n' % (str(scales))) return (scale_list, scale_ids, scale_names)
def msa_signal_diff(self, scale, feature_ids=False):
    """Return the minimal signal distance to the MSA per Georgiev scale.

    Args:
        scale: Currently ignored (see TODO below); the first 19 scales
            returned by ``sequtil.get_georgiev_scales()`` are always used.
        feature_ids: If true, return the feature ids and names instead of
            computing the feature vector.

    Returns:
        When ``feature_ids`` is false, a numpy array of length 19 whose
        entry ``i`` is ``self.min_signal_dist_to_msa(s_i)`` for the i-th
        Georgiev scale. When ``feature_ids`` is true, a tuple
        ``(ids, names)`` of two lists holding 19 strings each.
    """
    # TODO scale: the parameter is accepted but not used yet.
    num_scales = 19

    if feature_ids:
        ids = ['%i' % i for i in range(num_scales)]
        names = ['Georgiev %i signal dist. to msa variability' % i
                 for i in range(num_scales)]
        return (ids, names)

    # Fetch the scales once; the lookup does not depend on the loop index.
    georgiev_scales = sequtil.get_georgiev_scales()
    feat_vec = numpy.zeros(num_scales)
    for index in range(num_scales):
        feat_vec[index] = self.min_signal_dist_to_msa(georgiev_scales[index])
    return feat_vec
def _parse_scales(self, scales): if(type(scales) == str): try: scales = int(scales) except ValueError: pass # retrieve the set of Georgiev aa scales if(scales == 'gg'): scale_list = sequtil.get_georgiev_scales() scale_ids = ['gg%i' % (i) for i in xrange(1, len(scale_list) + 1)] scale_names = ['Georgiev scale %i' % (i) for i in xrange(1, len(scale_list) + 1)] # retrieve list of pseaac scale elif(scales[0] == 'p' and len(scales) > 2): scale_indices = [int(i) for i in scales.split('p')[1:]] scale_list = scale_indices scale_ids = ['pseaac%i' % (i + 1) for i in scale_indices] scale_names = ['PseAAC scale %i' % (i + 1) for i in scale_indices] # retrieve pseaac scale elif(scales[0] == 'p'): scale_index = int(scales[1:]) scale_list = [int(scales[1:])] scale_ids = ['pseaac%i' % (scale_index + 1)] scale_names = ['PseAAC scale %i' % (scale_index + 1)] # retrieve AAIndex scale with index scales elif(type(scales) == int): scale_list = [sequtil.get_aaindex_scale(scales)] scale_ids = ['aai%i' % (scales)] scale_names = ['amino acid index %i' % (scales)] # retrieve list of AAIndex scales... (still used somewhere?) elif(type(scales) == list and all([type[i] == int for i in scales])): scale_list = [sequtil.get_aaindex_scale(i) for i in scales] scale_ids = ['aai%i' % (i) for i in scales] scale_names = ['amino acid index %i' % (i) for i in scales] else: raise ValueError('Incorrect scale provided: %s\n' % (str(scales))) return (scale_list, scale_ids, scale_names)
def signal_auc(self, scale, env_window=21, sig_window=9, edge=1.0,
               threshold=1.5, below_threshold=False, feature_ids=False):
    """Return the transformed signal peak area per Georgiev scale, or the ids.

    Args:
        scale: Currently ignored (see TODO below); the first 19 scales
            returned by ``sequtil.get_georgiev_scales()`` are always used.
        env_window, sig_window, edge, threshold, below_threshold: Passed
            through unchanged to ``self.environment_signal_peak_area``.
            All but ``below_threshold`` are also embedded in the feature
            names.
        feature_ids: If true, return the feature ids and names instead of
            computing the feature vector.

    Returns:
        When ``feature_ids`` is false, a numpy array of length 19 holding
        the Anscombe-transformed peak area for each Georgiev scale. When
        ``feature_ids`` is true, a tuple ``(ids, names)`` of two lists
        holding 19 strings each.
    """
    # TODO scale: the parameter is accepted but not used yet.
    num_scales = 19

    if feature_ids:
        ids = ['%i' % i for i in range(num_scales)]
        names = ['Georgiev %i signal ew%i sw%i e%.2f th%.2f' %
                 (i, env_window, sig_window, edge, threshold)
                 for i in range(num_scales)]
        return (ids, names)

    # Fetch the scales once; the lookup does not depend on the loop index.
    georgiev_scales = sequtil.get_georgiev_scales()
    feat_vec = numpy.zeros(num_scales)
    for index in range(num_scales):
        auc = self.environment_signal_peak_area(
            env_window, georgiev_scales[index], sig_window, edge,
            threshold, below_threshold)
        # Anscombe transform (~Poisson --> ~normal)
        feat_vec[index] = 2 * numpy.sqrt(auc + (3.0 / 8.0))
    return feat_vec