def find_nearest_features(peaks, features, distance=None, tss_only=False,
                          only_differentially_expressed=False):
    """
    Yield the nearest features for each peak

    For every peak the candidate features are those returned by
    ``features.filterByChr`` for the peak's chromosome, optionally
    reduced to those with flag value 1. The candidates are passed to
    the sorting helper matching ``tss_only`` and, when a cut-off is
    given, are kept up to (but not including) the first candidate
    lying further away than the cut-off.

    Arguments:
      peaks (PeakList): list of peaks
      features (FeatureList): list of features
      distance (int): optional cut-off distance to apply
      tss_only (bool): only consider distances from the
        feature TSS (default is to consider distances from
        both the TSS and TES)
      only_differentially_expressed (bool): only consider
        features that are flagged as differentially expressed

    Yields:
      tuple: Peak object and a FeatureSet object with the
        nearest features, for each peak in the input. When
        no feature qualifies the set holds a single ``None``.

    """
    for peak in peaks:
        # Candidates are restricted to the peak's chromosome
        candidates = features.filterByChr(peak.chrom)
        if only_differentially_expressed:
            candidates = candidates.filterByFlag(1)
        # Put candidates into distance order (helpers are presumed
        # to sort in place, as their return value is not used)
        if tss_only:
            sort_features_by_tss_distances(peak, candidates)
        else:
            sort_features_by_edge_distances(peak, candidates)
        if distance is not None:
            within_cutoff = FeatureSet()
            for feature in candidates:
                if tss_only:
                    separation = distances.distance_tss(peak, feature)
                else:
                    separation = distances.distance_closest_edge(peak,
                                                                 feature)
                # Stop at the first candidate beyond the cut-off
                if separation > distance:
                    break
                within_cutoff.addFeature(feature)
            candidates = within_cutoff
        # Always report at least one (null) result per peak
        if not candidates:
            candidates.addFeature(None)
        yield (peak, candidates)
def find_nearest_features(peaks, features, distance=None, tss_only=False,
                          only_differentially_expressed=False):
    """
    Locate the nearest features for each peak

    Each peak is matched against the features on its own chromosome
    (optionally only those with flag value 1). The matches are handed
    to the TSS-based or edge-based sorting helper depending on
    ``tss_only``; if ``distance`` is supplied then only the leading
    matches that are no further away than ``distance`` are retained.

    Arguments:
      peaks (PeakList): list of peaks
      features (FeatureList): list of features
      distance (int): optional cut-off distance to apply
      tss_only (bool): only consider distances from the
        feature TSS (default is to consider distances from
        both the TSS and TES)
      only_differentially_expressed (bool): only consider
        features that are flagged as differentially expressed

    Yields:
      tuple: Peak object and a FeatureSet object with the
        nearest features, for each peak in the input. The
        set contains a single ``None`` if nothing matched.

    """
    for peak in peaks:
        # Same-chromosome features only
        nearby = features.filterByChr(peak.chrom)
        # Optionally drop features not flagged as differentially
        # expressed
        if only_differentially_expressed:
            nearby = nearby.filterByFlag(1)
        # Sort into distance order
        sorter = (sort_features_by_tss_distances if tss_only
                  else sort_features_by_edge_distances)
        sorter(peak, nearby)
        if distance is not None:
            # Use the measure that corresponds to the sort order
            if tss_only:
                measure = distances.distance_tss
            else:
                measure = distances.distance_closest_edge
            kept = FeatureSet()
            for feature in nearby:
                if measure(peak, feature) > distance:
                    # First feature past the cut-off ends the scan
                    break
                kept.addFeature(feature)
            nearby = kept
        # Guarantee at least one (null) entry in the result
        if not nearby:
            nearby.addFeature(None)
        yield (peak, nearby)
def find_nearest_peaks(features, peaks, distance=None, tss_only=False,
                       only_differentially_expressed=False):
    """
    Locate nearest peaks for each feature

    Each feature is matched against the peaks on its own chromosome.
    The peaks are handed to the TSS-based or edge-based sorting
    helper depending on ``tss_only``; if ``distance`` is supplied
    then only the leading peaks that are no further away than
    ``distance`` are retained. The cut-off is measured with the same
    kind of distance that was used for sorting (TSS distance when
    ``tss_only`` is set, closest-edge distance otherwise).

    Arguments:
      features (FeatureList): list of features
      peaks (PeakList): list of peaks
      distance (int): optional cut-off distance to apply
      tss_only (bool): only consider distances from the
        feature TSS (default is to consider distances from
        both the TSS and TES)
      only_differentially_expressed (bool): only consider
        features that are flagged as differentially expressed

    Yields:
      tuple: Feature object and a PeakSet object with the
        nearest peaks, for each feature in the input. The
        set contains a single ``None`` if nothing matched.

    """
    # Reduce to set of differentially expressed features
    if only_differentially_expressed:
        features = features.filterByFlag(1)
    # Find nearest peaks for each feature
    for feature in features:
        # Only consider peaks on same chromosome
        peak_list = peaks.filterByChr(feature.chrom)
        # Sort into distance order
        if tss_only:
            sort_peaks_by_tss_distances(feature, peak_list)
        else:
            sort_peaks_by_edge_distances(feature, peak_list)
        # Apply distance cut-off
        if distance is not None:
            # The early 'break' below is only valid if the cut-off
            # uses the same measure as the sort, so pick the measure
            # to match 'tss_only' (previously the TSS distance was
            # always used, even for edge-sorted peaks)
            if tss_only:
                measure = distances.distance_tss
            else:
                measure = distances.distance_closest_edge
            closest = PeakSet()
            for peak in peak_list:
                if measure(peak, feature) > distance:
                    break
                closest.addPeak(peak)
            peak_list = closest
        # Return at least one (null) result
        if not peak_list:
            peak_list.addPeak(None)
        # Return results
        yield (feature, peak_list)
def _value_for(self, attr):
    """
    Look up a report field for the current peak/feature pair

    The peak and feature are taken from the '_context_peak' and
    '_context_feature' properties, which the calling method is
    expected to have set beforehand.

    Arguments:
      attr (string): name of the field/attribute to evaluate

    Returns:
      Value of the field for the current peak/feature pair

    Raises:
      AttributeError: if valid ``attr`` cannot be derived
      KeyError: if ``attr`` is not a recognised attribute
        name
      NotImplementedError: if ``attr`` is 'features_inbetween'
      Exception: if 'overlap_promoter' is requested but no
        promoter region has been defined

    """
    peak = self._context_peak
    feature = self._context_feature
    features_first = self._is_features
    # Fields taken directly from the peak
    if attr == 'peak.id':
        return peak.id
    if attr in ('chr', 'peak.chr'):
        return peak.chrom
    if attr in ('peak.start', 'start'):
        return peak.start
    if attr in ('peak.end', 'end'):
        return peak.end
    # Fields taken directly from the feature
    if attr in ('id', 'feature.id'):
        return feature.id
    if attr == 'feature.chr':
        return feature.chrom
    if attr == 'feature.start':
        return feature.start
    if attr == 'feature.end':
        return feature.end
    if attr == 'TSS':
        return feature.tss
    if attr == 'TES':
        return feature.tes
    if attr in ('strand', 'feature.strand'):
        return feature.strand
    if attr == 'differentially_expressed':
        return feature.flag
    # Distances between the peak and the feature
    if attr == 'dist_closest':
        return distances.distance_closest_edge(peak, feature)
    if attr == 'dist_TSS':
        return distances.distance_tss(peak, feature)
    if attr == 'dist_TES':
        return distances.distance_tes(peak, feature)
    # Overlap of the peak with the feature (TSS to TES)
    if attr in ('overlap_feature', 'in_the_feature'):
        overlaps = distances.regions_overlap((peak.start, peak.end),
                                             (feature.tss, feature.tes))
        if attr == 'in_the_feature':
            # Same test, reported as text rather than 1/0
            return 'YES' if overlaps else 'NO'
        return 1 if overlaps else 0
    # Overlap of the peak with the feature's promoter region
    if attr == 'overlap_promoter':
        if self._promoter_region is None:
            raise Exception("'overlap_promoter' requested but no "
                            "promoter region has been defined")
        promoter = feature.getPromoterRegion(*self._promoter_region)
        if distances.regions_overlap((peak.start, peak.end), promoter):
            return 1
        return 0
    if attr == 'direction':
        # Argument order depends on whether features are the
        # reference
        if features_first:
            direction = distances.direction(feature, peak)
        else:
            direction = distances.direction(peak, feature)
        if direction == distances.UPSTREAM:
            return 'U'
        if direction == distances.DOWNSTREAM:
            return 'D'
        return '.'
    if attr == 'features_inbetween':
        raise NotImplementedError("'features_inbetween' not implemented")
    raise KeyError("Unrecognised report field: '%s'" % attr)
def _value_for(self, attr):
    """
    Return the value for the specified attribute

    Given the name of a field/attribute, return the value for the
    current peak/feature pair (which should have been set by the
    calling method in the '_context_peak' and '_context_feature'
    properties). Names that are not built-in fields are looked up
    in the '_extra_data' property, if that has been set.

    Arguments:
      attr (string): attribute name

    Returns:
      Value of the field for the current peak/feature pair

    Raises:
      AttributeError: if valid ``attr`` cannot be derived
      KeyError: if ``attr`` is not a recognised attribute
        name (including when no extra data is available)
      NotImplementedError: if ``attr`` is 'features_inbetween'
      Exception: if 'overlap_promoter' is requested but no
        promoter region has been defined

    """
    peak = self._context_peak
    feature = self._context_feature
    extra_data = self._extra_data
    is_features = self._is_features
    # Fields taken directly from the peak
    if attr == 'peak.id':
        return peak.id
    if attr in ('chr', 'peak.chr'):
        return peak.chrom
    if attr in ('peak.start', 'start'):
        return peak.start
    if attr in ('peak.end', 'end'):
        return peak.end
    if attr == 'peak.file':
        return peak.source_file
    # Fields taken directly from the feature
    if attr in ('id', 'feature.id'):
        return feature.id
    if attr == 'feature.chr':
        return feature.chrom
    if attr == 'feature.start':
        return feature.start
    if attr == 'feature.end':
        return feature.end
    if attr == 'feature.file':
        return feature.source_file
    if attr == 'TSS':
        return feature.tss
    if attr == 'TES':
        return feature.tes
    if attr in ('strand', 'feature.strand'):
        return feature.strand
    if attr == 'differentially_expressed':
        return feature.flag
    # Distances between the peak and the feature
    if attr == 'dist_closest':
        return distances.distance_closest_edge(peak, feature)
    if attr == 'dist_TSS':
        return distances.distance_tss(peak, feature)
    if attr == 'dist_TES':
        return distances.distance_tes(peak, feature)
    # Overlap of the peak with the feature (TSS to TES)
    if attr in ('overlap_feature', 'in_the_feature'):
        overlaps = distances.regions_overlap((peak.start, peak.end),
                                             (feature.tss, feature.tes))
        if attr == 'in_the_feature':
            # Same test, reported as text rather than 1/0
            return 'YES' if overlaps else 'NO'
        return 1 if overlaps else 0
    # Overlap of the peak with the feature's promoter region
    if attr == 'overlap_promoter':
        if self._promoter_region is None:
            raise Exception("'overlap_promoter' requested but no "
                            "promoter region has been defined")
        promoter = feature.getPromoterRegion(*self._promoter_region)
        if distances.regions_overlap((peak.start, peak.end), promoter):
            return 1
        return 0
    if attr == 'direction':
        # Argument order depends on whether features are the
        # reference
        if is_features:
            direction = distances.direction(feature, peak)
        else:
            direction = distances.direction(peak, feature)
        if direction == distances.UPSTREAM:
            return 'U'
        if direction == distances.DOWNSTREAM:
            return 'D'
        return '.'
    if attr == 'features_inbetween':
        raise NotImplementedError("'features_inbetween' not implemented")
    # Check extra data items; guard against no extra data having
    # been set so that an unknown name always gives the documented
    # KeyError rather than a TypeError from indexing None
    if extra_data is not None:
        try:
            return extra_data[attr]
        except KeyError:
            pass
    raise KeyError("Unrecognised report field: '%s'" % attr)