Exemplo n.º 1
0
 def testFindAllClasses(self):
     """
     Looking up each known trig point class by its C{NAME} must return
     that very class.
     """
     for trigClass in ALL_TRIG_CLASSES:
         found = findTrigPoint(trigClass.NAME)
         self.assertIs(trigClass, found)
Exemplo n.º 2
0
    def extractNonPairFeatures(self):
        """
        Yield every unpaired feature found in the template.

        The first template line is skipped, as is any line containing a
        comma (those hold feature pairs and are handled by
        C{extractPairedFeatures}).

        @raise ValueError: If a line names an unknown landmark or trig
            point, or if the sequence given for a feature differs from
            the full subject/query sequence at the same offsets.
        @return: A generator that yields 2-tuples containing:
            1. A C{str} feature name
            2. A C{light.features.Landmark} or a C{light.features.TrigPoint}
                instance
            in the order they are encountered in the template lines.
        """
        for line in self.noPipes[1:]:
            if ',' in line:
                # A comma means this line holds a pair of features from
                # the matched region; that is not our job.
                continue

            name = line[:self.indentLength].strip()
            body = line[self.indentLength:]

            # A feature name is tried as a landmark first and only then
            # as a trig point.
            landmarkClass = findLandmark(name)

            if landmarkClass is None:
                trigPointClass = findTrigPoint(name)

                if trigPointClass is None:
                    raise ValueError('Unknown feature name %r found in %s '
                                     'template' % (name, self.TYPE))

            # Each run of non-whitespace in the body is one feature.
            for match in _NONWHITE_REGEXP.finditer(body):
                offset, end = match.span()
                length = end - offset
                sequence = body[offset:end]
                if sequence != self.read.sequence[offset:end]:
                    raise ValueError(
                        '%s feature sequence %r found in %s template (offset '
                        '%d, length %d) does not match the full sequence for '
                        'the %s at those offsets' %
                        (name, sequence, self.TYPE, offset, length,
                         self.TYPE))

                if landmarkClass:
                    feature = Landmark(landmarkClass.NAME,
                                       landmarkClass.SYMBOL, offset, length)
                else:
                    feature = TrigPoint(trigPointClass.NAME,
                                        trigPointClass.SYMBOL, offset)

                yield (name, feature)
Exemplo n.º 3
0
    def __init__(self,
                 landmarks=None,
                 trigPoints=None,
                 limitPerLandmark=None,
                 maxDistance=None,
                 minDistance=None,
                 distanceBase=None,
                 featureLengthBase=None,
                 randomLandmarkDensity=None,
                 randomTrigPointDensity=None,
                 acAlphaHelixFilename=None,
                 acAlphaHelix310Filename=None,
                 acAlphaHelixCombinedFilename=None,
                 acAlphaHelixPiFilename=None,
                 acExtendedStrandFilename=None):
        """
        Store the given settings and instantiate the finders.

        Every argument other than C{landmarks} and C{trigPoints} is saved
        on an attribute of the same name; when it is C{None} the matching
        class-level C{DEFAULT_*} value is used instead.

        @param landmarks: C{None} (meaning C{DEFAULT_LANDMARK_CLASSES}) or
            an iterable whose items are either string names, resolved with
            C{findLandmark}, or objects assumed to already be landmark
            finder classes.
        @param trigPoints: C{None} (meaning C{DEFAULT_TRIG_CLASSES}) or an
            iterable whose items are either string names, resolved with
            C{findTrigPoint}, or objects assumed to already be trig point
            finder classes.
        @raise ValueError: If the resulting C{distanceBase} or
            C{featureLengthBase} is not greater than zero, or if a
            landmark or trig point name cannot be resolved to a class.
        """
        # Scalar settings: fall back to the class default when unset.
        if limitPerLandmark is None:
            limitPerLandmark = self.DEFAULT_LIMIT_PER_LANDMARK
        self.limitPerLandmark = limitPerLandmark

        if maxDistance is None:
            maxDistance = self.DEFAULT_MAX_DISTANCE
        self.maxDistance = maxDistance

        if minDistance is None:
            minDistance = self.DEFAULT_MIN_DISTANCE
        self.minDistance = minDistance

        if distanceBase is None:
            distanceBase = self.DEFAULT_DISTANCE_BASE
        self.distanceBase = distanceBase

        if self.distanceBase <= 0:
            raise ValueError('distanceBase must be > 0.')

        if featureLengthBase is None:
            featureLengthBase = self.DEFAULT_FEATURE_LENGTH_BASE
        self.featureLengthBase = featureLengthBase

        if self.featureLengthBase <= 0:
            raise ValueError('featureLengthBase must be > 0.')

        if randomLandmarkDensity is None:
            randomLandmarkDensity = self.DEFAULT_RANDOM_LANDMARK_DENSITY
        self.randomLandmarkDensity = randomLandmarkDensity

        if randomTrigPointDensity is None:
            randomTrigPointDensity = self.DEFAULT_RANDOM_TRIG_POINT_DENSITY
        self.randomTrigPointDensity = randomTrigPointDensity

        # Filename settings.
        if acAlphaHelixFilename is None:
            acAlphaHelixFilename = self.DEFAULT_AC_ALPHAHELIX_FILENAME
        self.acAlphaHelixFilename = acAlphaHelixFilename

        if acAlphaHelix310Filename is None:
            acAlphaHelix310Filename = self.DEFAULT_AC_ALPHAHELIX_3_10_FILENAME
        self.acAlphaHelix310Filename = acAlphaHelix310Filename

        if acAlphaHelixCombinedFilename is None:
            acAlphaHelixCombinedFilename = (
                self.DEFAULT_AC_ALPHAHELIX_COMBINED_FILENAME)
        self.acAlphaHelixCombinedFilename = acAlphaHelixCombinedFilename

        if acAlphaHelixPiFilename is None:
            acAlphaHelixPiFilename = self.DEFAULT_AC_ALPHAHELIX_PI_FILENAME
        self.acAlphaHelixPiFilename = acAlphaHelixPiFilename

        if acExtendedStrandFilename is None:
            acExtendedStrandFilename = self.DEFAULT_AC_EXTENDED_STRAND_FILENAME
        self.acExtendedStrandFilename = acExtendedStrandFilename

        # Resolve the landmark finder classes.
        if landmarks is None:
            landmarkClasses = DEFAULT_LANDMARK_CLASSES
        else:
            landmarkClasses = set()
            for spec in landmarks:
                if not isinstance(spec, string_types):
                    # Assume this is already a landmark class.
                    landmarkClasses.add(spec)
                    continue
                found = findLandmark(spec)
                if not found:
                    raise ValueError(
                        'Could not find landmark finder class %r.' %
                        spec)
                landmarkClasses.add(found)

        # Resolve the trig point finder classes.
        if trigPoints is None:
            trigPointClasses = DEFAULT_TRIG_CLASSES
        else:
            trigPointClasses = set()
            for spec in trigPoints:
                if not isinstance(spec, string_types):
                    # Assume this is already a trig point class.
                    trigPointClasses.add(spec)
                    continue
                found = findTrigPoint(spec)
                if not found:
                    raise ValueError(
                        'Could not find trig point finder class %r.' %
                        spec)
                trigPointClasses.add(found)

        # The finders built here are not used in this file; the backend
        # uses them. They are kept as sorted lists so they are always
        # processed in the same order (in printing, in checksums, etc).
        landmarkFinders = [finder(self) for finder in landmarkClasses]
        landmarkFinders.sort()
        self.landmarkFinders = landmarkFinders

        trigPointFinders = [finder(self) for finder in trigPointClasses]
        trigPointFinders.sort()
        self.trigPointFinders = trigPointFinders
Exemplo n.º 4
0
    def extractPairedFeatures(self):
        """
        Yield every feature pair found in the template.

        The first template line is skipped, as is any line without a
        comma (those hold unpaired features and are handled by
        C{extractNonPairFeatures}).

        @raise ValueError: If a line names an unknown landmark or trig
            point, does not have exactly one comma (separating the two
            names), holds more than two features, or gives a feature
            sequence that differs from the full subject/query sequence at
            the same offsets.
        @return: A generator that yields 4-tuples containing
            1. A C{str} landmark feature name
            2. A C{light.features.Landmark} instance
            3. A C{str} trig point feature name (this may actually be a
                landmark, but we typically call it a trig point).
            4. A C{light.features.TrigPoint} instance (may actually be a
                C{light.features.Landmark} instance)
            in the order they are encountered in the template lines.
        """
        for line in self.noPipes[1:]:
            commaCount = line.count(',')

            if commaCount == 0:
                # No comma means a single feature name, so this line holds
                # non-paired features; that is not our job.
                continue

            if commaCount != 1:
                raise ValueError('%s template line %r contains multiple '
                                 'commas' % (self.TYPE, line))

            names = line[:self.indentLength].split(',')
            landmarkName, secondFeatureName = map(str.strip, names)
            featureStr = line[self.indentLength:]

            # The first feature of a matched pair must be a landmark.
            landmark = findLandmark(landmarkName)

            if landmark is None:
                raise ValueError('Unknown landmark name %r found in %s '
                                 'template line %r' %
                                 (landmarkName, self.TYPE, line))

            # The second feature may be another landmark or a trig point;
            # it is tried as a landmark first.
            landmark2 = findLandmark(secondFeatureName)

            if landmark2 is None:
                trigPoint = findTrigPoint(secondFeatureName)

                if trigPoint is None:
                    raise ValueError('Unknown feature name %r found in %s '
                                     'template' % (secondFeatureName,
                                                   self.TYPE))

            # Each run of non-whitespace in the line body is one feature.
            for index, match in enumerate(
                    _NONWHITE_REGEXP.finditer(featureStr)):
                if index > 1:
                    raise ValueError(
                        'More than two features found in matched region pair '
                        'line in %s template. Line was %r' % (self.TYPE, line))

                isFirst = index == 0
                offset, end = match.span()
                length = end - offset
                sequence = featureStr[offset:end]

                if sequence != self.read.sequence[offset:end]:
                    # Name whichever member of the pair is at fault.
                    culprit = landmarkName if isFirst else secondFeatureName
                    raise ValueError(
                        '%s feature sequence %r found in %s template '
                        '(offset %d, length %d) does not match the full '
                        'sequence for the %s at those offsets' %
                        (culprit, sequence, self.TYPE, offset, length,
                         self.TYPE))

                if isFirst:
                    # Remember the landmark until its partner is seen.
                    landmarkFeature = Landmark(landmark.NAME, landmark.SYMBOL,
                                               offset, length)
                    continue

                if landmark2:
                    secondFeature = Landmark(landmark2.NAME, landmark2.SYMBOL,
                                             offset, length)
                else:
                    secondFeature = TrigPoint(trigPoint.NAME,
                                              trigPoint.SYMBOL, offset)

                yield (landmarkName, landmarkFeature,
                       secondFeatureName, secondFeature)
Exemplo n.º 5
0
 def testFindDevTrigPoint(self):
     """
     Development trig point classes must also be discoverable by name.
     """
     found = findTrigPoint('RandomTrigPoint')
     self.assertIs(RandomTrigPoint, found)
Exemplo n.º 6
0
 def testFindTrigPointFails(self):
     """
     Asking for a trig point class under a name that does not exist must
     give C{None}.
     """
     result = findTrigPoint('silly')
     self.assertIs(None, result)