示例#1
0
文件: dnal.py 项目: kaspermunch/sap
def _get_coords(filename):
    """Return the coordinate pairs spanned by the bracketed lines of a file.

    The file is scanned for lines beginning with "[".  The first such line
    and the last such line are each passed to _alb_line2coords, and the two
    results are transposed with zip, so the return value has the form
    [(start0, end0), (start1, end1)].

    If the file holds fewer than two lines beginning with "[" (the sequence
    is too short), [(0, 0), (0, 0)] is returned instead.
    """
    start_line = None
    end_line = None

    # open() in a with-block: the handle is closed even if iteration fails,
    # and, unlike the old file() builtin, open() also exists on Python 3.
    with open(filename) as alb:
        for line in alb:
            if line.startswith("["):
                if start_line is None:
                    start_line = line  # rstrip not needed
                else:
                    # Keep overwriting so that the last match wins.
                    end_line = line

    if end_line is None:  # sequence is too short
        return [(0, 0), (0, 0)]

    # returns [(start0, end0), (start1, end1)]
    return list(zip(*map(_alb_line2coords, [start_line, end_line])))
示例#2
0
def _get_coords(filename):
    """Extract start/end coordinates from the "["-prefixed lines of a file.

    Every line of the file is read; the first line that starts with "[" is
    remembered as the start line and the last one as the end line.  Both are
    converted with _alb_line2coords and the results are zipped together, so
    the return value has the form [(start0, end0), (start1, end1)].

    When fewer than two such lines exist (the sequence is too short), the
    placeholder [(0, 0), (0, 0)] is returned.
    """
    start_line = None
    end_line = None

    # A with-block guarantees the file is closed; open() replaces the
    # Python 2-only file() builtin, which the original never closed.
    with open(filename) as alb:
        for line in alb:
            if not line.startswith("["):
                continue
            if start_line is None:
                start_line = line  # rstrip not needed
            else:
                # Overwritten on every later match, so the last one is kept.
                end_line = line

    if end_line is None:  # sequence is too short
        return [(0, 0), (0, 0)]

    # returns [(start0, end0), (start1, end1)]
    return list(zip(*map(_alb_line2coords, [start_line, end_line])))
示例#3
0
 def __eq__(self, other):
     """Return True when self.data and other.data are element-wise equal.

     The two data sequences must have the same length and every pair of
     corresponding elements must compare equal.  Two empty sequences are
     equal (the previous reduce() call without an initial value raised
     TypeError in that case).
     """
     if len(self.data) != len(other.data):
         return False
     # all() short-circuits on the first mismatch and is True for no pairs.
     return all(x == y for x, y in zip(self.data, other.data))
示例#4
0
 def __eq__(self, other):
     """Compare the data of two objects position by position.

     Returns False if the data sequences differ in length or if any pair of
     corresponding elements is unequal, True otherwise.  Empty data on both
     sides compares equal; the earlier reduce()-based version raised
     TypeError there because it had no initial value.
     """
     if len(self.data) != len(other.data):
         return False
     # Lengths match, so zip() pairs up every element exactly once.
     for mine, theirs in zip(self.data, other.data):
         if not mine == theirs:
             return False
     return True
示例#5
0
def train(training_set, results, feature_fns, update_fn=None,
          max_iis_iterations=10000, iis_converge=1.0e-5,
          max_newton_iterations=100, newton_converge=1.0e-10):
    """Train a maximum entropy classifier, returns MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations.  results is a list of the
    class assignments for each observation.  feature_fns is a list of
    the features.  These are callback functions that take an
    observation and class and return a 1 or 0.  update_fn is a
    callback function that is called at each training iteration.  It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    The maximum number of iterations and the convergence criterion for IIS
    are given by max_iis_iterations and iis_converge, respectively, while
    max_newton_iterations and newton_converge are the maximum number
    of iterations and the convergence criterion for Newton's method.

    Raises ValueError if training_set is empty or if training_set and
    results differ in length, and RuntimeError if the summed absolute
    change in the alphas has not dropped below iis_converge within
    max_iis_iterations iterations.
    """
    if not training_set:
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    # Rename variables for convenience.
    xs, ys = training_set, results

    # Get a list of all the classes that need to be trained.
    classes = sorted(set(results))

    # Cache values for all features.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]
    # Cache values for f#.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Now train the alpha parameters to weigh each feature.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < max_iis_iterations:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical,
                             max_newton_iterations, newton_converge)
        # Total absolute change of the alphas in this iteration.
        diff = sum(numpy.fabs(x - y) for x, y in zip(alphas, nalphas))
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < iis_converge:   # converged
            break

        # Count the iteration; without this the max_iis_iterations limit
        # was never reached and a non-converging run looped forever.
        iters += 1
    else:
        # Only reached when the loop ends without hitting the break above.
        raise RuntimeError("IIS did not converge")

    return me