Example No. 1
    def build_msm(self, lag_time=None):
        """Build an MSM from the loaded trajectories."""
        if lag_time is None:
            lag_time = self.good_lag_time
        else:
            self.good_lag_time = lag_time

        # Do assignment
        trajs = get_data.get_shimtraj_from_trajlist(self.traj_list)
        metric = classic.Euclidean2d()

        # Allocate array
        n_trajs = len(self.traj_list)
        max_traj_len = max([t.shape[0] for t in self.traj_list])
        assignments = -1 * np.ones((n_trajs, max_traj_len), dtype='int')

        # Prepare generators
        pgens = metric.prepare_trajectory(self.clusterer.get_generators_as_traj())

        for i, traj in enumerate(trajs):
            ptraj = metric.prepare_trajectory(traj)

            for j in xrange(len(traj)):
                d = metric.one_to_all(ptraj, pgens, j)
                assignments[i, j] = np.argmin(d)

        counts = msml.get_count_matrix_from_assignments(assignments, n_states=None, lag_time=lag_time)
        rev_counts, t_matrix, populations, mapping = msml.build_msm(counts)
        return t_matrix
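For orientation, a minimal, self-contained sketch of the count-matrix-to-MSM step that the examples on this page share. The toy assignments array is purely illustrative, and the calls assume the MSMLib-style API used throughout.

import numpy as np
from msmbuilder import MSMLib

# Two short discrete trajectories over states {0, 1}; -1 marks unassigned frames.
assignments = np.array([[0, 1, 1, 0, 1],
                        [1, 0, 0, 1, -1]])

counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=1)
rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
    counts, symmetrize='MLE', ergodic_trimming=True)
# t_matrix is the row-stochastic transition matrix over the trimmed states;
# mapping relates original state indices to the trimmed indexing.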
Example No. 2
    def test_1(self):

        C = MSMLib.get_count_matrix_from_assignments(self.assignments, 2)
        rc, t, p, m = MSMLib.build_msm(C, symmetrize="MLE", ergodic_trimming=True)

        eq(rc.todense(), np.matrix([[6.46159184, 4.61535527], [4.61535527, 2.30769762]]), decimal=4)
        eq(t.todense(), np.matrix([[0.58333689, 0.41666311], [0.66666474, 0.33333526]]), decimal=4)
        eq(p, np.array([0.61538595, 0.38461405]), decimal=5)
        eq(m, np.array([0, 1]))
Example No. 3
 def test_4(self):
     c, rc, t, p, m = MSMLib.build_msm(self.assignments, lag_time=2, symmetrize=None, sliding_window=True)
     npt.assert_array_equal(c.todense(), np.matrix('7 4; 3 2'))
     npt.assert_array_almost_equal(rc.todense(), np.matrix('7 4; 3 2'))
     npt.assert_array_almost_equal(t.todense(), 
         np.matrix([[ 0.63636364,  0.36363636],
             [  0.6,  0.4]]))
     assert p is None
     npt.assert_array_equal(m, [0,1])
Example No. 4
 def test_3(self):
     c, rc, t, p, m = MSMLib.build_msm(self.assignments, self.lag_time, symmetrize='Transpose')
     npt.assert_array_equal(c.todense(), np.matrix('7 5; 4 2'))
     npt.assert_array_almost_equal(rc.todense(),
         np.matrix([[ 7,  4.5],
                    [ 4.5,  2]]))
     npt.assert_array_almost_equal(t.todense(), 
         np.matrix([[ 0.60869565,  0.39130435],
             [ 0.69230769,  0.30769231]]))
     npt.assert_array_almost_equal(p, [ 0.63888889,  0.36111111])
     npt.assert_array_equal(m, [0,1])
Example No. 5
 def test_2(self):
     c, rc, t, p, m = MSMLib.build_msm(self.assignments, self.lag_time, symmetrize=None)
     npt.assert_array_equal(c.todense(), np.matrix('7 5; 4 2'))
     npt.assert_array_almost_equal(rc.todense(),
         np.matrix([[ 7,  5],
                    [ 4,  2]]))
     npt.assert_array_almost_equal(t.todense(), 
         np.matrix([[ 0.58333333,  0.41666667],
             [ 0.66666667,  0.33333333]]))
     assert p is None
     npt.assert_array_equal(m, [0,1])
Example No. 6
 def test_1(self):
     c, rc, t, p, m = MSMLib.build_msm(self.assignments, self.lag_time, symmetrize='MLE')
     npt.assert_array_equal(c.todense(), np.matrix('7 5; 4 2'))
     npt.assert_array_almost_equal(rc.todense(),
         np.matrix([[ 6.46159184,  4.61535527],
                    [ 4.61535527,  2.30769762]]))
     npt.assert_array_almost_equal(t.todense(), 
         np.matrix([[ 0.58333689,  0.41666311],
             [ 0.66666474,  0.33333526]]))
     npt.assert_array_almost_equal(p, [ 0.61538595,  0.38461405])
     npt.assert_array_equal(m, [0,1])
Example No. 7
def parallel_get_matrix(input):
    print("working")
    (Ttest, multinom, NumStates)=input
    newT=scipy.sparse.lil_matrix((int(NumStates),int(NumStates)),dtype='float32')
    for i in range(0, Ttest.shape[1]):
        transitions = numpy.row_stack((numpy.array([i]*NumStates),numpy.arange(0, NumStates)))
        pvals=numpy.array([x/sum(Ttest[i]) for x in Ttest[i]])
        counts=numpy.random.multinomial(int(multinom), pvals, size=1)
        newT=newT+scipy.sparse.coo_matrix((counts[0], transitions),shape=(NumStates,NumStates))
    rev_counts, t_matrix, Populations, Mapping = MSMLib.build_msm(newT, symmetrize='MLE', ergodic_trimming=True)
    return rev_counts, t_matrix, Populations, Mapping
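parallel_get_matrix is written to be mapped over a pool of worker processes. A hypothetical driver might look like the sketch below; the helper name, number of bootstrap samples, and pool size are assumptions, not part of the original code.

import multiprocessing

def bootstrap_msms(Ttest, multinom, NumStates, n_samples=10, n_procs=4):
    # One identical input tuple per bootstrap sample of the count matrix.
    inputs = [(Ttest, multinom, NumStates) for _ in range(n_samples)]
    pool = multiprocessing.Pool(n_procs)
    try:
        results = pool.map(parallel_get_matrix, inputs)
    finally:
        pool.close()
        pool.join()
    # Each element is (rev_counts, t_matrix, Populations, Mapping).
    return results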
Example No. 8
def msm(traj_list, n_clusters, n_medoid_iters=10, lag_time=1, distance_cutoff=None):
    """Use classic clustering methods."""

    print "Building a classic MSM"
    hkm = cluster(traj_list, n_clusters, n_medoid_iters, distance_cutoff)
    # centroids = hkm.get_generators_as_traj()
    # centroids_nf = centroids['XYZList'][:, 0, 0:dim]

    counts = msml.get_count_matrix_from_assignments(hkm.get_assignments(), n_clusters, lag_time)
    rev_counts, t_matrix, populations, mapping = msml.build_msm(counts)

    return t_matrix
Example No. 9
 def test_2_point_1(self):
     "This doesn't work"
     # same as test_2, just with get_populations=True
     c, rc, t, p, m = MSMLib.build_msm(self.assignments, self.lag_time, symmetrize=None,
         get_populations=True)
     npt.assert_array_equal(c.todense(), np.matrix('7 5; 4 2'))
     npt.assert_array_almost_equal(rc.todense(),
         np.matrix([[ 7,  5],
                    [ 4,  2]]))
     npt.assert_array_almost_equal(t.todense(), 
         np.matrix([[ 0.58333333,  0.41666667],
             [ 0.66666667,  0.33333333]]))
     npt.assert_array_almost_equal(p, [ 0.61538595,  0.38461405])
     npt.assert_array_equal(m, [0,1])
Example No. 10
def run(lagtime, assignments, symmetrize='MLE', input_mapping="None", trim=True, out_dir="./Data/"):

    # set the filenames for output
    FnTProb = os.path.join(out_dir, "tProb.mtx")
    FnTCounts = os.path.join(out_dir, "tCounts.mtx")
    FnMap = os.path.join(out_dir, "Mapping.dat")
    FnAss = os.path.join(out_dir, "Assignments.Fixed.h5")
    FnPops = os.path.join(out_dir, "Populations.dat")

    # make sure none are taken
    outputlist = [FnTProb, FnTCounts, FnMap, FnAss, FnPops]
    arglib.die_if_path_exists(outputlist)

    # Check for valid lag time
    assert lagtime > 0, 'Please specify a positive lag time.'

    # if given, apply mapping to assignments
    if input_mapping != "None":
        MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_assigns_before_trim = len(np.where(assignments.flatten() != -1)[0])

    counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lagtime, sliding_window=True)

    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(counts, symmetrize=symmetrize, ergodic_trimming=trim)

    if trim:
        MSMLib.apply_mapping_to_assignments(assignments, mapping)
        n_assigns_after_trim = len(np.where(assignments.flatten() != -1)[0])
        # if had input mapping, then update it
        if input_mapping != "None":
            mapping = mapping[input_mapping]
        # Print a statement showing how much data was discarded in trimming
        percent = (1.0 - float(n_assigns_after_trim) / float(n_assigns_before_trim)) * 100.0
        logger.warning("Ergodic trimming discarded: %f percent of your data", percent)
    else:
        logger.warning("No ergodic trimming applied")

    # Save all output
    np.savetxt(FnPops, populations)
    np.savetxt(FnMap, mapping, "%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return
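A hypothetical call to run() above; the assignments file name and lag time are chosen only for illustration.

from msmbuilder import io

assignments = io.loadh("Assignments.h5", "arr_0")
run(lagtime=5, assignments=assignments, symmetrize="MLE",
    input_mapping="None", trim=True, out_dir="./Data/")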
Example No. 11
def run(LagTime, assignments, Symmetrize='MLE', input_mapping="None", Prior=0.0, OutDir="./Data/"):

    # set the filenames for output
    FnTProb = os.path.join(OutDir, "tProb.mtx")
    FnTCounts = os.path.join(OutDir, "tCounts.mtx")
    FnMap = os.path.join(OutDir, "Mapping.dat")
    FnAss = os.path.join(OutDir, "Assignments.Fixed.h5")
    FnPops = os.path.join(OutDir, "Populations.dat")
    
    # make sure none are taken
    outputlist = [FnTProb, FnTCounts, FnMap, FnAss, FnPops]
    arglib.die_if_path_exists(outputlist)

    # if given, apply mapping to assignments
    if input_mapping != "None":
        MSMLib.apply_mapping_to_assignments(assignments, input_mapping)

    n_states = np.max(assignments.flatten()) + 1
    n_assigns_before_trim = len( np.where( assignments.flatten() != -1 )[0] )
    
    rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(assignments,
        lag_time=LagTime, symmetrize=Symmetrize,
        sliding_window=True, trim=True)

    MSMLib.apply_mapping_to_assignments(assignments, mapping)
    n_assigns_after_trim = len( np.where( assignments.flatten() != -1 )[0] )

    # if had input mapping, then update it
    if input_mapping != "None":
        mapping = mapping[input_mapping]
    
    # Print a statement showing how much data was discarded in trimming
    percent = (1.0 - float(n_assigns_after_trim) / float(n_assigns_before_trim)) * 100.0
    logger.warning("Ergodic trimming discarded: %f percent of your data", percent)
 
    # Save all output
    np.savetxt(FnPops, populations)
    np.savetxt(FnMap, mapping,"%d")
    scipy.io.mmwrite(str(FnTProb), t_matrix)
    scipy.io.mmwrite(str(FnTCounts), rev_counts)
    msmbuilder.io.saveh(FnAss, assignments)

    for output in outputlist:
        logger.info("Wrote: %s", output)

    return
Example No. 12
def build_classic_from_memberships(memberships, lag_time=1):
    """Build a classic msm by turning a membership array into a state list.

    This function uses msmbuilder code to calculate the count matrix. Use it
    to compare quantized versions of the fuzzy count-matrix building for
    consistency.
    """
    states = np.zeros(memberships.shape[0], dtype='int')
    n_states = memberships.shape[1]

    for i in xrange(memberships.shape[0]):
        memb = memberships[i]
        state = np.argmax(memb)
        states[i] = state

    counts = msm.get_counts_from_traj(states, n_states, lag_time)
    rev_counts, t_matrix, populations, mapping = msm.build_msm(counts)
    return rev_counts, t_matrix, populations, mapping
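A toy usage sketch for build_classic_from_memberships; the random membership matrix (each row a membership vector over n_states clusters, summing to one) is purely illustrative.

import numpy as np

n_frames, n_states = 1000, 3
memberships = np.random.dirichlet(np.ones(n_states), size=n_frames)

rev_counts, t_matrix, populations, mapping = \
    build_classic_from_memberships(memberships, lag_time=1)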
Example No. 13
def classic(trajs, n_clusters, n_medoid_iters, metric, dim=2, lag_time=1, show=False, desc=None):
    """Use classic clustering methods."""

    if desc is None:
        desc = "Classic, n_clusters=%d" % n_clusters

    hkm = clustering.HybridKMedoids(metric, trajs, k=n_clusters, local_num_iters=n_medoid_iters)
    centroids = hkm.get_generators_as_traj()

    centroids_nf = centroids['XYZList'][:, 0, 0:dim]
    plot_centroids(centroids_nf)
    if show: pp.show()

    counts = msml.get_count_matrix_from_assignments(hkm.get_assignments(), n_clusters, lag_time)
    rev_counts, t_matrix, populations, mapping = msml.build_msm(counts)
    analyze_msm(t_matrix, centroids_nf, desc, show=show)

    return t_matrix
Example No. 14
def get_eigenvalues( count_matrix ):


    bad_states = np.array(np.where( count_matrix.sum(axis=1) == 0 )[0]).flatten()

    i_ary = count_matrix.nonzero()[0]
    j_ary = count_matrix.nonzero()[1]

    i_ary = np.concatenate( (i_ary, bad_states) )
    j_ary = np.concatenate( (j_ary, bad_states) )
    new_data = np.concatenate( (count_matrix.data, np.ones(len(bad_states))) )

    print(i_ary.shape, count_matrix.data.shape, new_data.shape, len(bad_states))

    count_matrix = scipy.sparse.csr_matrix( (new_data, (i_ary, j_ary)) )

    #count_matrix = count_matrix.tolil()
    #count_matrix[(bad_states, bad_states)] = 1
    #count_matrix = count_matrix.tocsr()

    print(count_matrix.data.shape, count_matrix.nonzero()[0].shape)
    #NZ = np.array(count_matrix.nonzero()).T

    #keep_ind = []
    #for i in xrange(len(NZ)):
    #    if NZ[i][0] in bad_states or NZ[i][1] in bad_states:
    #        pass
    #    else:
    #        keep_ind.append(i)
    #keep_ind = np.array(keep_ind)

    #N = NZ.max()+1

    #count_matrix = scipy.sparse.csr_matrix( (np.array(count_matrix.data)[keep_ind], NZ[keep_ind].T), shape=(N,N), copy=True )

    try:
        t_matrix = MSMLib.build_msm(count_matrix, symmetrize=args.symmetrize)[1]
    except Exception:
        return None
    
    vals = msm_analysis.get_eigenvectors(t_matrix, args.num_vals, epsilon=1)[0]
    vals.sort()

    return vals[::-1]
Example No. 15
    def __init__(self):
        super(FourStateTmat, self).__init__()

        counts = [
            [100, 30, 1, 1],
            [30, 100, 1, 1],
            [3, 3, 100, 30],
            [3, 3, 30, 100]
        ]
        counts = np.array(counts)
        counts = scipy.sparse.csr_matrix(counts, dtype=int)
        rev_counts, tmat, populations, mapping = msml.build_msm(
            counts, symmetrize='MLE', ergodic_trimming=True)

        self.n_states = tmat.shape[0]
        self.tmat = tmat
        self.counts = counts
        self.rev_counts = rev_counts
        self.step_func = self.step_sparse
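Given the transition matrix built above, one might sample a discrete trajectory from it. The following sketch is not part of the original class and assumes the rows of tmat are valid probability distributions.

import numpy as np

def sample_trajectory(tmat, n_steps, start_state=0, seed=None):
    """Draw a state sequence from a (sparse) row-stochastic matrix."""
    rng = np.random.RandomState(seed)
    dense = np.asarray(tmat.todense())
    states = [start_state]
    for _ in range(n_steps - 1):
        states.append(rng.choice(dense.shape[0], p=dense[states[-1]]))
    return np.array(states)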
Example No. 16
def build_from_memberships(memberships, lag_time=1):
    """Build an MSM from a time array of membership vectors."""

    # Sliding window
    from_states = memberships[:-lag_time: 1]
    to_states = memberships[lag_time:: 1]

    assert len(from_states) == len(to_states)

    n_pairs = len(from_states)
    n_times = 2
    n_clusters = memberships.shape[1]

    pairs = np.zeros((n_pairs, n_times, n_clusters))
    pairs[:, 0, :] = from_states
    pairs[:, 1, :] = to_states

    counts = get_counts_from_pairs(pairs, n_clusters)
    rev_counts, t_matrix, populations, mapping = msm.build_msm(counts, ergodic_trimming=False)
    return rev_counts, t_matrix, populations, mapping
Example No. 17
def build_new(centroids, trajs, fuzziness, dist, soft=True, neigen=4, show=False, desc=None):
    """Build an MSM from points and centroids.

    This function first generates membership vectors. If soft is False,
    the membership vectors are quantized to mirror the hard-clustering
    case; otherwise the fuzzy nature of the clusters is used when
    building the MSM.
    """
    n_states = len(centroids)
    time_pairs = get_giant_state_list(centroids, trajs, fuzziness, dist, soft=soft)
    print("Got state list")
    counts_mat = buildmsm.get_counts_from_pairs(time_pairs, n_states)
    print("Got count matrix")
    rev_counts, t_matrix, populations, mapping = msml.build_msm(counts_mat)

    if desc is None:
        if soft:
            desc = 'New, Fuzzy'
        else:
            desc = 'New, not-so-fuzzy'
    analyze_msm(t_matrix, centroids, desc=desc, show=show, neigen=neigen)
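The 'quantize' branch described in the docstring amounts to replacing each membership vector with a one-hot vector at its argmax. A minimal sketch of that idea (not the original get_giant_state_list implementation):

import numpy as np

def quantize_memberships(memberships):
    # One-hot encode each frame at the cluster of maximum membership.
    hard = np.zeros_like(memberships)
    hard[np.arange(len(memberships)), np.argmax(memberships, axis=1)] = 1.0
    return hard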
Example No. 18
    def set_coordinate_as_committors(self, lag_time=1, symmetrize='transpose'):
        """
        Set the reaction coordinate to be the committors (pfolds).

        Employs the reactant, product states provided as the sources, sinks
        respectively for the committor calculation.

        Parameters
        ----------
        lag_time : int
            The MSM lag time to use (in units of frames) in the estimation
            of the MSM transition probability matrix from the `counts` matrix.

        symmetrize : str {'mle', 'transpose', 'none'}
            Which symmetrization method to employ in the estimation of the
            MSM transition probability matrix from the `counts` matrix.
        """

        # build_msm returns (rev_counts, t_matrix, populations, mapping);
        # keep only the transition probability matrix here.
        t_matrix = MSMLib.build_msm(self.counts, symmetrize=symmetrize)[1]
        self.reaction_coordinate_values = tpt.calculate_committors([self.reactant],
                                                                   [self.product],
                                                                   t_matrix)
        return
Example No. 19
    def __init__(self, link_prob_f, link_prob_b):
        super(EightStateTmat, self).__init__()

        n = self.n_states * 2

        # Do outer product
        connecty_mat = np.array([
            [1.0, link_prob_b],
            [link_prob_f, 1.0]
        ])
        double_counts = np.multiply.outer(connecty_mat, self.counts.todense())
        # Turn it into a 2d matrix
        double_counts = np.swapaxes(double_counts, 1, 2)
        double_counts = np.reshape(double_counts, (n, n))

        double_counts = scipy.sparse.csr_matrix(double_counts, dtype=int)
        rev_counts, tmat, populations, mapping = msml.build_msm(
            double_counts, symmetrize='MLE', ergodic_trimming=True)

        # Record that we now have twice as many states
        self.n_states = n
        self.tmat = tmat
        self.counts = double_counts
        self.rev_counts = rev_counts
Example No. 20
    ratemtx_fn = pjoin(args.outdir, "K.mtx")
    tcounts_fn = pjoin(args.outdir, "tCounts.mtx")
    unsym_fn = pjoin(args.outdir, "tCounts.UnSym.mtx")
    mapping_fn = pjoin(args.outdir, "Mapping.dat")
    fixed_fn = pjoin(args.outdir, "Assignments.Fixed.h5")
    pops_fn = pjoin(args.outdir, "Populations.dat")
    if not os.path.exists(args.outdir):
        os.mkdir(args.outdir)
    outlist = [ratemtx_fn, tcounts_fn, unsym_fn, fixed_fn, pops_fn]
    for e in outlist:
        arglib.die_if_path_exists(e)

    # If the lag time is not one, there will be a unit mismatch between
    # the rates you get and the rates you expect.
    lag_time = 1
    counts, rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(
        assignments, lag_time=lag_time, symmetrize=args.symmetrize
    )
    K = MSMLib.estimate_rate_matrix(rev_counts, assignments)

    np.savetxt(pops_fn, populations)
    np.savetxt(mapping_fn, mapping, "%d")
    scipy.io.mmwrite(ratemtx_fn, K)
    scipy.io.mmwrite(tcounts_fn, rev_counts)
    scipy.io.mmwrite(unsym_fn, counts)
    Serializer.SaveData(fixed_fn, assignments)

    for e in outlist:
        logger.info("Saved %s" % e)
Example No. 21
def get_implied_timescales_helper(args):
    """Helper function to compute implied timescales with multiprocessing

    Does not work in interactive mode

    Parameters
    ----------
    assignments_fn : str
        Path to Assignments.h5 file on disk
    n_states : int
        Number of states
    lag_time : int
        Lag time (in frames) at which to calculate the timescales
    n_implied_times : int, optional
        Number of implied timescales to calculate at each lag time
    sliding_window : bool, optional
        Use sliding window
    trimming : bool, optional
        Use ergodic trimming
    symmetrize : {'MLE', 'Transpose', None}
        Symmetrization method

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """
    
    assignments_fn, lag_time, n_implied_times, sliding_window, trimming, symmetrize = args
    
    try:
        assignments = io.loadh(assignments_fn, 'arr_0')
    except KeyError:
        assignments = io.loadh(assignments_fn, 'Data')
    
    try:
        from msmbuilder import MSMLib
        
        counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lag_time, 
                                                          sliding_window=sliding_window)
        rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(counts, symmetrize, trimming)

    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    #TJL: set Epsilon high, should not raise err here
    n_eigenvectors = n_implied_times + 1
    e_values = get_eigenvectors(t_matrix, n_eigenvectors, epsilon=1)[0]

    # make sure to leave off equilibrium distribution
    lag_times = lag_time * np.ones((n_implied_times))
    imp_times = -lag_times / np.log(e_values[1: n_eigenvectors])

    # save intermediate result in case of failure
    # res = np.zeros((n_implied_times, 2))
    # res[:,0] = lag_times
    # res[:,1] = np.real(imp_times)

    return (lag_times, imp_times)
Example No. 22
def get_implied_timescales_helper(args):
    """Helper function to compute implied timescales with multiprocessing

    Does not work in interactive mode

    Parameters
    ----------
    assignments_fn : str
        Path to Assignments.h5 file on disk
    n_states : int
        Number of states
    lag_time : int
        Lag time (in frames) at which to calculate the timescales
    n_implied_times : int, optional
        Number of implied timescales to calculate at each lag time
    sliding_window : bool, optional
        Use sliding window
    trimming : bool, optional
        Use ergodic trimming
    symmetrize : {'MLE', 'Transpose', None}
        Symmetrization method

    Returns
    -------
    lagTimes : ndarray
        vector of lag times
    impTimes : ndarray
        vector of implied timescales

    See Also
    --------
    MSMLib.build_msm
    get_eigenvectors
    """
    assignments_fn, lag_time, n_implied_times, sliding_window, trimming, symmetrize = args
    logger.info("Calculating implied timescales at lagtime %d" % lag_time)

    try:
        assignments = io.loadh(assignments_fn, "arr_0")
    except KeyError:
        assignments = io.loadh(assignments_fn, "Data")

    try:
        from msmbuilder import MSMLib

        counts = MSMLib.get_count_matrix_from_assignments(assignments, lag_time=lag_time, sliding_window=sliding_window)
        rev_counts, t_matrix, populations, mapping = MSMLib.build_msm(counts, symmetrize, trimming)

    except ValueError as e:
        logger.critical(e)
        sys.exit(1)

    n_eigenvectors = n_implied_times + 1
    if symmetrize in ["MLE", "Transpose"]:
        e_values = get_reversible_eigenvectors(t_matrix, n_eigenvectors, populations=populations)[0]
    else:
        e_values = get_eigenvectors(t_matrix, n_eigenvectors, epsilon=1)[0]

    # Correct for possible change in n_eigenvectors from trimming
    n_eigenvectors = len(e_values)
    n_implied_times = n_eigenvectors - 1

    # make sure to leave off equilibrium distribution
    lag_times = lag_time * np.ones((n_implied_times))
    imp_times = -lag_times / np.log(e_values[1:n_eigenvectors])

    return (lag_times, imp_times)
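A hypothetical driver that maps get_implied_timescales_helper over a grid of lag times; the file name, lag-time grid, and pool size are assumptions.

import multiprocessing
import numpy as np

def scan_lag_times(assignments_fn, lag_times, n_implied_times=5, n_procs=4):
    jobs = [(assignments_fn, lag, n_implied_times, True, True, "MLE")
            for lag in lag_times]
    pool = multiprocessing.Pool(n_procs)
    try:
        results = pool.map(get_implied_timescales_helper, jobs)
    finally:
        pool.close()
        pool.join()
    # Stack into an (n_points, 2) array of (lag_time, implied_timescale) rows.
    return np.vstack([np.column_stack(pair) for pair in results])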
Example No. 23
def run(lag_time, assignments_list, symmetrize='MLE', input_mapping="None", 
        out_dir="./Data/"):

    # set the filenames for output
    tProb_fn = os.path.join(out_dir, "tProb.mtx")
    tCounts_fn = os.path.join(out_dir, "tCounts.mtx")
    map_fn = os.path.join(out_dir, "Mapping.dat")
    pops_fn = os.path.join(out_dir, "Populations.dat")
    if len(assignments_list) == 1:
        assignments_fn_list = [os.path.join(out_dir, "Assignments.Fixed.h5")]
    else:
        assignments_fn_list = [os.path.join(out_dir, 
                                            "Assignments.Fixed.%d.h5" % i)
                               for i in xrange(len(assignments_list))]


    # make sure none are taken
    output_list = [tProb_fn, tCounts_fn, map_fn, pops_fn] + assignments_fn_list
    arglib.die_if_path_exists(output_list)

    # if given, apply mapping to assignments
    for i in xrange(len(assignments_list)):
        if input_mapping != "None":
            MSMLib.apply_mapping_to_assignments(assignments_list[i], 
                                                input_mapping)

    n_assigns_before_trim = get_num_assignments(assignments_list)

    #num_states = np.unique(np.concatenate([ np.unique(ass[np.where(ass != -1)]) 
    #                                       for ass in assignments_list])).shape[0]

    num_states = np.max([np.max(ass) for ass in assignments_list]) + 1
    counts = MSMLib.get_count_matrix_from_assignments(assignments_list[0], 
                                                      n_states=None,
                                                      lag_time=lag_time, 
                                                      sliding_window=False)

    for i in xrange(1, len(assignments_list)):
        print(i)
        counts = counts + \
                 MSMLib.get_count_matrix_from_assignments(assignments_list[i],
                                                          n_states=num_states,
                                                          lag_time=lag_time,
                                                          sliding_window=False)

    rev_counts, t_matrix, populations, mapping = \
        MSMLib.build_msm(counts, symmetrize=symmetrize, ergodic_trimming=True)

    for i in xrange(len(assignments_list)):
        MSMLib.apply_mapping_to_assignments(assignments_list[i], mapping)

    n_assigns_after_trim = get_num_assignments(assignments_list)

    # if had input mapping, then update it
    if input_mapping != "None":
        mapping = mapping[input_mapping]

    # Print a statement showing how much data was discarded in trimming
    percent = (1.0 - float(n_assigns_after_trim) / 
                     float(n_assigns_before_trim)) * 100.0
    logger.warning("Ergodic trimming discarded: "
                   "%f percent of your data", percent)

    # Save all output
    scipy.io.mmwrite(tProb_fn, t_matrix)
    scipy.io.mmwrite(tCounts_fn, rev_counts)
    np.savetxt(map_fn, mapping, "%d")
    np.savetxt(pops_fn, populations)
    for i in xrange(len(assignments_fn_list)):
        assignments_fn = assignments_fn_list[i]
        assignments = assignments_list[i]
        msmbuilder.io.saveh(assignments_fn, assignments)

    for output in output_list:
        logger.info("Wrote: %s", output)

    return