def test_1():
    """Run the featurizer, tICA and KMeans commands and compare with the API.

    Each ``mixtape`` command is executed through the shell inside a temporary
    directory and must exit with status 0. The first element of the
    transformed output written by the command line is then compared with the
    first element obtained by fitting the same models in-process on the
    featurized data.
    """
    trajectory_path = get_fn("frame0.h5")
    with enter_temp_directory():
        shell_commands = (
            "mixtape DRIDFeaturizer --trjs {} --out a.pkl".format(trajectory_path),
            "mixtape DihedralFeaturizer --types phi psi --trjs {} --out b.pkl".format(trajectory_path),
            "mixtape tICA --inp a.pkl --out ticamodel.pkl --transformed tics.pkl",
            "mixtape KMeans --random_state 0 --n_init 1 --inp b.pkl --out kmeans.pkl --transformed labels.pkl",
        )
        # Commands run in order: the featurizers produce the inputs that the
        # tICA and KMeans commands read.
        for command in shell_commands:
            assert os.system(command) == 0

        cli_labels = verboseload("labels.pkl")
        api_labels = KMeans(random_state=0, n_init=1).fit_predict(verboseload("b.pkl"))

        cli_tics = verboseload("tics.pkl")
        api_tics = tICA().fit_transform(verboseload("a.pkl"))

        eq(cli_labels[0], api_labels[0])
        eq(cli_tics[0], api_tics[0])
def start(self):
    """Fit ``self.instance`` on the dataset at ``self.inp`` and save it.

    The object loaded from ``self.inp`` must be a list; otherwise the problem
    is reported through ``self.error``. The fitted instance is written to
    ``self.out``.
    """
    print(self.instance)

    dataset = verboseload(self.inp)
    is_sequence_list = isinstance(dataset, list)
    if not is_sequence_list:
        message = '--inp must contain a list of arrays. "%s" has type %s' % (
            self.inp, type(dataset))
        self.error(message)

    print('fitting...')
    self.instance.fit(dataset)

    verbosedump(self.instance, self.out)
    print('All done')
def start(self):
    """Fit ``self.instance`` on the input dataset and write the outputs.

    The dataset loaded from ``self.inp`` must be a list; otherwise the problem
    is reported through ``self.error``. After fitting, the transformed dataset
    is written to ``self.transformed`` and the fitted instance to ``self.out``;
    an empty string for either path skips that output. If both paths are empty
    there would be nothing to write, so this is reported through
    ``self.error`` before any data is loaded.
    """
    print(self.instance)

    # Compare strings by value: identity checks against a literal ('is')
    # depend on interning and emit a SyntaxWarning on Python 3.8+.
    # The guard also reads ``self.transformed`` -- the attribute actually used
    # for the output below -- rather than ``self.transform``.
    if self.out == '' and self.transformed == '':
        self.error('One of --out or --transformed should be specified')

    dataset = verboseload(self.inp)
    if not isinstance(dataset, list):
        self.error('--inp must contain a list of arrays. "%s" has type %s' % (
            self.inp, type(dataset)))

    # Only the shapes of the first (up to) three sequences are shown.
    leading_shapes = ', '.join(str(sequence.shape) for sequence in dataset[:3])
    print('fit() on %d sequences of shape %s...' % (len(dataset), leading_shapes))
    self.instance.fit(dataset)

    if self.transformed != '':
        transformed = self.instance.transform(dataset)
        verbosedump(transformed, self.transformed)

    if self.out != '':
        verbosedump(self.instance, self.out)

    print('All done')
def load_quadwell(data_home=None, random_state=None):
    """Loader for quad-well dataset

    Parameters
    ----------
    data_home : optional, default: None
        Specify another cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed for the pseudorandom number generator used to generate the
        trajectories. If random_state is None, the global numpy PRNG is
        used, new simulations are performed on every call and the
        trajectories are not cached. If random_state is an int, the
        simulations are cached in ``data_home``, or loaded from
        ``data_home`` if simulations with that seed have been performed
        already.

    Returns
    -------
    Bunch
        Object with a ``trajectories`` entry and a ``DESCR`` entry holding
        the dataset description.

    Raises
    ------
    TypeError
        If ``random_state`` is neither None nor an integer.
    """
    # V = 4*(x**8 + 0.8*np.exp(-80*x**2) + 0.2*(-80*(x-0.5)**2) + 0.5*np.exp(-40*(x+0.5)**2))
    rng = check_random_state(random_state)

    cache_dir = join(get_data_home(data_home=data_home), 'quadwell')
    if not exists(cache_dir):
        makedirs(cache_dir)

    # Unseeded request: simulate afresh and skip the on-disk cache entirely.
    if random_state is None:
        return Bunch(trajectories=_simulate_quadwell(rng),
                     DESCR=QUADWELL_DESCRIPTION)

    if not isinstance(random_state, numbers.Integral):
        raise TypeError('random_state must be an int')

    cache_file = join(cache_dir,
                      'version-0_random-state-%d.pkl' % random_state)
    if exists(cache_file):
        trajectories = verboseload(cache_file)
    else:
        trajectories = _simulate_quadwell(rng)
        verbosedump(trajectories, cache_file)

    return Bunch(trajectories=trajectories, DESCR=QUADWELL_DESCRIPTION)
def load_doublewell(data_home=None, random_state=None):
    """Loader for double-well dataset

    Parameters
    ----------
    data_home : optional, default: None
        Specify another cache folder for the datasets. By default
        all mixtape data is stored in '~/mixtape_data' subfolders.
    random_state : {int, None}, default: None
        Seed for the pseudorandom number generator used to generate the
        trajectories. If random_state is None, the global numpy PRNG is
        used, new simulations are performed on every call and the
        trajectories are not cached. If random_state is an int, the
        simulations are cached in ``data_home``, or loaded from
        ``data_home`` if simulations with that seed have been performed
        already.

    Returns
    -------
    Bunch
        Object with a ``trajectories`` entry and a ``DESCR`` entry holding
        the dataset description.

    Raises
    ------
    TypeError
        If ``random_state`` is neither None nor an integer.
    """
    random = check_random_state(random_state)
    data_home = join(get_data_home(data_home=data_home), 'doublewell')
    if not exists(data_home):
        makedirs(data_home)

    if random_state is None:
        trajectories = _simulate_doublewell(random)
    else:
        # Raise explicitly instead of asserting: assert statements are
        # removed when Python runs with -O, which would let a non-integer
        # seed reach the '%d' cache-file formatting below.
        if not isinstance(random_state, numbers.Integral):
            raise TypeError('random_state must be an int')
        path = join(data_home, 'version-1_random-state-%d.pkl' % random_state)
        if exists(path):
            trajectories = verboseload(path)
        else:
            trajectories = _simulate_doublewell(random)
            verbosedump(trajectories, path)

    return Bunch(trajectories=trajectories, DESCR=DOUBLEWELL_DESCRIPTION)