# Driver script: fit an FMIX chain to the columns of the dataset at `path`
# selected on the command line, then write the empirical data, the sampler
# results and the posterior predictive under ./output/fmix_3d/.
#
# Command-line arguments: integer column indices. Every output file name has
# three `{}` placeholders, so at least three indices are required.
import sys
from simplex import *
from data import Data_From_Raw
from pandas import read_csv

path = './datasets/ivt_nov_mar.csv'

col_idx = [int(x) for x in sys.argv[1:]]

# Echo the raw arguments so the run log shows which columns were requested.
for x in sys.argv[1:]:
    print(x)

# Previously a short argument list surfaced as a bare IndexError raised from
# inside str.format; stop with an explicit usage message instead.
if len(col_idx) < 3:
    sys.exit(
        'usage: {} COL COL COL  (three integer column indices)'.format(sys.argv[0])
    )

# The paths are formatted exactly once here; the original formatted emp_path a
# second time when passing it to write_empirical, which was a no-op.
emp_path = './output/fmix_3d/empirical_{}_{}_{}.csv'.format(*col_idx)
res_path = './output/fmix_3d/results_{}_{}_{}.db'.format(*col_idx)
out_path = './output/fmix_3d/postpred_{}_{}_{}.csv'.format(*col_idx)

# Keep only the requested columns (positional selection).
raw = read_csv(path).iloc[:, col_idx]
data = Data_From_Raw(raw, True)
data.write_empirical(emp_path)

fmix = FMIX_Chain(data, 10, GammaPrior(0.1, 0.1), DirichletPrior(1.))
fmix.sample(50000)
# NOTE(review): 25000 / 5 are presumably burn-in and thinning -- confirm
# against FMIX_Chain.write_to_disk.
fmix.write_to_disk(res_path, 25000, 5)

res = FMIX_Result(res_path)
res.write_posterior_predictive(out_path)

# EOF
from dp_projgamma import DPMPG, ResultDPMPG
from data import Data_From_Raw
from pandas import read_csv
from random import shuffle

path = './datasets/ivt_nov_mar.csv'

# Script entry point: build the data object from the CSV at `path`, run the
# DPMPG sampler, persist its results, and write the posterior predictive.
if __name__ == '__main__':
    # Output locations used by this run.
    empirical_csv = './output/dpmpg_empirical_decluster.csv'
    results_db = './output/dpmpg_results_decluster.db'
    postpred_csv = './output/dpmpg_postpred_decluster.csv'

    raw = read_csv(path)
    # Disabled: shuffle the column order before building the data object.
    # cols = raw.columns.values.tolist()
    # shuffle(cols)
    # raw = raw.reindex(columns = cols)
    data = Data_From_Raw(raw, True)
    data.write_empirical(empirical_csv)

    sampler = DPMPG(data)
    sampler.initialize_sampler(10000)
    sampler.sample(10000)
    sampler.write_to_disk(results_db, 5000, 1)

    # Reload the stored results to produce the posterior predictive file.
    result = ResultDPMPG(results_db)
    result.write_posterior_predictive(postpred_csv)

# EOF
from simplex import *
from data import Data_From_Raw
from pandas import read_csv

path = './datasets/ivt_nov_mar.csv'

# Script entry point: fit a DPSimplex chain to the CSV at `path` and write the
# empirical data, sampler results and posterior predictive under
# ./output/dpmix/.
if __name__ == '__main__':
    results_db = './output/dpmix/results_2_1e-1.db'
    postpred_csv = './output/dpmix/postpred_2_1e-1.csv'

    raw = read_csv(path)
    data = Data_From_Raw(raw, True)
    data.write_empirical('./output/dpmix/empirical.csv')

    # The three priors are handed to the chain positionally, in this order.
    # NOTE(review): the "2_1e-1" tag in the output names appears to mirror the
    # last prior's parameters -- confirm before changing either.
    priors = (
        GammaPrior(0.5, 0.5),
        GammaPrior(2., 2.),
        GammaPrior(2., 1e-1),
    )
    model = DPSimplex_Chain(data, *priors)
    model.sample(20000)
    model.write_to_disk(results_db, 10000, 2)

    result = DPSimplex_Result(results_db)
    result.write_posterior_predictive(postpred_csv)

# EOF
def instantiate_data(self, path, decluster=True):
    """
    Read the raw CSV at `path` and store the resulting data object on
    `self.data`.

    path:      raw data path
    decluster: passed through as the second positional argument of
               Data_From_Raw
    """
    self.data = Data_From_Raw(pd.read_csv(path), decluster)
from data import Data_From_Raw
from pandas import read_csv
from mpi4py import MPI

# MPI driver: rank 0 acts as the master that owns the data and writes the
# output; every other rank runs a slave chain that waits on the master.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

pt.MPI_MESSAGE_SIZE = 2**20

if rank > 0:
    worker = pt.PTSlave(comm=comm, statmodel=DPMPG_Chain)
    worker.watch()
elif rank == 0:
    raw = read_csv('./datasets/ivt_nov_mar.csv')
    # Reorder so the last two columns come first, followed by the rest.
    transposed = raw.T
    raw2 = np.hstack((transposed[-2:].T, transposed[:-2].T))
    data = Data_From_Raw(raw2, True)
    data.write_empirical('./output/dppgln2/empirical.csv')

    # Geometric ladder 1.05**k with one entry per non-master rank.
    ladder = 1.05 ** np.arange(size - 1)
    model = pt.PTMaster(
        comm,
        temperature_ladder=ladder,
        data=data,
        prior_eta=GammaPrior(2., .1),
    )
    model.sample(10000)
    model.write_to_disk('./output/dppgln2/results_2_1e-1.db', 5000, 1)
    model.complete()
    # res = DPMPG_Result('./output/dppgln/results_2_1e-1.db')  # not working yet.
    # res.write_posterior_predictive('./output/dppgln/postpred_2_1e-1.csv')

## relevant to non-MPI version
# if __name__ == '__main__':
) except: data = MixedData( raw, eval(p.cats), decluster=eval(p.decluster), quantile=float(p.quantile), ) else: if eval(p.sphere): data = Data_From_Sphere(raw) else: try: data = Data_From_Raw( raw, decluster=eval(p.decluster), quantile=float(p.quantile), ) except: data = Data_From_Raw( raw, decluster=eval(p.decluster), quantile=float(p.quantile), ) ## If there's a supplied outcome, initialize it if os.path.exists(p.outcome): outcome = read_csv(p.outcome).values data.fill_outcome(outcome) ## Initialize Chain
def load_raw(self, path):
    """
    Read the CSV at `path` and keep the Data_From_Raw built from it
    (second argument fixed to True) on `self.data`.
    """
    frame = pd.read_csv(path)
    dataset = Data_From_Raw(frame, True)
    self.data = dataset
size = comm.Get_size() pt.MPI_MESSAGE_SIZE = 2**24 args = argparser() Chain = models.Chains[args.model] Result = models.Results[args.model] if rank > 0: chain = pt.PTSlave(comm=comm, statmodel=Chain) chain.watch() if rank == 0: raw = read_csv(args.in_path) data = Data_From_Raw(raw, decluster=eval(args.decluster), quantile=float(args.quantile)) if args.model.startswith('dp'): emp_path = os.path.join( args.out_folder, args.model, 'empirical.csv', ) out_path = os.path.join( args.out_folder, args.model, 'results_{}_{}.db'.format(args.eta_shape, args.eta_rate), ) pp_path = os.path.join( args.out_folder,
from gendirichlet import *
from data import Data_From_Raw
from pandas import read_csv

path = './datasets/ivt_nov_mar.csv'

# Script entry point: fit an MGD chain to the CSV at `path`, store the
# results under ./output/mgd/, and write the posterior predictive.
if __name__ == '__main__':
    results_db = './output/mgd/results_test.db'
    postpred_csv = './output/mgd/postpred_test.csv'

    raw = read_csv(path)
    data = Data_From_Raw(raw, True)

    # Priors are passed to the chain positionally, in exactly this order,
    # after the data and the integer 10.
    priors = (
        DirichletPrior(0.5),
        GammaPrior(0.5, 0.5),
        GammaPrior(2.0, 2.0),
        GammaPrior(0.5, 0.5),
        GammaPrior(2.0, 2.0),
    )
    model = MGD_Chain(data, 10, *priors)
    model.sample(20000)
    model.write_to_disk(results_db, 10000, 2)

    result = MGD_Result(results_db)
    result.write_posterior_predictive(postpred_csv)

    # Disabled alternative: the same run with a DPPG chain.
    # model = DPPG_Chain(
    #         data,
    #         GammaPrior(2., 0.5),
    #         )
    # model.sample(20000)
    # model.write_to_disk('./output/dpmpg/results_test.db', 10000, 2)
    # res = DPPG_Result('./output/dpmpg/results_test.db')
    # res.write_posterior_predictive('./output/dpmpg/postpred_test.csv')
sigmas[np.where(sigmas.T[0] == i)[0], 1:] for i in range(self.nSamp) ] self.samples.mu = mus self.samples.Sigma = Sigmas.reshape(self.nSamp, self.nCol, self.nCol) self.samples.r = rs return def __init__(self, path): self.load_data(path) return if __name__ == '__main__': from data import Data_From_Raw from projgamma import GammaPrior from pandas import read_csv import os import time raw = read_csv('./datasets/ivt_nov_mar.csv') data = Data_From_Raw(raw, decluster=True, quantile=0.95) data.write_empirical('./test/empirical.csv') model = Chain(data, prior_eta=GammaPrior(2, 1), p=10) model.sample(10000) model.write_to_disk('./test/results.pkl', 5000, 5) res = Result('./test/results.pkl') res.write_posterior_predictive('./test/postpred.csv') # EOF
def instantiate_data(self, path, quantile=0.95, decluster=True):
    """
    Read the raw CSV at `path`, store the resulting data object on
    `self.data`, then store a posterior predictive on `self.postpred`.

    path:      raw data path
    quantile:  forwarded to Data_From_Raw as `quantile`
    decluster: forwarded to Data_From_Raw as `decluster`
    """
    frame = pd.read_csv(path)
    self.data = Data_From_Raw(
        frame,
        decluster=decluster,
        quantile=quantile,
    )
    # The data must be in place first: the predictive is generated by a
    # method on this same object, called with a fixed argument of 10.
    self.postpred = self.generate_posterior_predictive_hypercube(10)
self.samples.alpha = alphas self.samples.beta = betas self.samples.zeta = [ zetas[np.where(zetas.T[0] == i)[0], 1:] for i in range(self.nSamp) ] self.samples.r = rs return def __init__(self, path): self.load_data(path) return if __name__ == '__main__': pass from data import Data_From_Raw from projgamma import GammaPrior from pandas import read_csv import os raw = read_csv('./datasets/ivt_nov_mar.csv') data = Data_From_Raw(raw, decluster=True, quantile=0.95) model = Chain(data, prior_eta=GammaPrior(2, 1), p=10) model.sample(5000) model.write_to_disk('./test/results.pickle', 3000, 2) res = Result('./test/results.pickle') # res.write_posterior_predictive('./test/postpred.csv') # EOF