"""Run ``gen_params`` per product group on data read from the Comext yearly HDF5 stores."""
from cPickle import load

import pandas as pd

from gmm_with_weighting import gen_params

#-----------------------------------------------------------------------------
# Data IO
base = '/Volumes/HDD/Users/tom/DataStorage/Comext/yearly/'

# Both stores are opened here and left open; later sections read from `gmm`.
gmm = pd.HDFStore(base + 'gmm_store.h5')
gmm_results = pd.HDFStore(base + 'gmm_results.h5')

# Pickle data is binary: open with 'rb' so the bytes are never run through
# text-mode newline translation. The handle gets its own name so it does not
# collide with the `declarants` list built further down the script.
with open(base + 'declarants_no_002_dict.pkl', 'rb') as pkl_file:
    country_code = load(pkl_file)

ctry = '002'
df = gmm.select('by_ctry_' + ctry)

# Only the first 1000 rows are used for the run below (small sample).
sub = df[:1000]
sub_p = sub.dropna().groupby(level='PRODUCT_NC')

#-----------------------------------------------------------------------------
# Python version
# One gen_params call per PRODUCT_NC group, starting from x0=[2, 1].
res = {name: gen_params(group, x0=[2, 1]) for name, group in sub_p}
# %timeit {name: gen_params(group, x0=[2, 1]) for name, group in sub_p}
# 1 loops, best of 3: 1.98 s per loop

#-----------------------------------------------------------------------------
# Cythonized
# `np.inf` is needed below; imported here so this section is self-sufficient.
import numpy as np

declarants = sorted(country_code.keys())

#-----------------------------------------------------------------------------
# Main loop. Optimize to get params for each good, for each declarant.
ctry = '001'
try:
    df = gmm.select('ctry_' + ctry)
    df = df.dropna()
    # NOTE(review): indexing a DataFrame with a boolean DataFrame masks the
    # non-matching cells to NaN instead of dropping rows, so +inf entries
    # become NaN *after* the dropna() above. If the intent was to drop those
    # rows, replace inf with NaN before calling dropna() -- confirm.
    df = df[~(df == np.inf)]
    by_product = df.groupby(level='good')
except (KeyError, AssertionError):
    # The exception types must be a tuple: `except KeyError, AssertionError`
    # only catches KeyError and rebinds the name AssertionError to it.
    with open('gmm_logging.txt', 'a') as f:
        # Trailing newline keeps appended log entries on separate lines.
        f.write('Failed to open or group ctry: {}\n'.format(ctry))
    # Nothing below can run without `by_product`; re-raise so the real cause
    # surfaces instead of a later NameError.
    raise

#---------------------------------------------------------------------
# GMM Estimation.
# Without Weighting
# res = {name: gen_params(group, [2, 1], name, country=ctry, W=None)
#        for name, group in by_product}
#---------------------------------------------------------------------

# Trial run on a handful of groups only: take the first few entries of the
# group-name -> index-labels mapping (dict order) and rebuild a small frame.
# If fewer groups exist, all of them are used.
N_SAMPLE_GROUPS = 5
sample_groups = list(by_product.groups.items())[:N_SAMPLE_GROUPS]

# `.loc` is the label-based replacement for the deprecated `.ix`.
test = pd.concat([df.loc[labels] for _, labels in sample_groups])
gr = test.groupby(level='good')

res = {
    name: gen_params(group, [2, 1], name, country=ctry, W=None,
                     options={'disp': True})
    for name, group in gr
}