        if j.is_dir():
            yield (j, freq)

def acc(args):
    (path, freq) = args
    (observation, prediction) = [ int(x) for x in path.parts[-2:] ]
    logger.getlogger().info('o: {0} p: {1}'.format(observation, prediction))

    data = [ pd.read_pickle(str(x)) for x in path.glob('*.pkl') ]
    df = pd.concat(data, axis=1)
    df = df.resample(freq).sum().mean()

    return (observation, prediction, df)

args = cli.CommandLine(cli.optsfile('characterisation-plot')).args

top_level = Path(args.source)
target = Path(args.target)
target.mkdir(parents=True, exist_ok=True)

freqs = args.freqs if args.freqs else [ 'D' ] # XXX defaults?
names = [ 'observation', 'prediction' ]

log = logger.getlogger(True)

for fq in freqs:
    log.info('collect {0}'.format(fq))
    with Pool(cpu_count() // 2, maxtasksperchild=1) as pool:
        f = pool.imap_unordered
        d = { tuple(i): j.values for (*i, j) in f(acc, mkargs(top_level, fq)) }
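# A minimal sketch of the input layout acc() assumes, inferred from
# path.parts[-2:]: pickled frames under digit-named observation/prediction
# directories. The directory names, window pairs, and values below are
# hypothetical, for illustration only.
import pandas as pd
from pathlib import Path

root = Path('source')                      # stands in for args.source
for (obs, pred) in ((60, 15), (60, 30)):   # hypothetical window pairs
    leaf = root / str(obs) / str(pred)
    leaf.mkdir(parents=True, exist_ok=True)
    index = pd.date_range('2016-01-01', periods=4, freq='min')
    frame = pd.DataFrame({ 'node-1': range(4) }, index=index)
    frame.to_pickle(str(leaf / 'node-1.pkl'))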
from configparser import ConfigParser

from lib import db
from lib import cli

cargs = cli.CommandLine(cli.optsfile("prediction")) # /etc/opts/prediction
args = cargs.args

config = ConfigParser()
config.read(args.config) # --config

# An empty mapping passes no keyword arguments, so the defaults apply
# (unpacking None with ** would raise a TypeError).
dbinfo = config["database"] if "database" in config else {}
db.EstablishCredentials(**dbinfo)
db.genop(int(config["parameters"]["intra-reporting"]))
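# A hypothetical --config file matching the reads above; the section and
# option names come from the code, the values are invented. Any keys under
# [database] must match db.EstablishCredentials' keyword arguments.
#
#   [database]
#   # e.g. host = localhost
#
#   [parameters]
#   intra-reporting = 10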
def f(*args):
    (_, node, cargs) = args

    log = Logger().log
    log.info('{0}: setup +'.format(node))

    with DatabaseConnection() as conn:
        source = nd.Node(node, conn)
        neighbors = [ nd.Node(x, conn) for x in source.neighbors ]

    log.info('{0}: setup -'.format(node))

    classes = [ WindowInfluence ] # [ MinuteInfluence, WindowInfluence ]

    return [ i(source, neighbors, cargs).run() for i in classes ]

with Pool() as pool:
    cargs = cli.CommandLine(cli.optsfile('main'))
    results = pool.starmap(f, nd.nodegen(cargs.args))

with NamedTemporaryFile(mode='wb', delete=False) as fp:
    pickle.dump(results, fp)
    msg = 'pickle: {0}'.format(fp.name)
    Logger().log.error(msg)

# with open('/tmp/tmpe2x8wi0d', mode='rb') as fp:
#     results = pickle.load(fp)

header = [ 'type', 'source', 'target', 'pearson',
            except ValueError:
                break
            if all([ x in row for x in tbl ]):
                self.data.append(row)

class Ireland(GetRemoteXML):
    def __init__(self, url, retries, timeout):
        super().__init__(url, retries, timeout, None, None)

processors = {
    'nyc': NYC,
    'mass': Massachusetts,
    'ie': Ireland,
}

cargs = cli.CommandLine(cli.optsfile('storage'))
args = cargs.args

handler = processors[args.source]
try:
    data = handler(args.url, args.retries, args.timeout)
    data.parse(args.table, args.root)
    data.to_file(args.output)
    # data.check(args.output)
except AttributeError as err:
    log = logger.getlogger()
    log.critical(err)
except AssertionError:
    (*_, tb) = sys.exc_info()
    (*_, tb_info) = map(list, traceback.extract_tb(tb))
    # items = zip(range(oneday), [ [] ] * oneday)
    totals = OrderedDict()
    for i in range(oneday):
        totals[i] = [ 0 ]

    for i in df.index:
        key = cp.bucket(i)
        totals[key].append(df.loc[i]) # label lookup; .ix is gone from pandas

    vals = [ agg(x) for x in totals.values() ]
    vals.append(nid) # this is important

    return vals

cargs = cli.CommandLine(cli.optsfile('chgpt'))
args = cargs.args

oneday = round(constant.day / constant.minute)
window = nd.Window(args.window_obs, args.window_pred, args.window_trgt)

if args.resume:
    with open(args.resume, mode='rb') as fp:
        observations = pickle.load(fp)
    (measurements, nodes) = data.cleanse(observations)
else:
    db.genop(args.reporting)
    opts = [ window, oneday, args.threshold, np.mean ]
    with Pool() as pool:
        observations = pool.starmap(f, nd.nodegen(opts))
    observations = list(filter(None, observations))
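# A plausible stand-in for cp.bucket, assuming it maps a timestamp to its
# minute-of-day slot (oneday above works out to 1440); the real
# implementation lives in the cp module and may differ.
def bucket(timestamp):
    return timestamp.hour * 60 + timestamp.minute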
        N = k * len(groups) # total sample count (k per group)
        msg = '{0:2d} {1:2d} '.format(i, len(groups))
        fmt = 'F({2}, {3}) = {0:6.3f}, p = {1:.3f} {4} '
        for j in (stats.f_oneway, stats.kruskal):
            (v, p) = j(*samples)
            msg += fmt.format(v, p, k - 1, N - k, issig(p))
        print(msg)

plotargs = { # keys must be valid --display options
    'presentation': PlotArgs((120, 20), 36),
    'paper': PlotArgs((7, 3), 10),
}

user = cli.CommandLine(cli.optsfile('prediction-plot'))
if not user.args.gfilter:
    user.args.gfilter = []

# DataFrame.from_csv is gone from pandas; read_csv is the equivalent.
raw = pd.read_csv(user.args.data, sep=';')
assert all(x in raw.columns for x in user.args.gfilter)
# Comparing against np.nan with != is always True; notna() actually
# drops the rows without a confusion matrix.
raw = raw[raw['confusion_matrix'].notna()]

grouped = raw.groupby(user.args.gfilter + ['node'])[user.args.metric]
df = grouped.agg([ np.mean, stats.sem ]).unstack(0)

if user.args.gfilter:
    args = {
        'kind': 'bar',
        'yerr': df['sem'],
        'ylim': (0, 1),
        values = model.predict(model.classify())
    except ValueError as v:
        log.error(v)

    return Results(keys, values)

#
# Setup
#
log = logger.getlogger(True)
log.info('phase 1')
log.info('db version: {0}'.format(db.mark()))

cargs = cli.CommandLine(cli.optsfile('prediction')) # /etc/opts/prediction

config = ConfigParser()
config.read(cargs.args.config) # --config
params = config['parameters']

writer = ResultsWriter(config['output'].getboolean('print-header'))

# Establish the database credentials. An empty mapping passes no keyword
# arguments, which falls back to the defaults (unpacking None with **
# would raise a TypeError).
dbinfo = config['database'] if 'database' in config else {}
db.EstablishCredentials(**dbinfo)

#
# Processing
#
    try:
        res = sm.OLS(endog=endog, exog=exog, missing='drop').fit()
        mkplot(node, 'ols', res, cargs.output)
    except (LinAlgError, ValueError) as err:
        log.error('{0}: {1},{2}'.format(err, endog.shape, exog.shape))

def var_(*args):
    (_, nid, cargs) = args

    node = nd.Cluster(nid)
    log.info('var: {0}'.format(str(node)))

    endog = node.readings.dropna()
    if not endog.empty and cargs.lags:
        maxlags = max(cargs.lags)
        try:
            res = vm.VAR(endog=endog).fit(maxlags=maxlags)
            mkplot(node, 'var', res, cargs.output, maxlags)
        except (LinAlgError, ValueError) as err:
            log.error(err)

# Fit = namedtuple('Fit', [ 'node', 'model', 'lags' ])

with Pool() as pool:
    cargs = cli.CommandLine(cli.optsfile('regression'))
    for i in [ var_, ols_ ]:
        results = pool.starmap(i, nd.nodegen(cargs.args))
        # fname = os.path.join(cargs.args.output, i.__name__, '.pkl')
        # with open(fname, mode='wb') as fp:
        #     r = list(filter(None, results))
        #     pickle.dump(r, fp)
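# A self-contained check of the statsmodels calls used above, run on
# synthetic data; the shapes and seed are arbitrary, not from the source.
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.vector_ar import var_model as vm

rng = np.random.default_rng(0)
endog = rng.normal(size=(200, 2))         # two co-observed series
res = vm.VAR(endog=endog).fit(maxlags=4)  # same call shape as var_
print(res.aic)

y = endog[:, 0]
X = sm.add_constant(endog[:, 1])
print(sm.OLS(endog=y, exog=X, missing='drop').fit().rsquared)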
import itertools
import configparser

from tempfile import NamedTemporaryFile

from lib import db
from lib import cli
from lib import node

# http://stackoverflow.com/a/5228294
# e.g. product({'a': [1, 2], 'b': ['x']}) yields
# {'a': 1, 'b': 'x'} and then {'a': 2, 'b': 'x'}.
def product(d):
    for i in itertools.product(*d.values()):
        yield dict(zip(d, i))

cargs = cli.CommandLine(cli.optsfile('config')) # /etc/opts/config
args = cargs.args

tmpargs = {
    'mode': 'w',
    'delete': False,
    'dir': args.output,
    'prefix': '', # the default (None) is actually 'tmp'
    'suffix': '.ini',
}

#
# Options that can be simultaneous during a single run
#
machines = [
    # svm
    # bayes