def fun1():
    """Loop forever, alternating between ``ip_get.run()`` and ``query.run()``.

    A pass counter starts at 0.  On every pass whose counter is a multiple
    of 100 (which includes the very first pass) ``ip_get.run()`` is called;
    on every other pass ``query.run()`` is called.  The loop has no exit
    condition, so the function only stops if one of those calls raises.
    """
    passes = 0
    while True:
        if passes % 100:
            query.run()
        else:
            # counter is 0, 100, 200, ...
            ip_get.run()
        passes += 1
def main():
    """Command-line entry point: dispatch ``load``, ``query`` or ``clear``.

    ``sys.argv`` must hold one sub-command (``load``, ``query`` or ``clear``)
    and at most one extra argument.  On invalid usage a message is printed
    and the process exits with status 1 before any connection is opened.

    * ``load loopdata`` calls ``loader.load_loopdata(client)``;
      any other ``load`` invocation calls ``loader.load(client)``.
    * ``query`` calls ``query.run(client)``.
    * ``clear`` calls ``loader.clear(client)``.
    """
    commands = ('load', 'query', 'clear')
    if len(sys.argv) not in (2, 3) or sys.argv[1] not in commands:
        # The usage line now lists every accepted form; it used to omit
        # `clear` and the optional `loopdata` argument.
        print('Error: invalid usage\n'
              'Usage: python main.py load [loopdata]|query|clear')
        sys.exit(1)

    command = sys.argv[1]
    client = connect()

    if command == 'load':
        if len(sys.argv) == 3 and sys.argv[2] == "loopdata":
            loader.load_loopdata(client)
        else:
            loader.load(client)
    elif command == 'query':
        query.run(client)
    elif command == 'clear':
        loader.clear(client)
# since we don't currently have any way of passing args into mappers
# NOTE(review): the beginning of the comment above lies outside this excerpt;
# presumably the upper-case names below are globals read by `mapper` -- confirm.
args = parser.parse_args()

# Both change thresholds are given on the command line and scaled by 10**-4;
# the future threshold falls back to the past one when it is not supplied
# (or is supplied as a falsy value such as 0).
PAST_CHANGE = args.past_change * 10**-4
FUTURE_CHANGE = args.future_change * 10**-4 if args.future_change else PAST_CHANGE
OFFSET_TICKS = args.time_offset * 10
START_HOUR = args.start_hour
END_HOUR = args.end_hour
assert START_HOUR < END_HOUR

# Single-argument print(...) calls emit the same text as the former
# `print a, b` statements on Python 2 and are also valid on Python 3.
print("past change % " + str(PAST_CHANGE))
print("future change % " + str(FUTURE_CHANGE))
print("time offset in ticks " + str(OFFSET_TICKS))

all_ccys = query.run(args.pattern, map_hdf=mapper, init={}, combine=combine)

# Once all the counts have been collected, do the simple task of
# computing probs by dividing event counts by the totals
result = {}
for (ccy, (event_count, total)) in all_ccys.items():
    # Guard against division by zero when nothing was counted for this key.
    prob = 0 if total == 0 else event_count / float(total)
    result[ccy] = {'prob': prob, 'count': event_count, 'total': total}
print(result)
def combine(all_amts, ccy_amts):
    """Fold one mapper result into the running accumulator.

    ``ccy_amts`` is a ``(ccy, amts)`` pair.  ``amts`` is appended to the list
    stored under ``ccy`` in ``all_amts``; the list is created the first time
    a key is seen.  ``all_amts`` is mutated in place and nothing is returned.

    The pair is unpacked explicitly because tuple parameters in a ``def``
    signature are Python-2-only syntax; positional callers are unaffected.
    """
    ccy, amts = ccy_amts
    all_amts.setdefault(ccy, []).extend(amts)


from argparse import ArgumentParser

# NOTE(review): the description text looks like leftover argparse-tutorial
# boilerplate; left unchanged because it is user-visible output.
parser = ArgumentParser(description='Process some integers.')
parser.add_argument('pattern', metavar='P', type=str,
                    help='s3://capk-bucket/some-hdf-pattern')
parser.add_argument(
    '--min-duration', dest='min_dur', type=int, default=None,
    help='ignore crosses which last shorter than this min. duration in milliseconds')

if __name__ == '__main__':
    args = parser.parse_args()
    assert args.pattern
    assert len(args.pattern) > 0
    # Module-level setting; `cross_amounts` is defined outside this excerpt.
    MIN_DUR = args.min_dur
    df = query.run(args.pattern,
                   map_hdf=cross_amounts,
                   init={},
                   combine=combine,
                   post_process=query_helpers.summarize_continuous)
    print(df)
# Call query.run() with no arguments, keep whatever it returns in `r`,
# then invoke full_query() on that object.
import query

r = query.run()
r.full_query()
from optparse import OptionParser

import query
import cross_info
import query_helpers


def cross_durations(hdf):
    """Return ``(ccy, durs)`` for one HDF file.

    ``ccy`` is read from ``hdf.attrs['ccy']``; ``durs`` is the list of the
    ``dur`` attribute of every item produced by
    ``cross_info.find_crossed_markets_in_hdf(hdf)``.
    """
    ccy = hdf.attrs['ccy']
    durs = [cross.dur for cross in cross_info.find_crossed_markets_in_hdf(hdf)]
    return ccy, durs


def combine(all_durs, ccy_durs):
    """Fold one ``(ccy, durs)`` mapper result into ``all_durs`` in place.

    ``durs`` is appended to the list stored under ``ccy``, which is created
    on first use.  Returns nothing.

    Fixes the former ``setsdefault`` typo, which raised ``AttributeError`` on
    every call.  The pair is unpacked explicitly because tuple parameters in
    a ``def`` signature are Python-2-only syntax.
    """
    ccy, durs = ccy_durs
    all_durs.setdefault(ccy, []).extend(durs)


parser = OptionParser(usage="usage: %prog s3://bucket-name/key-pattern")

if __name__ == '__main__':
    (options, args) = parser.parse_args()
    # Exactly one positional argument (the key pattern) is expected.
    assert len(args) == 1
    df = query.run(args[0],
                   map_hdf=cross_durations,
                   init={},
                   combine=combine,
                   post_process=query_helpers.summarize_continuous)
    print(df)
def combine(all_amts, ccy_amts):
    """Merge one ``(ccy, amts)`` mapper result into ``all_amts`` in place.

    The values in ``amts`` are appended to the list kept under ``ccy``; a new
    list is started when ``ccy`` has not been seen before.  Returns nothing.

    Explicit unpacking replaces the tuple parameter, which is Python-2-only
    syntax; callers passing the pair positionally are unaffected.
    """
    ccy, amts = ccy_amts
    all_amts.setdefault(ccy, []).extend(amts)


from argparse import ArgumentParser

# NOTE(review): the description text looks like leftover argparse-tutorial
# boilerplate; left unchanged because it is user-visible output.
parser = ArgumentParser(description="Process some integers.")
parser.add_argument("pattern", metavar="P", type=str, help="s3://capk-bucket/some-hdf-pattern")
parser.add_argument(
    "--min-duration",
    dest="min_dur",
    type=int,
    default=None,
    help="ignore crosses which last shorter than this min. duration in milliseconds",
)
parser.add_argument("--future", dest="future", type=int, default=10000)

if __name__ == "__main__":
    args = parser.parse_args()
    # Same output as the former `print "Args", args` statement.
    print("Args " + str(args))
    assert args.pattern
    assert len(args.pattern) > 0
    # Module-level settings; `does_cross_return` is defined outside this
    # excerpt.
    MIN_DUR = args.min_dur
    FUTURE_HORIZON = args.future
    df = query.run(
        args.pattern,
        map_hdf=does_cross_return,
        init={},
        combine=combine,
        post_process=query_helpers.summarize_bool,
    )
    print(df)
import query
import numpy as np


# NOTE(review): this name shadows the builtin `map`; it is kept because the
# function is part of this script's existing interface.
def map(hdf):
    """Takes an HDF, returns a tuple of min/max midprices"""
    # Note: I slice into the HDF's column to pull out a numpy array
    midprice = (hdf['bid'][:] + hdf['offer'][:]) / 2.0
    return np.min(midprice), np.max(midprice)


def combine(values, min_max):
    """Append one file's ``(min, max)`` pair to the flat ``values`` list.

    Both numbers are added individually (min first, then max), so taking the
    min and max of ``values`` afterwards gives the overall extremes.
    ``values`` is mutated in place; nothing is returned.

    Explicit unpacking replaces the tuple parameter, which is Python-2-only
    syntax.
    """
    curr_min, curr_max = min_max
    values.append(curr_min)
    values.append(curr_max)


from argparse import ArgumentParser

parser = ArgumentParser(description='min/max for ccy pair')
parser.add_argument('--ccy', dest='ccy', type=str, required=True, help="e.g. USDJPY")
parser.add_argument('--bucket', dest='bucket', type=str, default='capk-fxcm-hdf')

if __name__ == '__main__':
    args = parser.parse_args()
    # Same output as the former `print "args", args` statement.
    print("args " + str(args))
    # Match any key that contains the requested name and ends in .hdf.
    pattern = '*' + args.ccy + '*.hdf'
    values = query.run(args.bucket, pattern,
                       map_hdf=map, combine=combine, init=[])
    print("Min: %s, Max: %s" % (np.min(values), np.max(values)))
def count_crosses(hdf):
    """Count the rows of one HDF file where ``bid`` exceeds ``offer``.

    Reads ``ccy``, ``year``, ``month`` and ``day`` from ``hdf.attrs``, prints
    a one-line progress message and returns ``(ccy, count)``.
    """
    ccy = hdf.attrs['ccy']
    date = (hdf.attrs['year'], hdf.attrs['month'], hdf.attrs['day'])
    # Element-wise comparison; summing the boolean result counts the hits.
    count = np.sum(hdf['bid'][:] > hdf['offer'][:])
    # Same text as the former space-separated print statement.
    print("Found %s crossed tick for %s on %s" % (count, ccy, date))
    return ccy, count


def combine(counts, ccy_count):
    """Add one ``(ccy, count)`` mapper result to the running totals in place.

    Explicit unpacking replaces the tuple parameter, which is Python-2-only
    syntax.
    """
    ccy, count = ccy_count
    counts[ccy] = counts.get(ccy, 0) + count


def convert_to_dataframe(total_counts):
    """Build a one-column (``count``) DataFrame indexed by the dict's keys."""
    # list(...) makes the result independent of whether values()/keys()
    # return lists (Python 2) or views (Python 3).
    return pandas.DataFrame({"count": list(total_counts.values())},
                            index=list(total_counts.keys()))


parser = OptionParser(usage="usage: %prog s3://bucket-name/key-pattern")

if __name__ == '__main__':
    (options, args) = parser.parse_args()
    # Exactly one positional argument (the key pattern) is expected.
    assert len(args) == 1
    df = query.run(args[0],
                   map_hdf=count_crosses,
                   init={},
                   combine=combine,
                   post_process=convert_to_dataframe)
    print(df)
# Minimum `dur` a cross must have to be included; None disables the filter.
# Overwritten from --min-duration when the module is run as a script.
MIN_DUR = None


def cross_amounts(hdf):
    """Return ``(ccy, amts)`` for one HDF file.

    ``ccy`` is read from ``hdf.attrs['ccy']``.  ``amts`` holds
    ``cross.amt * cross.min_vol`` for every item produced by
    ``cross_info.find_crossed_markets_in_hdf(hdf)`` whose ``dur`` is at least
    ``MIN_DUR`` (all items when ``MIN_DUR`` is None).
    """
    ccy = hdf.attrs['ccy']
    amts = [
        cross.amt * cross.min_vol
        for cross in cross_info.find_crossed_markets_in_hdf(hdf)
        if MIN_DUR is None or cross.dur >= MIN_DUR
    ]
    return ccy, amts


def combine(all_amts, ccy_amts):
    """Fold one ``(ccy, amts)`` mapper result into ``all_amts`` in place.

    ``amts`` is appended to the list stored under ``ccy``, which is created
    on first use.  Returns nothing.

    Explicit unpacking replaces the tuple parameter, which is Python-2-only
    syntax.
    """
    ccy, amts = ccy_amts
    all_amts.setdefault(ccy, []).extend(amts)


from argparse import ArgumentParser

# NOTE(review): the description text looks like leftover argparse-tutorial
# boilerplate; left unchanged because it is user-visible output.
parser = ArgumentParser(description='Process some integers.')
parser.add_argument('pattern', metavar='P', type=str,
                    help='s3://capk-bucket/some-hdf-pattern')
parser.add_argument(
    '--min-duration', dest='min_dur', type=int, default=None,
    help='ignore crosses which last shorter than this min. duration in milliseconds')

if __name__ == '__main__':
    args = parser.parse_args()
    assert args.pattern
    assert len(args.pattern) > 0
    # Rebinds the module-level filter that cross_amounts reads.
    MIN_DUR = args.min_dur
    df = query.run(args.pattern,
                   map_hdf=cross_amounts,
                   init={},
                   combine=combine,
                   post_process=query_helpers.summarize_continuous)
    print(df)
from optparse import OptionParser

import numpy as np
import query
import pandas


def count_crosses(hdf):
    """Count the rows of one HDF file where ``bid`` exceeds ``offer``.

    Reads ``ccy``, ``year``, ``month`` and ``day`` from ``hdf.attrs``, prints
    a one-line progress message and returns ``(ccy, count)``.
    """
    ccy = hdf.attrs["ccy"]
    date = (hdf.attrs["year"], hdf.attrs["month"], hdf.attrs["day"])
    # Element-wise comparison; summing the boolean result counts the hits.
    count = np.sum(hdf["bid"][:] > hdf["offer"][:])
    # Same text as the former space-separated print statement.
    print("Found %s crossed tick for %s on %s" % (count, ccy, date))
    return ccy, count


def combine(counts, ccy_count):
    """Add one ``(ccy, count)`` mapper result to the running totals in place.

    Explicit unpacking replaces the tuple parameter, which is Python-2-only
    syntax.
    """
    ccy, count = ccy_count
    counts[ccy] = counts.get(ccy, 0) + count


def convert_to_dataframe(total_counts):
    """Build a one-column (``count``) DataFrame indexed by the dict's keys."""
    # list(...) makes the result independent of whether values()/keys()
    # return lists (Python 2) or views (Python 3).
    return pandas.DataFrame(
        {"count": list(total_counts.values())},
        index=list(total_counts.keys()),
    )


parser = OptionParser(usage="usage: %prog s3://bucket-name/key-pattern")

if __name__ == "__main__":
    (options, args) = parser.parse_args()
    # Exactly one positional argument (the key pattern) is expected.
    assert len(args) == 1
    df = query.run(
        args[0],
        map_hdf=count_crosses,
        init={},
        combine=combine,
        post_process=convert_to_dataframe,
    )
    print(df)
# either the cross was shorter than 50ms or both prices changed # at the end returns = np.sum(future_bid_ok & future_offer_ok & future_vol_ok) print cross, "returns", returns, "times in", FUTURE_HORIZON / 1000.0, "seconds" result.append(returns > 0) return ccy, result def combine(all_amts, (ccy,amts)): all_amts.setdefault(ccy, []).extend(amts) from argparse import ArgumentParser parser = ArgumentParser(description='Process some integers.') parser.add_argument('pattern', metavar='P', type=str, help='s3://capk-bucket/some-hdf-pattern') parser.add_argument('--min-duration', dest='min_dur', type=int, default=None, help = 'ignore crosses which last shorter than this min. duration in milliseconds') parser.add_argument('--future', dest = 'future', type=int, default=10000) if __name__ == '__main__': args = parser.parse_args() print "Args", args assert args.pattern assert len(args.pattern) > 0 MIN_DUR = args.min_dur FUTURE_HORIZON = args.future df = query.run(args.pattern, map_hdf = does_cross_return, init = {}, combine = combine, post_process = query_helpers.summarize_bool) print df
midprice = (hdf['bid'][:] + hdf['offer'][:]) / 2.0 return np.min(midprice), np.max(midprice) def combine(values, (curr_min, curr_max)): values.append(curr_min) values.append(curr_max) from argparse import ArgumentParser parser = ArgumentParser(description='min/max for ccy pair') parser.add_argument('--ccy', dest='ccy', type=str, required=True, help="e.g. USDJPY") parser.add_argument('--bucket', dest='bucket', type=str, default='capk-fxcm-hdf') if __name__ == '__main__': args = parser.parse_args() print "args", args pattern = '*' + args.ccy + '*.hdf' values = query.run(args.bucket, pattern, map_hdf=map, combine=combine, init=[]) print "Min: %s, Max: %s" % (np.min(values), np.max(values))
args = parser.parse_args()

# Command-line thresholds are scaled by 10**-4.  When no (or a falsy)
# future change is given, the past change is reused for it.
PAST_CHANGE = args.past_change * 10**-4
if args.future_change:
    FUTURE_CHANGE = args.future_change * 10**-4
else:
    FUTURE_CHANGE = PAST_CHANGE
OFFSET_TICKS = args.time_offset * 10
START_HOUR = args.start_hour
END_HOUR = args.end_hour
assert START_HOUR < END_HOUR

# Each print(...) takes one pre-joined string so the output matches the
# former `print a, b` statements on Python 2 while also parsing on Python 3.
print("past change % " + str(PAST_CHANGE))
print("future change % " + str(FUTURE_CHANGE))
print("time offset in ticks " + str(OFFSET_TICKS))

all_ccys = query.run(args.pattern, map_hdf=mapper, init={}, combine=combine)

# Once all the counts have been collected, do the simple task of
# computing probs by dividing event counts by the totals
result = {}
for (ccy, (event_count, total)) in all_ccys.items():
    if total == 0:
        # Nothing counted for this key: report 0 rather than divide by zero.
        prob = 0
    else:
        prob = event_count / float(total)
    result[ccy] = {'prob': prob, 'count': event_count, 'total': total}
print(result)