def extract_plugs_from_house(h_id, datadir, outdir):
    """Split one house's CSV into one CSV file per plug.

    Reads ``<datadir>/<h_id>.csv`` through ``lines``, groups the records
    first by ``hh_id`` and then by ``plug_id``, and writes every group to
    ``<outdir>/<h_id>/<hh_id>/<plug_id>_<hh_id>_<h_id>.csv``.

    Args:
        h_id: Integer house id; selects the input file and names the
            output directory.
        datadir: Directory holding the per-house input CSV.
        outdir: Root directory under which the per-plug files are written.
    """
    # NOTE(review): `lines` is defined outside this block; presumably it
    # returns an observable of parsed records -- confirm against its module.

    def process_plug_stream(hh_id, plug_stream):
        """Write every record of one plug group to its own CSV file."""
        outfile = '%s/%d/%d/%d_%d_%d.csv' % (
            outdir, h_id, hh_id, plug_stream.key, hh_id, h_id)
        # The file has to outlive this call: it is written from on_next and
        # only closed once the group signals completion.
        f = open(outfile, 'w')

        def on_next(r):
            f.write('%d,%d,%f,%d,%d,%d,%d\n' % (
                r.id, r.ts, r.value, r.property, r.plug_id, r.hh_id, r.h_id))

        def on_completed():
            f.close()

        plug_stream.subscribe(
            on_next=on_next, on_error=None, on_completed=on_completed)

    def process_hh_stream(hh_stream):
        """Create the household directory, then fan out per plug."""
        hh_dir = '%s/%d/%d' % (outdir, h_id, hh_stream.key)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(hh_dir, exist_ok=True)
        hh_stream.group_by(lambda r: r.plug_id).subscribe(
            lambda plug_stream: process_plug_stream(hh_stream.key, plug_stream))

    house_records = lines('%s/%d.csv' % (datadir, h_id))
    house_records.group_by(lambda r: r.hh_id).subscribe(process_hh_stream)
def plot_hh_load_vs_time_per_house(house_id, out_dir, data_dir):
    """Write the summed load per household for every timestamp of a house.

    Reads ``<data_dir>/<house_id>.csv`` through ``parse.lines`` and writes
    ``<out_dir>/<house_id>.csv``. Each output row is
    ``<ts>,<load of hh 0>,<load of hh 1>,...`` up to the largest household
    id seen so far; households without a value for that timestamp are
    written as ``0.0``.

    Consecutive records with the same ``ts`` are accumulated together, so
    the input is assumed to be grouped by timestamp (TODO confirm with the
    producer of the file).

    Args:
        house_id: Integer house id; selects input and output file names.
        out_dir: Directory for the output CSV.
        data_dir: Directory holding the per-house input CSV.
    """
    print('Processing house_id:%d' % house_id)
    with open('%s/%d.csv' % (out_dir, house_id), 'w') as f:
        last_ts = -1  # -1 means "no record seen yet"
        hh_load_map = {}

        def print_results():
            """Emit one CSV row for the timestamp currently accumulated."""
            # An empty stream reaches here with nothing accumulated; max()
            # on an empty dict would raise ValueError, so write no row.
            if not hh_load_map:
                return
            max_hh_id = max(hh_load_map)
            fields = ['%d' % last_ts]
            fields.extend(
                '%f' % hh_load_map.get(i, 0.0) for i in range(max_hh_id + 1))
            f.write(','.join(fields) + '\n')

        def on_next(r):
            nonlocal last_ts
            if last_ts == -1:
                last_ts = r.ts
            if last_ts == r.ts:
                hh_load_map[r.hh_id] = hh_load_map.get(r.hh_id, 0.0) + r.value
            else:
                print_results()
                # Zero the totals but keep the keys, so the largest known
                # household id (and thus the column count) never shrinks.
                for key in hh_load_map:
                    hh_load_map[key] = 0.0
                last_ts = r.ts
                hh_load_map[r.hh_id] = r.value

        def on_completed():
            # Flush the final timestamp; the enclosing `with` closes the file.
            print_results()

        parse.lines('%s/%d.csv' % (data_dir, house_id)).subscribe(
            on_next=on_next, on_error=None, on_completed=on_completed)
def process_plugs(datadir, time_step):
    """Average every plug file under *datadir* over fixed-width time buckets.

    Walks ``<datadir>/<house>/<household>/<plug file>`` and, for each plug
    file, writes ``processed_<plug file>`` into the same directory. Each
    output line is ``<bucket start>,<average value>``, where the bucket
    start is the record timestamp rounded down to a multiple of
    *time_step*.

    Args:
        datadir: Root directory of the house/household/plug tree.
        time_step: Bucket width, in the same unit as the records' ``ts``.
    """
    # NOTE(review): outputs land next to their inputs, so a second run over
    # the same tree will also list the processed_* files -- confirm whether
    # they should be skipped.

    def bucket_start(ts):
        """Round *ts* down to the nearest multiple of time_step."""
        return ts - (ts % time_step)

    def average_plug(plug_file, out_file):
        """Stream one plug file and write its per-bucket averages.

        All accumulator state lives in this call, so the callbacks of one
        file can never observe another file's state.
        """
        f = open(out_file, 'w')
        last_ts = -1      # start of the bucket being accumulated; -1 = none
        written_ts = -1   # start of the last bucket already written
        total = 0.0       # was named `sum`, which shadowed the builtin
        count = 0

        def flush():
            nonlocal written_ts
            avg = (total / count) if count > 0 else 0.0
            f.write('%d,%f\n' % (last_ts, avg))
            written_ts = last_ts

        def on_next(r):
            nonlocal last_ts, total, count
            if last_ts == -1:
                last_ts = bucket_start(r.ts)
            if (r.ts - last_ts) < time_step:
                total += r.value
                count += 1
            else:
                # The record falls outside the current bucket: emit it and
                # start a new bucket seeded with this record.
                flush()
                last_ts = bucket_start(r.ts)
                total = r.value
                count = 1

        def on_completed():
            # Emit the trailing bucket unless it was already written (or
            # the stream was empty, in which case both markers are -1).
            if last_ts > written_ts:
                flush()
            f.close()

        lines(plug_file).subscribe(
            on_next=on_next, on_error=None, on_completed=on_completed)

    for house in os.listdir(datadir):
        house_dir = '%s/%s' % (datadir, house)
        for hh in os.listdir(house_dir):
            hh_dir = '%s/%s' % (house_dir, hh)
            for plug in os.listdir(hh_dir):
                print('Processing house:%s hh:%s plug:%s' % (house, hh, plug))
                average_plug(
                    '%s/%s' % (hh_dir, plug),
                    '%s/processed_%s' % (hh_dir, plug))
def extract_houses(datafile, outdir):
    """Split *datafile* into one CSV per house, keeping ``property == 1`` rows.

    Records are read through ``lines``, filtered to those whose ``property``
    equals 1, grouped by ``h_id``, and written to ``<outdir>/<h_id>.csv``
    with the columns ``id,ts,value,property,plug_id,hh_id,h_id``.

    Args:
        datafile: Path of the combined input file.
        outdir: Directory for the per-house output files; this function
            does not create it.
    """
    # NOTE(review): the meaning of property == 1 is not visible here --
    # confirm against the data set description.

    def write_stream(h_stream):
        """Write every record of one house group to its own CSV file."""
        # Fixed typo in the progress message ("hosue" -> "house").
        print("Extracting house stream for house:%s\n" % h_stream.key)
        outfile = '%s/%d.csv' % (outdir, h_stream.key)
        # Closed in on_completed; the file must stay open while the group
        # is still delivering records.
        f = open(outfile, 'w')

        def on_next(r):
            f.write('%d,%d,%f,%d,%d,%d,%d\n' % (
                r.id, r.ts, r.value, r.property, r.plug_id, r.hh_id, r.h_id))

        def on_completed():
            f.close()

        h_stream.subscribe(
            on_next=on_next, on_error=None, on_completed=on_completed)

    (lines(datafile)
     .filter(lambda r: r.property == 1)
     .group_by(lambda r: r.h_id)
     .subscribe(write_stream))
def plot_plugs_vs_time_per_house(house_id, out_dir, data_dir):
    """Count, per timestamp, the distinct plugs reporting a positive value.

    Reads ``<data_dir>/<house_id>.csv`` through ``parse.lines``, keeps the
    records with ``value > 0`` and writes ``<ts>,<distinct plug count>``
    rows to ``<out_dir>/<house_id>.csv``. A plug is identified by the
    combination of ``plug_id``, ``hh_id`` and ``h_id``.

    Consecutive records with the same ``ts`` are counted together, so the
    input is assumed to be grouped by timestamp (TODO confirm with the
    producer of the file).

    Args:
        house_id: Integer house id; selects input and output file names.
        out_dir: Directory for the output CSV.
        data_dir: Directory holding the per-house input CSV.
    """
    print('Processing house_id:%d' % house_id)
    with open('%s/%d.csv' % (out_dir, house_id), 'w') as f:
        last_ts = -1  # -1 means "no record seen yet"
        plug_ids = set()

        def flush():
            f.write('%d,%d\n' % (last_ts, len(plug_ids)))

        def on_next(r):
            nonlocal last_ts
            if last_ts == -1:
                last_ts = r.ts
            if last_ts != r.ts:
                flush()
                last_ts = r.ts
                plug_ids.clear()
            # Always count the current record. Previously the record that
            # opened a new timestamp was dropped, so every timestamp after
            # the first was undercounted by one.
            plug_ids.add('%d_%d_%d' % (r.plug_id, r.hh_id, r.h_id))

        def on_completed():
            # Skip the flush for an empty stream so no row carrying the -1
            # sentinel is written; the enclosing `with` closes the file.
            if last_ts != -1:
                flush()

        (parse.lines('%s/%d.csv' % (data_dir, house_id))
         .filter(lambda r: r.value > 0)
         .subscribe(on_next=on_next, on_error=None, on_completed=on_completed))
def timesteps(datafile, outfile, step_size):
    """Write the start of every time bucket that occurs in *datafile*.

    Records are read through ``lines`` and filtered to ``property == 1``.
    The first record's timestamp, rounded down to a multiple of
    *step_size*, is written to *outfile*; after that a new rounded
    timestamp is written whenever a record lies at least *step_size*
    beyond the last one written. One timestamp per line.

    Args:
        datafile: Path of the input file.
        outfile: Path of the file receiving the bucket start timestamps.
        step_size: Bucket width, in the same unit as the records' ``ts``.
    """
    with open(outfile, 'w') as out:
        current = -1  # start of the latest bucket written; -1 = none yet

        def floor_to_step(ts):
            """Round *ts* down to a multiple of step_size."""
            remainder = ts % step_size
            return ts if remainder == 0 else ts - remainder

        def start_bucket(ts):
            """Record and write the bucket that contains *ts*."""
            nonlocal current
            current = floor_to_step(ts)
            out.write('%d\n' % current)

        def on_next(r):
            if current == -1:
                start_bucket(r.ts)
            # Checked independently of the branch above, on the possibly
            # just-updated bucket start.
            if r.ts - current >= step_size:
                start_bucket(r.ts)

        def on_completed():
            out.close()

        source = lines(datafile)
        selected = source.filter(lambda r: r.property == 1)
        selected.subscribe(
            on_next=on_next, on_error=None, on_completed=on_completed)