def fetch_thread(i):
    """Poll S3 for gradient i, enqueue it for the error thread, then delete the object."""
    global outf
    global grad_q
    global b
    s3 = boto3.resource('s3')
    my_bucket = s3.Bucket('camus-pywren-489')
    test = get_test()
    num = 0
    start_time = time.time()
    while time.time() - start_time < total_time:
        key = 'gradient_indiv_%d' % i
        begin = time.time()
        # Busy-wait until the worker has written this gradient (or the time limit is hit).
        while time.time() - start_time < total_time and (not check_key(key)):
            if time.time() - begin > 10:
                print("Thread %d took too long" % i)
                b.wait()
                break
        obj = my_bucket.Object('gradient_indiv_%d' % i)
        try:
            grad = pickle.loads(obj.get()['Body'].read())
        except Exception:
            # The object may not exist yet or may be partially written; retry.
            continue
        grad_q.put(grad)
        #model = get_model()
        #model = update_model(model, grad)
        #store_model(model)
        print("Thread %d waiting..." % i)
        b.wait()
        if num % 10 == 0:
            # Periodic progress log (the original referenced an undefined local `model`
            # here; fetch the current model from storage instead).
            print('[ERROR]', num, time.time() - start_time, loglikelihood(test, get_model()))
        print("Thread %d moving..." % i)
        obj.delete()
        #if i == 0:
        #    b.reset()
        num += 1
        if time.time() - start_time > total_time:
            return
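# `check_key` is used by fetch_thread above but defined elsewhere in this module. The
# sketch below only illustrates the kind of existence probe it presumably performs (a
# HEAD request against the gradient bucket); the name `_check_key_sketch`, its signature,
# and its body are assumptions, not the project's helper.
def _check_key_sketch(key, bucket='camus-pywren-489'):
    import boto3
    from botocore.exceptions import ClientError
    s3 = boto3.resource('s3')
    try:
        s3.Object(bucket, key).load()  # HEAD request; raises ClientError if the key is absent
        return True
    except ClientError:
        return False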
def dump_thread(q, f):
    """Drain (elapsed_time, model) tuples from q and write the test loss for each to a CSV.

    The `f` argument is currently unused; the pickle dump below is disabled.
    """
    global total_time
    start = time.time()
    print("DUMP THREAD STARTED")
    outf = open(fname[:-4] + ".csv2", "w")
    testdata = get_test()
    while time.time() - start < total_time or not q.empty():
        if time.time() - start > total_time and q.empty():
            break
        if not q.empty():
            t, model = q.get()
            print("dumping")
            s = time.time()
            #pickle.dump(time_model, f)
            loss = loglikelihood(testdata, model)
            print("wrote: %f %f" % (t, loss))
            outf.write("%f, %f\n" % (t, loss))
            print("dump done took", time.time() - s)
            q.task_done()
    outf.close()
    print("DUMP THREAD STOPPED")
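# dump_thread drains a queue of (elapsed_seconds, model) tuples and acknowledges each with
# task_done(), so a producer can block on q.join(). The uncalled function below is a minimal
# usage sketch under assumed wiring (the module-level `fname` and `total_time` globals are
# set, and `model` is whatever model object the rest of the module uses); it is not the
# project's actual driver code.
def _dump_thread_usage_sketch(model):
    import queue
    import threading
    dump_q = queue.Queue()
    start = time.time()
    worker = threading.Thread(target=dump_thread, args=(dump_q, None), daemon=True)
    worker.start()
    dump_q.put((time.time() - start, model))  # hand the current model to the dump thread
    dump_q.join()                             # returns once dump_thread calls task_done()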
def get_local_test_data():
    return get_test()
def error_thread(model, outf):
    """Apply queued gradients to the model, checkpoint it periodically, then replay the
    checkpoints against the full test set to produce a (time, loss) CSV."""
    global grad_q
    global log
    global fname
    global index
    s3 = boto3.resource('s3')
    my_bucket = s3.Bucket('camus-pywren-489')
    num = 0
    print("Starting error thread")
    start_time = time.time()
    test_data = get_test_data()
    saves = 0
    print(fname[:-4] + ".pkl")
    f = open(fname[:-4] + ".pkl", 'wb')
    last_dump = -100
    while time.time() - start_time < total_time:
        if not grad_q.empty():
            sz = grad_q.qsize()
            print("Saw", sz)
            # Apply every gradient currently in the queue, storing the model after each update.
            for _ in range(sz):
                grad = grad_q.get()
                model = update_model(model, grad)
                store_model(model)
                grad_q.task_done()
                num += 1
            #error = loglikelihood(test_data, model)
            curr_time = time.time() - start_time
            print("[ERROR_TASK]", curr_time, loglikelihood(test_data, model),
                  "this many grads:", num,
                  "Sec / Grad:", (time.time() - start_time) / num)
            outf.write("[ERROR_TASK] " + str(curr_time) +
                       " this many grads: " + str(num) +
                       " Sec / Grad: " + str((time.time() - start_time) / num) + "\n")
            # Checkpoint the model at most once per second.
            if curr_time - last_dump > 1:
                print("dumping")
                pickle.dump((curr_time, model), f)
                print("dump done")
                saves += 1
                last_dump = curr_time
        if time.time() - start_time > total_time:
            break
    print("Saves: ", saves, "Index:", index)
    # Replay the checkpoints (at most one every 5 seconds) against the full test set.
    large_test = get_test()
    f.close()
    outf = open(fname[:-4] + ".csv", "w")
    with open(fname[:-4] + ".pkl", 'rb') as f:
        last = -1000
        for i in range(saves):
            t, model = pickle.load(f)
            if t - last < 5:
                continue
            last = t
            error = loglikelihood(large_test, model)
            print("wrote: %f %f" % (t, error))
            outf.write("%f, %f\n" % (t, error))
    outf.close()
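# A hypothetical sketch of how the threads above might be wired together, written as an
# uncalled function so it documents the assumed setup without executing on import. The
# module-level globals `grad_q`, `b`, `total_time`, and `fname` are what fetch_thread and
# error_thread expect; `n_workers` (one fetch thread per remote gradient producer), the
# ".log" output path, and the barrier size are assumptions, and get_model() is taken from
# elsewhere in this module.
def _driver_sketch(n_workers, run_seconds):
    import queue
    import threading
    global grad_q, b, total_time
    total_time = run_seconds
    grad_q = queue.Queue()
    # All fetch threads synchronize on this barrier after each round of gradients.
    b = threading.Barrier(n_workers)
    fetchers = [threading.Thread(target=fetch_thread, args=(i,), daemon=True)
                for i in range(n_workers)]
    for t in fetchers:
        t.start()
    model = get_model()
    with open(fname[:-4] + ".log", "w") as logf:
        error_thread(model, logf)  # runs in the calling thread until run_seconds elapse
    for t in fetchers:
        t.join()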