def fast_adapt(batch, learner, loss, adaptation_steps, i, mode):
    # Drop singleton dimensions from the support ('train') and query ('test') sets
    batch['train'][0] = batch['train'][0].squeeze()
    batch['test'][0] = batch['test'][0].squeeze()
    batch['train'][1] = batch['train'][1].squeeze()
    batch['test'][1] = batch['test'][1].squeeze()

    adaptation_data, adaptation_labels = batch['train']
    evaluation_data, evaluation_labels = batch['test']

    # Adapt the model on the support set
    for step in range(adaptation_steps):
        train_error = loss(learner(adaptation_data), adaptation_labels)
        learner.adapt(train_error)

    # Evaluate the adapted model on the query set
    predictions = learner(evaluation_data)
    valid_error = loss(predictions, evaluation_labels)

    # During testing, occasionally plot the prediction against the target
    if mode == 'test' and i % 30 == 1:
        plt.rcParams["figure.figsize"] = (14, 5)
        plt.plot(normalize(predictions.cpu().detach().numpy()[0]))
        plt.plot(normalize(evaluation_labels.cpu().detach().numpy()[0]))
        plt.show()

    return valid_error
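# A minimal sketch of the outer meta-training loop that fast_adapt plugs into, assuming a
# learn2learn-style MAML wrapper (suggested by learner.adapt()) and a task sampler that
# yields batches shaped like {'train': [x, y], 'test': [x, y]}. The model, sampler, and
# hyperparameters below are illustrative placeholders, not taken from the original script.
import torch
import torch.nn as nn
import learn2learn as l2l

model = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 32))   # hypothetical regressor
loss_fn = nn.MSELoss()

def sample_task():
    # Hypothetical task sampler returning the batch format fast_adapt expects
    return {'train': [torch.randn(8, 32), torch.randn(8, 32)],
            'test':  [torch.randn(8, 32), torch.randn(8, 32)]}

maml = l2l.algorithms.MAML(model, lr=0.01, first_order=False)
opt = torch.optim.Adam(maml.parameters(), lr=0.001)
meta_batch_size, adaptation_steps = 4, 5

for iteration in range(100):
    opt.zero_grad()
    for task_idx in range(meta_batch_size):
        learner = maml.clone()                       # task-specific copy of the model
        valid_error = fast_adapt(sample_task(), learner, loss_fn,
                                 adaptation_steps, task_idx, mode='train')
        valid_error.backward()                       # gradients accumulate on maml's parameters
    for p in maml.parameters():                      # average gradients over the meta-batch
        p.grad.data.mul_(1.0 / meta_batch_size)
    opt.step()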
def main(datafile='../data_by_cookie_slim.json', outputFolder='.', iterations=10, epochmult=4):
    filename = 'runMLP'
    outputFile = '{}/{}.p'.format(outputFolder, filename)

    data = funcs.loadData(datafile)
    # Filter away the bottom 75% of players
    data = funcs.filterByPercRank(data, 75)

    print 'iterations: {}\nMultiplier Samplesize Epochs: {}\noutput file: {}'.format(iterations, epochmult, outputFile)

    # Use the first 10 scores to predict whether a player keeps playing past 20 games
    samples = np.fromiter(((funcs.normalize(np.array(k[:10])), 0 if len(k) < 20 else 1)
                           for k in data if len(k) >= 10),
                          dtype=[('input', float, 10), ('output', float, 1)])
    print 'Learning from {} samples...'.format(samples.size)

    network = MLP(10, 10, 10, 1)

    def processResults(network, results):
        # Threshold the network output at 0.5 to get a binary response,
        # then compute hit rate, false-alarm rate and d'
        stepf = lambda x: 0 if x < .5 else 1
        test_data = [(t[0], t[1], stepf(t[2])) for t in results]
        percHits = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 1])    # hit rate
        falseAlarm = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 0])  # false-alarm rate
        dPrime = funcs.dprime(percHits, falseAlarm)
        return (percHits, falseAlarm, dPrime, network.weights)

    # ~40 million epochs for the full dataset - possibly too many
    out = network.learnLoop(samples, iterations=iterations, epochs=epochmult * samples.size,
                            processResults=processResults)
    pickle.dump(out, open(outputFile, 'wb'))

    dprimes = pickle.load(open(outputFile, 'rb'))
    # Set NaN and Inf to 0
    dprimes = [[0 if np.isnan(i) or np.isinf(i) else i for i in k[2]] for k in dprimes]

    print
    print 'Results:'
    print 'Mean d\' score for each quit opportunity: {}'.format([np.mean([k[i] for k in dprimes]) for i in xrange(1)])
    print 'Std : {}'.format([np.std([k[i] for k in dprimes]) for i in xrange(1)])
    print 'Max : {}'.format([np.max([k[i] for k in dprimes]) for i in xrange(1)])
    print
    print
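# funcs.dprime is not shown here; a minimal sketch of what it presumably computes, assuming
# the standard signal-detection definition d' = Z(hit rate) - Z(false-alarm rate). The real
# helper may clamp extreme rates or operate element-wise on arrays.
from scipy.stats import norm

def dprime_sketch(hit_rate, false_alarm_rate):
    # z-transform both rates; rates of exactly 0 or 1 yield +/-inf,
    # which the calling code above later maps to 0
    return norm.ppf(hit_rate) - norm.ppf(false_alarm_rate)

# Example: dprime_sketch(0.8, 0.3) is roughly 1.37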
def main(datafile='../data_by_cookie_slim.json', outputFolder='.', iterations=10, epochmult=4):
    print 'Local vs Global quit analysis with MLP'

    timethresh = 2   # gap in hours that counts as a quit
    timesize = 10    # number of games per sample
    filename = 'lgquit'
    outputFile = '{}/{}.p'.format(outputFolder, filename)
    print 'Quit locations: {}\ntime threshold: {} hours\niterations: {}\nMultiplier Samplesize Epochs: {}\noutput file: {}'.format(timesize - 1, timethresh, iterations, epochmult, outputFile)

    fh = open(datafile)
    data = json.load(fh)

    # Sort each player's games chronologically and keep only players with exactly `timesize` games
    data = [[data[k][l] for l in collections.OrderedDict(sorted(data[k].items()))] for k in data]
    inputdata = [[l[0] for l in k] for k in data if len(k) == timesize]

    # Time differences between consecutive games, in hours
    timediff_data = [[((k[l][1] - k[l - 1][1]) * 24 + (k[l][2] - k[l - 1][2])) for l in range(1, len(k))]
                     for k in data if len(k) == timesize]
    # Threshold: 1 if the gap is at least `timethresh` hours, 0 otherwise
    timediff_padthresh = [[1 if len(k) <= l or k[l] >= timethresh else 0 for l in range(timesize - 1)]
                          for k in timediff_data]

    # Set up samples
    samples = np.fromiter(((funcs.normalize(inputdata[i]), timediff_padthresh[i]) for i in range(len(inputdata))),
                          dtype=[('input', float, timesize), ('output', float, timesize - 1)])
    print 'Learning from {} samples...'.format(samples.size)

    network = MLP(timesize, 10, 10, 10, timesize - 1)

    # Calculate d' on a per-element basis
    def processResults(network, results):
        stepf = lambda x: [0 if i < .5 else 1 for i in x]
        test_data = [(t[0], t[1], stepf(t[2])) for t in results]
        outnum = len(test_data[0][1])
        percHits = [np.mean(k) for k in [[1 if t[2][i] == 1 else 0 for t in test_data if t[1][i] == 1]
                                         for i in range(outnum)]]    # hit rate per element
        falseAlarm = [np.mean(k) for k in [[1 if t[2][i] == 1 else 0 for t in test_data if t[1][i] == 0]
                                           for i in range(outnum)]]  # false-alarm rate per element
        dPrime = funcs.dprime(percHits, falseAlarm)
        return (percHits, falseAlarm, dPrime, network.weights)

    # ~40 million epochs for the full dataset - possibly too many
    out = network.learnLoop(samples, iterations=iterations, epochs=epochmult * samples.size,
                            processResults=processResults)
    pickle.dump(out, open(outputFile, 'wb'))

    dprimes = pickle.load(open(outputFile, 'rb'))
    # Set NaN to 0 (it tends to arise as Inf - Inf, so 0 makes sense). Map -Inf to 0 as well:
    # it usually comes from a small but negligible difference between hit rate and false-alarm rate,
    # where the false-alarm rate becomes exactly 0 while the hit rate is only slightly above it.
    dprimes = [[0 if np.isnan(i) or np.isinf(i) else i for i in k[2]] for k in dprimes]

    print
    print 'Results:'
    print 'Mean d\' score for each quit opportunity: {}'.format([np.mean([k[i] for k in dprimes]) for i in xrange(timesize - 1)])
    print 'Std : {}'.format([np.std([k[i] for k in dprimes]) for i in xrange(timesize - 1)])
    print 'Max : {}'.format([np.max([k[i] for k in dprimes]) for i in xrange(timesize - 1)])
    print
    print
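# A small self-contained illustration (with made-up numbers) of how the quit labels above
# are derived, assuming each record is (score, day, hour): the gap between consecutive games
# is 24 * delta_day + delta_hour, and a gap of at least `timethresh` hours is labelled 1.
timethresh = 2
games = [(120, 0, 9), (135, 0, 10), (128, 0, 15), (150, 1, 9)]   # hypothetical (score, day, hour) rows

gaps = [(games[l][1] - games[l - 1][1]) * 24 + (games[l][2] - games[l - 1][2])
        for l in range(1, len(games))]
labels = [1 if g >= timethresh else 0 for g in gaps]

print(gaps)    # [1, 5, 18]
print(labels)  # [0, 1, 1]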
def main(datafile='../data_by_cookie_slim.json', outputFolder='.', iterations=10, epochmult=4):
    filename = 'runPerceptronReal'
    outputFile = '{}/{}.p'.format(outputFolder, filename)

    # Load data
    print 'Predicting quitting after 20 games from the first 10 games with a perceptron'
    data = funcs.loadData(datafile)

    # Set up the perceptron
    s = 10      # number of games used as input
    eta = 0.2   # learning rate
    dPrimes = [0] * iterations
    out = []

    training_data = [(np.array(k[:s]), 0 if len(k) < 2 * s else 1) for k in data if len(k) >= s]
    n = len(training_data) * epochmult

    print 'iterations: {}\nMultiplier Samplesize Epochs: {}\noutput file: {}\n'.format(iterations, epochmult, outputFile)
    print 'Overall plays over 20 plays: {}'.format(np.mean([t[1] for t in training_data]))
    print 'Learning from {} samples...'.format(len(training_data))

    for i in xrange(iterations):
        # Random initial weights in [-1, 1)
        w = 2 * np.random.rand(s) - 1
        stepf = lambda x: 0 if x < 0 else 1

        # Train the perceptron with the delta rule on randomly drawn samples
        for j in xrange(n):
            x, expected = choice(training_data)
            result = np.dot(w, x)
            error = expected - stepf(result)
            w += eta * error * funcs.normalize(x)

        # Test performance: test_data consists of rows (x, signal, response)
        test_data = [(t[0], t[1], stepf(np.dot(w, t[0]))) for t in training_data]
        percHits = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 1])    # hit rate
        falseAlarm = np.mean([1 if t[2] == 1 else 0 for t in test_data if t[1] == 0])  # false-alarm rate
        dPrime = funcs.dprime(percHits, falseAlarm)

        dPrimes[i] = dPrime
        out.append((percHits, falseAlarm, dPrime, w))

    pickle.dump(out, open(outputFile, 'wb'))
    dprimes = pickle.load(open(outputFile, 'rb'))

    print
    print 'Results:'
    print 'Mean d\' score for each quit opportunity: {}'.format([np.mean([k[i] for k in dprimes]) for i in xrange(1)])
    print 'Std : {}'.format([np.std([k[i] for k in dprimes]) for i in xrange(1)])
    print 'Max : {}'.format([np.max([k[i] for k in dprimes]) for i in xrange(1)])
    print
    print
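# funcs.normalize (used both on the inputs and in the weight update above) is not shown in
# this file. A minimal guess at its behaviour, assuming it rescales a score vector into
# [0, 1] by its own min and max; the real helper may z-score or divide by the vector norm instead.
import numpy as np

def normalize_sketch(x):
    x = np.asarray(x, dtype=float)
    span = x.max() - x.min()
    if span == 0:
        return np.zeros_like(x)   # constant input: nothing to scale
    return (x - x.min()) / span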
            loss = criterion(outputs, target)
        else:
            # Multi-channel output: sum the per-channel losses
            loss_0 = criterion(outputs[:][0], target[:][0])
            loss_1 = criterion(outputs[:][1], target[:][1])
            loss_2 = criterion(outputs[:][2], target[:][2])
            loss = loss_0 + loss_1 + loss_2

        # Skip batches that produced a non-finite loss
        if not torch.isfinite(loss):
            continue

        running_loss += loss.item()
        tepoch.set_postfix(loss=running_loss / (params["train_batch_size"] / params["test_batch_size"]))

        if model_params["name"] in ["PhysNet", "PhysNet_LSTM"]:
            inference_array.extend(normalize(outputs.cpu().numpy()[0]))
            target_array.extend(normalize(target.cpu().numpy()[0]))
        else:
            inference_array.extend(outputs[:][0].cpu().numpy())
            target_array.extend(target[:][0].cpu().numpy())

        if tepoch.n == 0 and __TIME__:
            save_time = time.time()

    # Postprocessing: DeepPhys predicts a differentiated signal, so integrate before detrending
    if model_params["name"] in ["DeepPhys"]:
        inference_array = detrend(np.cumsum(inference_array), 100)
        target_array = detrend(np.cumsum(target_array), 100)

    if __TIME__ and epoch == 0:
        log_info_time(