def cross_validate(self, Z, r, not_missing, folds = 4, ncount = 11,
                   penalty_min = 0., penalty_max = 1000., xv_chunk = 1,
                   xv_chunks = 1, print_iter = False, max_epochs = None,
                   reps = 1):
    T = Z.shape[0]
    # Build the grid of (prior, mean) precision penalties and keep only the
    # slice assigned to this worker.
    uni_pen = np.linspace(penalty_min, penalty_max, ncount)
    pens = [(x, y) for x in uni_pen for y in uni_pen]
    pens = util.chunkify(pens, xv_chunks)[xv_chunk - 1]
    xvN = len(pens)
    means = np.zeros((reps, folds, xvN))
    variances = np.zeros((reps, folds, xvN))
    iter_sharpe = np.zeros(means.size)
    iter_params = means.size * [0]
    # Contiguous folds: observation tt belongs to fold floor(folds * tt / T).
    samples = [[tt for tt in range(T) if int(folds * tt / T) == fold]
               for fold in range(folds)]
    iterii = 0
    for jj in range(reps):
        for fold in range(folds):
            if folds > 1:
                fold_sample = np.concatenate(
                    [samp for kk, samp in enumerate(samples) if kk != fold])
                xv_sample = samples[fold]
            else:
                # With a single fold, train and validate on the full sample.
                fold_sample = list(range(T))
                xv_sample = fold_sample
            for ii, penvals in enumerate(pens):
                prior_prec, mean_prec = penvals
                self.restart()
                self.set_prior_precision(prior_prec, mean_prec)
                self._fit(Z, r, not_missing, iter_indices = fold_sample,
                          print_iter = print_iter)
                # Evaluate on the held-out fold with a zero learning rate so
                # no parameter updates occur.
                xv_fit = self._fit(Z, r, not_missing, learning_rate = 0.,
                                   iter_indices = xv_sample,
                                   print_iter = print_iter,
                                   max_epochs = max_epochs)
                means[jj, fold, ii] = xv_fit['mean']
                variances[jj, fold, ii] = xv_fit['variance']
                iter_sharpe[iterii] = xv_fit['sharpe_ratio']
                iter_params[iterii] = xv_fit['model_parameters']
                if print_iter:
                    print(('Cross Validation:', fold, ii,
                           means[jj, fold, ii]
                           / np.sqrt(variances[jj, fold, ii])))
                iterii += 1
    # Average over repetitions and folds, then pick the penalty pair with
    # the best out-of-sample Sharpe ratio.
    means = np.mean(np.mean(means, axis = 0), axis = 0)
    variances = np.mean(np.mean(variances, axis = 0), axis = 0)
    sharpes = means / np.sqrt(variances)
    best_prec, best_mean_prec = pens[np.argmax(sharpes)]
    xv_results = np.zeros((xvN, len(pens[0]) + 1))
    xv_results[:, :-1] = np.array(pens)
    xv_results[:, -1] = sharpes
    self.set_prior_precision(best_prec, best_mean_prec)
    self.set_model_variables(iter_params[np.argmax(iter_sharpe)])
    return xv_results
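# Standalone illustration (not part of the class) of how the penalty grid in
# cross_validate is laid out: the Cartesian product of ncount evenly spaced
# values, one axis for the prior precision and one for the mean precision.
# The ncount and bounds here are arbitrary example values.
import numpy as np

uni_pen = np.linspace(0., 1000., 3)              # e.g. ncount = 3
pens = [(x, y) for x in uni_pen for y in uni_pen]
print(len(pens))    # 9 candidate (prior_prec, mean_prec) pairs
# util.chunkify(pens, xv_chunks)[xv_chunk - 1] then selects this worker's
# slice, so the grid search can be split across parallel jobs (see the
# chunkify sketch at the end of this file).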
def get_messages(user_email=None, tag=None, process_id=None):
    imap = None
    msg_ids = []
    try:
        imap = IMAPHelper()
        imap.oauth1_2lo_login(user_email=user_email)
        try:
            if tag:
                logging.info('Creating label [%s]', tag)
                imap.create_label(tag)
            msg_ids = imap.list_messages(only_from_trash=True)
        except Exception as e:
            logging.exception('Error creating label or retrieving messages '
                              'for user [%s]', user_email)
            processed_user = ProcessedUser.get_by_id(user_email)
            if not processed_user:
                processed_user = ProcessedUser(id=user_email, ok_count=0,
                                               error_count=0,
                                               total_count=list(),
                                               error_description=list())
            processed_user.error_description.append(e.message)
            processed_user.put()
            return []
    except Exception as e:
        logging.exception('Authentication or connection problem for user '
                          '[%s]', user_email)
        processed_user = ProcessedUser.get_by_id(user_email)
        if not processed_user:
            processed_user = ProcessedUser(id=user_email, ok_count=0,
                                           error_count=0, total_count=list(),
                                           error_description=list())
        processed_user.error_description.append(e.message)
        processed_user.put()
        return []
    finally:
        if imap:
            imap.close()
    # The IMAP connection was OK: record the total message count, then fan
    # the ids out into one chunk per allowed concurrent connection.
    if len(msg_ids) > 0:
        counter.load_and_increment_counter('%s_total_count' % user_email,
                                           delta=len(msg_ids),
                                           namespace=str(process_id))
        return chunkify(msg_ids, num_chunks=constants.USER_CONNECTION_LIMIT)
    else:
        counter.load_and_increment_counter('%s_total_count' % user_email,
                                           delta=0,
                                           namespace=str(process_id))
        return []
def schedule_user_move(user_email=None, tag=None, move_process_key=None,
                       domain_name=None):
    if domain_name:
        try:
            primary_domain = PrimaryDomain.get_or_create(
                domain_name=domain_name)
            if primary_domain.credentials:
                email_settings_helper = EmailSettingsHelper(
                    credentials_json=primary_domain.credentials,
                    domain=domain_name,
                    refresh_token=primary_domain.refresh_token
                )
                email_settings_helper.enable_imap(user_email)
                logging.info('IMAP enabled for [%s]', user_email)
            else:
                logging.warn('Error trying to enable IMAP for user [%s]',
                             user_email)
        except:
            logging.exception('Domain [%s] is not authorized, IMAP not '
                              'enabled', domain_name)
    for chunk_ids in get_messages(user_email=user_email, tag=tag,
                                  process_id=move_process_key.id()):
        if len(chunk_ids) > 0:
            # Compress each batch to its [first_id, last_id] endpoints and
            # keep the true batch size, so the deferred payload stays small.
            new_chunk_ids = []
            chunk_sizes = []
            for chunk in chunkify(chunk_ids,
                                  chunk_size=constants.MESSAGE_BATCH_SIZE):
                new_chunk_ids.append([chunk[0], chunk[-1]])
                chunk_sizes.append(len(chunk))
            logging.info('Scheduling user [%s] messages move', user_email)
            deferred.defer(move_messages, user_email=user_email, tag=tag,
                           chunk_ids=new_chunk_ids,
                           process_id=move_process_key.id(),
                           chunk_sizes=chunk_sizes)
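# Illustration only of the endpoint compression above, with hypothetical
# IMAP message ids. Because the id range between the endpoints may be
# sparse, the true size of each batch is carried alongside.
chunk = [105, 106, 107, 110]        # hypothetical message ids in one batch
endpoints = [chunk[0], chunk[-1]]   # [105, 110]
size = len(chunk)                   # 4, not 6: the range 105..110 is sparse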
def train_dA(train_x, learning_rate=0.1, training_epochs=500, batch_size=30,
             chunk=3, corruption_level=.3, rel_hidden=.6):
    # Transform the training data: split every sample into chunk pieces so
    # the autoencoder sees small patches rather than full rows.
    xs = train_x.get_value(borrow=True)
    real_train = []
    for x in xs:
        real_train += chunkify(x, chunk)
    train_x = theano.shared(numpy.asarray(real_train,
                                          dtype=theano.config.floatX),
                            borrow=True)
    n_train_batches = train_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    image_size = train_x.get_value(borrow=True).shape[1]
    da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
            n_visible=image_size, n_hidden=int(rel_hidden * image_size),
            chunk=chunk)
    cost, updates = da.get_cost_updates(corruption_level=corruption_level,
                                        learning_rate=learning_rate)
    train_da = theano.function(
        [index], cost, updates=updates,
        givens={x: train_x[index * batch_size:(index + 1) * batch_size]})

    for epoch in xrange(training_epochs):
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))
        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    # Visualise the learned filters as a tiled image.
    image = PIL.Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(chunk, chunk),
        tile_shape=(int((rel_hidden * image_size) ** .5 + 1),
                    int((rel_hidden * image_size) ** .5 + 1)),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')
    return da
def elevation(self, pts, elevation_refs, interpolation_refs, console):
    '''Gets elevation from reference data.'''
    def ElevationURL(url, num, outof, attempt):
        '''Get elevation data from a URL, retrying on failure.'''
        if attempt == self.max_failed_queries:
            console.add('elevation', num, outof, False,
                        'Quitting this query. Data will be inaccurate')
            return None, False
        current_time = time.time()
        if current_time < self.next_query_time:
            # Before the earliest allowed query time: wait it out.
            time.sleep(self.next_query_time - current_time)
        # Throttle: no query may fire sooner than 0.2 seconds from now.
        self.next_query_time = current_time + 0.2
        try:    # a ton of things can go wrong!
            data = json.load(urlopen(url))
            if data['status'] == 'OK':    # nothing wrong with the query
                console.add('elevation', num, outof)
                return data, True
            else:    # the API reported an error; retry
                console.add('elevation', num, outof, False,
                            'Problem with query or data: ' + data['status'])
                print 'Elevation Error (if)'
                return ElevationURL(url, num, outof, attempt + 1)
        except:    # connection-level failure; retry
            console.add('elevation', num, outof, False,
                        'Problem with connection')
            print 'Elevation Error'
            return ElevationURL(url, num, outof, attempt + 1)

    api_url = ('https://maps.googleapis.com/maps/api/elevation/json'
               '?locations=enc:')
    requests = []
    chunks = list(util.chunkify(elevation_refs, 200))
    for chunk in chunks:
        # Encode each chunk of points as one polyline per API request.
        polyline = Polyline.encode('|'.join(
            [pts[a[0]].string for a in chunk]))
        requests.append((polyline, chunk,
                         api_url + polyline +
                         '&key=AIzaSyCnHT7IpJu0O7n-apLNW8iKkW_rTIuANuE'))
    request_num = 1
    for poly, refs_list, url in requests:
        data, success = ElevationURL(url, request_num, len(requests), 0)
        if not success:
            return None, None, False
        request_num += 1
        if data:
            for res, refs in zip(data['results'], refs_list):
                for ref in refs:
                    pts[ref].elevation = res['elevation']
    # Linearly interpolate elevation for points without their own sample.
    for to_int, ref1, ref2 in interpolation_refs:
        pt = pts[to_int[0]]
        pt1 = pts[ref1[0]]
        pt2 = pts[ref2[0]]
        dist1 = pt.pt.distanceTo(pt1.pt)
        dist2 = pt.pt.distanceTo(pt2.pt)
        total = dist1 + dist2
        slope = (pt2.elevation - pt1.elevation) / total
        elevation = pt1.elevation + dist1 * slope
        for pt in to_int:
            pts[pt].elevation = elevation
    return pts, requests, True
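# Illustration only, with hypothetical values: the interpolation loop above
# is plain linear interpolation of elevation along the path between the two
# sampled neighbours of an un-sampled point.
elev1, elev2 = 100.0, 130.0      # neighbour elevations in metres
dist1, dist2 = 40.0, 60.0        # distances from the point to each neighbour
slope = (elev2 - elev1) / (dist1 + dist2)   # 0.3 m of rise per metre
interpolated = elev1 + dist1 * slope        # 112.0 m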
def fit_epoch(self, T, batch_T, learning_rate = None, print_iter = False,
              iter_indices = None):
    if not self.active_session:
        self.initialize_session()
    if iter_indices is None:
        obs = T
        iter_indices = np.arange(obs)
    else:
        obs = len(iter_indices)
    # Shuffle and split the sample into roughly batch_T-sized minibatches.
    np.random.shuffle(iter_indices)
    iter_batches = util.chunkify(iter_indices,
                                 max(len(iter_indices) // batch_T, 1))
    itern = len(iter_batches)
    average_ll = 0
    tan_exrets = []
    average_mean = 0
    average_vari = 0
    average_exrets = 0
    sse = 0.
    ssr = 0.
    for step, indices in enumerate(iter_batches):
        Tval = len(indices)
        sample_ratio = Tval / obs
        feed_dict = {self.T: Tval,
                     self.sample_ratio: sample_ratio,
                     self.use_indices: np.array(indices).astype(np.int32)}
        if learning_rate is not None:
            feed_dict.update({self.learning_rate: learning_rate})
        else:
            feed_dict.update({self.learning_rate: self.current_learning_rate})
        session_output = self.session.run(
            [self.optimizer, self.loss, self.tangency_exrets,
             self.average_mean, self.average_exrets, self.sse, self.ssr,
             self.vari_mean],
            feed_dict = feed_dict)
        _, nll, rr, amn, aex, ssep, ssrp, vm = session_output
        ### accumulate the log-likelihood and portfolio statistics
        #self.print_ll(feed_dict)
        average_ll -= nll / obs
        tan_exrets += list(rr)
        average_mean += amn / itern
        average_exrets += aex / itern
        average_vari += vm / itern
        ssr += ssrp
        sse += ssep
    ### save data to output
    # The Sharpe ratio is the mean tangency excess return over its
    # standard deviation.
    mean_rr = np.mean(tan_exrets)
    var_rr = np.var(tan_exrets)
    average_rr = mean_rr / np.sqrt(var_rr)
    # A zero learning rate marks an evaluation-only (cross-validation) pass;
    # anything else is a training pass.
    desc_str = 'Cross Validation' if learning_rate == 0. else 'Training'
    self.epoch += 1
    if print_iter:
        print('Epoch %d' % (self.epoch))
        print('\t%s LL at step %d: %f' % (desc_str, self.epoch, average_ll))
        print('\tR2: %f' % (1. - sse / ssr))
        print('\t%s Sharpe at step %d: %f'
              % (desc_str, self.epoch, average_rr))
        print('\t%s Average mu at step %d: %f'
              % (desc_str, self.epoch, average_mean))
        print('\t%s Average variance at step %d: %f'
              % (desc_str, self.epoch, average_vari))
        print('\t%s Average excess returns at step %d: %f'
              % (desc_str, self.epoch, average_exrets))
        print('\t\t\t%s Learning Rate at step %d: %f'
              % (desc_str, self.epoch, self.current_learning_rate))
    outdata = {'loglikelihood': average_ll,
               'sharpe_ratio': average_rr,
               'learning_rate': self.current_learning_rate,
               'mean': mean_rr,
               'variance': var_rr}
    return outdata
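# Every function above leans on a chunkify helper (also imported as
# util.chunkify) whose definition is not shown here. The sketch below is an
# assumption reconstructed from the call sites: a positional chunk count (as
# in cross_validate and fit_epoch), num_chunks= (as in get_messages), and
# chunk_size= (as in schedule_user_move). The real helper may differ.
def chunkify(items, num_chunks=None, chunk_size=None):
    items = list(items)
    if chunk_size is not None:
        # Fixed-size chunks; the last one may be shorter.
        return [items[i:i + chunk_size]
                for i in range(0, len(items), chunk_size)]
    # Split into num_chunks pieces with sizes as even as possible.
    k, r = divmod(len(items), num_chunks)
    chunks, start = [], 0
    for i in range(num_chunks):
        end = start + k + (1 if i < r else 0)
        chunks.append(items[start:end])
        start = end
    return chunks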