def data_range(data, headers):
    result = []
    for header in headers:
        colmax = np.amax(data.get_data((header,)))
        #colmax = np.amax(data.census_strip_totals((header,)))
        #print 'max'
        #print colmax
        colmin = np.amin(data.get_data((header,)))
        #colmin = np.amin(data.census_strip_totals((header,)))
        result.append((colmax, colmin))
        #print 'min'
        #print colmin
    return result
def get_data(self, blocks):
    """Return the data for the given blocks as JSON, or a not-found response."""
    subdata = data.get_data(blocks)
    if subdata is None:
        self._response_not_found()
    else:
        self._response_json(subdata)
def normalize_columns_together(data, headers):
    temp_matrix = data.get_data(headers)
    rows = len(temp_matrix)
    homogenous_coordinates = np.ones(shape=(rows, 1))
    temp_matrix = np.hstack((temp_matrix, homogenous_coordinates))
    min_max = data_range(data, headers)

    mins = []
    for i in range(len(headers)):
        mins.append(min_max[i][1])
    totmin = min(float(num) for num in mins)

    maxes = []
    for i in range(len(headers)):
        maxes.append(min_max[i][0])
    totmax = max(float(num) for num in maxes)

    totrange = totmax - totmin

    Tx = np.eye(len(headers) + 1)
    for i in range(len(headers)):
        Tx[i, len(headers)] = -totmin

    Ss = np.eye(len(headers) + 1)
    for i in range(len(headers)):
        Ss[i, i] = 1 / totrange

    result = None
    for i in range(rows):
        temp_row = np.matrix(temp_matrix[i, :]).T
        row = Tx * temp_row
        row = Ss * row
        if result is None:
            result = row.T
        else:
            result = np.vstack((result, row.T))
    return result[:, range(len(headers))]
def delete_data(self, blocks):
    # Check that the resource exists
    subdata = data.get_data(blocks)
    if subdata is None:
        self._response_not_found()
    else:
        self._response_forbidden()
def load_data(dht_node_list):
    '''
    @param {List} dht_node_list --- Each element is a dht node.

    @returns {List} --- Each element is a data.DataItem, which is a thin
    wrapper for a data key and value.
    '''
    start = time.time()
    data_items = data.get_data(conf.NUMBER_DATA_ITEMS)

    if len(dht_node_list) == 0:
        dht_util.dht_assert('No dht nodes passed in when loading data')

    dht_load_node = dht_node_list[0]
    for counter in range(0, len(data_items)):
        # if (counter % 50) == 0:
        #     print 'Loading data ' + str(counter) + ' of ' + str(len(data_items))
        print 'Loading data ' + str(counter) + ' of ' + str(len(data_items))
        data_item = data_items[counter]
        dht_load_node.add_data(data_item.key, data_item.val)

    elapsed = time.time() - start
    print '\nLoad time: ' + str(elapsed)
    print '\n'
    return data_items
def normalize_columns_together(data, header_names_list):
    target = data.get_data(header_names_list)
    minA = np.min(target)
    maxA = np.max(target)
    rangeA = maxA - minA
    new_matrix = 1 - ((maxA - target) / rangeA)
    return new_matrix
def main():
    """Driver routine"""
    # Global params
    with open('input/params.json') as params:
        input = json.load(params)

    base_dir = input['base_dir']
    data_dir = input['data_dir']
    db_cred_file = input['db_cred_file']
    machines = input['machines']
    tstart = input['tstart']
    tend = input['tend']
    one_hot = bool(input['one_hot'])
    train_fraction = float(input['train_fraction'])
    validation_fraction = float(input['validation_fraction'])
    test_fraction = float(input['test_fraction'])

    for machine in machines:
        #debug = DebugQueue(machine)
        #regular = RegQueue(machine)
        #shared = SharedQueue(machine)
        queue, completed = data.get_data(machine, base_dir, data_dir, db_cred_file, tstart, tend)
        hotdf = data.create_df(queue, completed, one_hot)
        test.create_all_sets(hotdf, train_fraction, validation_fraction, test_fraction)
def get_csv(database, table, date_start=None, date_end=None):
    timezone = database.tables[table]["timezone"]
    data = get_data(database, table, date_start, date_end)
    with StringIO() as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)
        return csvfile.getvalue()
def normalize_columns_separately(data, header_names_list):
    target = data.get_data(header_names_list)
    minC = np.min(target, axis=0)
    maxC = np.max(target, axis=0)
    rangeC = maxC - minC
    new_matrix = 1 - ((maxC - target) / rangeC)
    return new_matrix
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)

    log_return = (prices["Adj Close"] / prices["Adj Close"].shift(1)).apply(np.log)

    vol = pandas.rolling_std(log_return, window=window) * math.sqrt(252)
    adj_factor = math.sqrt(
        1.0 / (1.0 - (window / (log_return.count() - (window - 1.0)))
               + (window ** 2 - 1.0) / (3.0 * (log_return.count() - (window - 1.0)) ** 2)))

    result = vol * adj_factor
    result[: window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
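As a side note, the dense bias-adjustment expression for `adj_factor` above can be pulled out into a small standalone helper. This is only an illustrative sketch: `cc_adj_factor` and the sample values are hypothetical, with `n_obs` standing in for `log_return.count()`.

import math

def cc_adj_factor(window, n_obs):
    # Same expression as in get_estimator above, with m = n_obs - (window - 1).
    m = n_obs - (window - 1.0)
    return math.sqrt(1.0 / (1.0 - (window / m) + (window ** 2 - 1.0) / (3.0 * m ** 2)))

print(cc_adj_factor(30, 252))  # roughly 1.07 for a 30-day window over ~252 observations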
def http_json(tables, start, end):
    db = Database(DATABASE)
    table_list = tables.split("+")
    table_set = set()
    table_data = dict()

    for table in table_list:
        if table in db.tables:
            table_set.add(table)
            name = get_cache_name(table, start, end)
            cached = cache.get(name)
            if cached is not None:
                table_data[table] = cached
            else:
                data = get_data(db, table, start, end)
                table_data[table] = data
                cache.set(name, data)

    # We don't fail if at least one table is found. While the client should
    # never request an unknown table, it will not error if it doesn't receive
    # a requested table, and will just draw those given.
    if len(table_set) == 0:
        abort(404)

    return json.jsonify(table_data)
def gen(**kwargs):
    """
    Command-line interface for generating poems.
    """
    for k, v in kwargs.items():
        setattr(opt, k, v)

    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)

    if opt.use_gpu:
        model.cuda()

    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
            prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode(
                'utf8') if opt.prefix_words else None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode('utf8') if opt.prefix_words else None

    start_words = start_words.replace(',', u',') \
        .replace('.', u'。') \
        .replace('?', u'?')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)

    log_ho = (prices['Adj High'] / prices['Adj Open']).apply(np.log)
    log_lo = (prices['Adj Low'] / prices['Adj Open']).apply(np.log)
    log_co = (prices['Adj Close'] / prices['Adj Open']).apply(np.log)

    log_oc = (prices['Adj Open'] / prices['Adj Close'].shift(1)).apply(np.log)
    log_oc_sq = log_oc**2

    log_cc = (prices['Close'] / prices['Close'].shift(1)).apply(np.log)
    log_cc_sq = log_cc**2

    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)

    close_vol = pandas.rolling_sum(log_cc_sq, window=window) * (1.0 / (window - 1.0))
    open_vol = pandas.rolling_sum(log_oc_sq, window=window) * (1.0 / (window - 1.0))
    window_rs = pandas.rolling_sum(rs, window=window) * (1.0 / (window - 1.0))

    result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * math.sqrt(252)
    result[:window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
def get_estimator(symbol, start, end, window=30, clean=True):
    prices = data.get_data(symbol, start, end)

    log_ho = (prices['High'] / prices['Open']).apply(np.log)
    log_lo = (prices['Low'] / prices['Open']).apply(np.log)
    log_co = (prices['Close'] / prices['Open']).apply(np.log)

    log_oc = (prices['Open'] / prices['Close'].shift(1)).apply(np.log)
    log_oc_sq = log_oc**2

    log_cc = (prices['Close'] / prices['Close'].shift(1)).apply(np.log)
    log_cc_sq = log_cc**2

    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)

    close_vol = log_cc_sq.rolling(window=window, center=False).sum() * (1.0 / (window - 1.0))
    open_vol = log_oc_sq.rolling(window=window, center=False).sum() * (1.0 / (window - 1.0))
    window_rs = rs.rolling(window=window, center=False).sum() * (1.0 / (window - 1.0))

    result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * math.sqrt(252)
    result[:window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
def normalize_columns_separately(data, headers):
    temp_matrix = data.get_data(headers)
    rows = len(temp_matrix)
    homogenous_coordinates = np.ones(shape=(rows, 1))
    temp_matrix = np.hstack((temp_matrix, homogenous_coordinates))
    min_max = data_range(data, headers)

    Tx = np.eye(len(headers) + 1)
    for i in range(len(headers)):
        Tx[i, len(headers)] = -min_max[i][1]
    #print 'Tx'
    #print Tx

    Ss = np.eye(len(headers) + 1)
    for i in range(len(headers)):
        colrange = min_max[i][0] - min_max[i][1]
        Ss[i, i] = 1 / colrange
    #print 'Ss'
    #print Ss

    result = None
    #for i in range(data.get_raw_num_rows()):
    for i in range(rows):
        temp_row = np.matrix(temp_matrix[i, :]).T
        #temp_row = temp_matrix[i,:]
        #print 'row as vector'
        #print temp_row
        row = Tx * temp_row
        row = Ss * row
        #print temp_matrix[i, :].T * TransformationMatrix
        if result is None:
            result = row.T
        else:
            result = np.vstack((result, row.T))
    #print 'row added'
    #print result[:, range(len(headers))]
    return result[:, range(len(headers))]
def run():
    coordinator_master = start_coordinator()
    dht_node_list = add_dht_nodes()

    print '\nAbout to load data'
    data_to_load = data.get_data(dht_util.NUMBER_DATA_ITEMS)
    load_data(dht_node_list[0], data_to_load)

    print '\nAbout to query data'
    query_loaded_data(dht_node_list, data_to_load)
def data_range(data, header_names_list):
    target = data.get_data(header_names_list)
    #print target
    minlist = target.min(0)
    maxlist = target.max(0)
    #print "maxlist and minlist are", maxlist, minlist
    result = np.concatenate((minlist.T, maxlist.T), axis=1)
    return result.tolist()
def dendrodata():
    rows, cols, matrix = data.get_data('blogdata.txt')
    print "Calculating clusters...",
    tree = groups.cluster_dict(groups.cluster_hierarchy(matrix), rows)
    print " DONE."
    return {"tree": tree}
def fuzzyCmeans(data, headers, C):
    A = data.get_data(headers)
    centroids, partitionMatrix = fuzzyCinit(A, C, headers)
    partitionMatrix, centroids = fuzzyC_algorithm(A, centroids, partitionMatrix)
    #print centroids
    #print partitionMatrix
    return partitionMatrix, centroids
def kmeans(data, headers, K, whiten=True, categories=None):
    A = data.get_data(headers)
    if whiten:
        W = vq.whiten(A)
    else:
        W = A
    codebook = kmeans_init(W, K, categories)
    codebook, codes, errors = kmeans_algorithm(W, codebook)
    return codebook, codes, errors
def get_estimator(symbol, start, end, window=30, clean=True):
    prices = data.get_data(symbol, start, end)
    log_return = (prices['Close'] / prices['Close'].shift(1)).apply(np.log)
    result = log_return.rolling(window=window, center=False).skew()
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def main():
    """Build the image input queue and model graphs, then train the model."""
    placeholders = ['input', 'label']
    train_ops = ['train']
    log_ops = ['accuracy']

    files = get_data(config.DATA_DIRECTORY)
    queue_graph = create_image_queue_graph(files, config.PIXEL_DEPTH, config.HEIGHT,
                                           config.WIDTH, config.CHANNELS,
                                           config.BATCH_SIZE, config.CAPACITY)
    model_graph = create_model_graph(config.HEIGHT, config.WIDTH, config.CHANNELS,
                                     config.NUM_LABELS)
    train_model(queue_graph, model_graph, placeholders, train_ops, log_ops)
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)
    result = pandas.rolling_skew(log_return, window=window)
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)
    result = pandas.rolling_skew(log_return, window=window)
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def main(args):
    x, fx = get_data(args)
    device = torch.device("cuda" if args.cuda else "cpu")
    train_data, val_data = split_data(args, x, fx)
    if args.save_splits:
        save_splits(train_data, val_data)
    train_loader, val_loader = get_loaders(train_data, val_data)
    model = get_model(args)
    trainer = get_trainer(model, train_loader, val_loader, device, args)
    trainer.train()
def range_(headers, data):
    """Takes in a list of column headers and the Data object and returns a list
    of 2-element lists with the minimum and maximum values for each column.
    The function is required to work only on numeric data types."""
    # Get columns as rows, as this makes analysis easier by performing
    # operations on the column list directly.
    column_matrix = data.get_data(headers).getT()
    if column_matrix.size == 0:
        print "wrong headers, not present in data Object"
        return []
    column_max = column_matrix.max(1)
    column_min = column_matrix.min(1)
    final = np.concatenate((column_min, column_max), axis=1)
    rng = final.tolist()
    return rng
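A minimal usage sketch for range_ above, assuming a stub Data object whose get_data(headers) returns a numpy matrix of the selected columns; StubData and its column names are hypothetical, for illustration only.

import numpy as np

class StubData(object):
    """Hypothetical stand-in for the project's Data class."""
    def __init__(self, matrix, headers):
        self.matrix = np.matrix(matrix)
        self.header_index = {h: i for i, h in enumerate(headers)}

    def get_data(self, headers):
        # Return the requested columns as a numpy matrix, like the real Data class.
        cols = [self.header_index[h] for h in headers]
        return self.matrix[:, cols]

d = StubData([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]], ['a', 'b'])
print range_(['a', 'b'], d)   # expected: [[1.0, 3.0], [10.0, 30.0]]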
def sort(headers, data):  # extension
    """Return the numeric matrices with sorted columns."""
    column_matrix = data.get_data(headers)  # get raw matrix data for numeric values
    print "\n before sorting \n "
    print column_matrix
    column_matrix = column_matrix.tolist()
    column_array = np.asarray(column_matrix)
    column_array.sort(axis=0)
    print "\n \n done sorting here is your matrix \n"
    return column_array
def run_test_experiments(config):
    dir_path = '/path/to/working/dir'
    train_file = dir_path + '/data/ontonotes.development.ner'
    test_file = dir_path + '/data/ontonotes.test.ner'
    model_path = dir_path + '/models/MLPNet_' + config['para_option'] + '.pt'

    print('load data')
    train_data = get_data(train_file)
    test_data = get_data(test_file)

    print('get vocabulary and embeddings')
    word_to_ix, pos_to_ix, ner_to_ix = get_vocabulary(train_data, config)
    config['ner_to_ix'] = ner_to_ix
    config['pos_to_ix'] = pos_to_ix
    config['word_to_ix'] = word_to_ix
    config['output_size'] = len(ner_to_ix)
    print('ner_to_ix', ner_to_ix)
    vocab_embeddings = get_vocab_embeddings(word_to_ix)

    print('process data')
    test_input_ids, test_sent_ids, test_pos_ids, test_ner_ids = process_data(
        test_data, word_to_ix, pos_to_ix, ner_to_ix)

    print('get test input features')
    test_input_features = get_word_features(test_input_ids, test_sent_ids, vocab_embeddings)
    test_data = {
        'inputs': test_input_features,
        'sent_ids': test_sent_ids,
        'labels': test_ner_ids,
        'confidences': [1.0] * len(test_input_features)
    }
    print('test words', len(test_input_features))

    print('build model')
    model, loss_function, optimizer = build_model(config)

    print('load model')
    model.load_state_dict(torch.load(model_path))

    print('test model')
    test_accuracy = evaluate(test_data, model, ner_to_ix, config)
    print('test accuracy', test_accuracy)
def fit(self, validate=False, quantize=False, verbosity=0, epochs=40):
    train_data, val_data = data.get_data(
        self.dataset, self.device, shuffle=True, batch_size=128, augment=True)
    loss, acc = backend.fit(
        self.model,
        self.optimizer,
        train_data=train_data,
        val_data=None if validate is False else val_data,
        epochs=epochs,
        verbosity=verbosity,
        quan_paras=None if quantize is False else self.quan_paras)
    return loss, acc
def train(self, batch_size=128, epochs=40, verbosity=True, validate=False):
    train_data, val_data = data.get_data(
        self.dataset, self.device, shuffle=True, batch_size=batch_size, augment=True)
    acc = backend.fit(
        self.model,
        self.optimizer,
        train_data=train_data,
        val_data=None if validate is False else val_data,
        epochs=epochs,
        verbosity=verbosity
    )
    return acc
def main(args):
    X, y = get_data()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=args.test_size, random_state=args.random_state)
    models = create_models(None)
    models = dict(models)
    for name, clf in models.items():
        clf.fit(X_train, y_train)
    scores = pd.Series(
        {name: clf.score(X_test, y_test) for name, clf in models.items()},
        name="Accuracy")
    print(scores)
def char_enc(fn, cmap):
    data = get_data(fn)
    result = []
    out_fn = fn[:-4] + '_enc.txt'
    print('Starting.')
    for line in data:
        new_line = ''
        for char in line:
            new_line += cmap[char]
        result.append(new_line)
    with open(out_fn, 'w') as f:
        f.write('\n'.join(result))
    print('Done. Output file:', out_fn)
def get_word_sentiment(word):
    """Return a number between -1 and +1 representing the degree of positive or
    negative feeling in the given word. Return None if the word is not in the
    sentiment dictionary. (0 represents a neutral feeling, not an unknown feeling.)

    >>> get_word_sentiment('good')
    0.875
    >>> get_word_sentiment('bad')
    -0.625
    >>> get_word_sentiment('winning')
    0.5
    >>> get_word_sentiment('Berkeley')  # Returns None
    """
    return get_data(word)
def get_batch(size, BATCH_SIZE=20, SHUFFLE_BUFFER_SIZE=1000):
    train, val, test, labels = get_data(size, 'rock_paper_scissors', 3)
    train_batches = train.shuffle(SHUFFLE_BUFFER_SIZE).repeat().batch(BATCH_SIZE)
    val_batches = val.shuffle(SHUFFLE_BUFFER_SIZE).repeat().batch(BATCH_SIZE)
    test_batches = test.batch(BATCH_SIZE).repeat()
    if SHUFFLE_BUFFER_SIZE % BATCH_SIZE != 0:
        parallel_steps = SHUFFLE_BUFFER_SIZE // BATCH_SIZE + 1
    else:
        parallel_steps = SHUFFLE_BUFFER_SIZE // BATCH_SIZE
    return train_batches, val_batches, test_batches, parallel_steps
def main(args):
    np.random.seed()
    x1s_trn, x2s_trn, ys_trn, x1s_vld, x2s_vld, ys_vld = get_data()
    model = Model(64, 64, 1, model_id=args.model_id)
    model.train(x1s=x1s_trn,
                x2s=x2s_trn,
                ys=ys_trn,
                validation_x1s=x1s_vld,
                validation_x2s=x2s_vld,
                validation_ys=ys_vld,
                num_epochs=2000,
                embedding_dimension=128,
                mini_batch_size=50,
                learning_rate=0.00035,
                margin=0.5)
def hande_new_post():
    if request.method == "POST":
        # if is_authed(session):
        body = request.json
        post_id = hashlib.sha256(json.dumps(body)).hexdigest()[:11]
        data.submit_post(body['user_id'], post_id, body)
        return jsonify({"id": post_id, "success": True})
        # else:
        #     return 400
    if request.method == "GET":
        post_id = request.args.get('post_id')
        user_id = request.args.get('user_id')
        return jsonify(data.get_data(user_id, post_id))
    else:
        return 400
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)
    rs = (1 / (4 * math.log(2))) * ((prices['Adj High'] / prices['Adj Low']).apply(np.log))**2

    def f(v):
        return math.sqrt(252 * v.mean())

    result = pandas.rolling_apply(rs, window, f)
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)

    vol = pandas.rolling_std(log_return, window=window) * math.sqrt(252)
    adj_factor = math.sqrt(
        1.0 / (1.0 - (window / (log_return.count() - (window - 1.0)))
               + (window ** 2 - 1.0) / (3.0 * (log_return.count() - (window - 1.0)) ** 2)))

    result = vol * adj_factor
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def execute_job(jid):
    job_dict = get_job_by_jid(jid)
    points = get_data()
    points = points.in_between(start=int(job_dict['start']), end=int(job_dict['end'])).data
    years = [int(p['Year']) for p in points]
    rainfall = [p['Annual rainfall at fortaleza'] for p in points]

    plt.scatter(years, rainfall)
    plt.title(_create_job_key(jid))
    plt.xlabel('Year')
    plt.ylabel('Rainfall (mm)')

    tmp_file = '/tmp/{}.png'.format(jid)
    plt.savefig(tmp_file, dpi=150)
    finalize_job(jid, tmp_file)
def update_film_table(index_name='films'):
    films, places = get_data('film')
    for k, f in films.iteritems():
        o = Film.get_by_id(f.get('id'))
        if not o:
            o = set_film_model(f)
        ModelSearch.add_document(
            ModelSearch.create_film_document(
                doc_id=o.key.urlsafe(),
                film=o
            ),
            index_name=index_name
        )
def get_estimator(symbol, start, end, window=30, clean=True):
    prices = data.get_data(symbol, start, end)
    rs = (1 / (4 * math.log(2))) * ((prices['High'] / prices['Low']).apply(np.log))**2

    def f(v):
        return math.sqrt(252 * v.mean())

    result = rs.rolling(window=window, center=False).apply(func=f)
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def input_gen():
    """Infinite sample generator."""
    data_in = get_data()
    data = []
    for d in data_in:
        for i in range(3):
            d0 = np.zeros((STEPS, 12), np.float)
            length = min(STEPS, d[0].shape[0] - 2)
            d0[:length, :] = d[0][i:length + i, :]
            d1 = d[1]
            data.append((d0, d1))
    index = 0
    while True:
        index = (index + 1) % len(data)
        yield data[index]
def get_ruta_data(url, js_barrios, tresh):
    durations = list()
    js = data.get_data(url)
    calculo = dict()
    calculo = {'rutas': []}
    print(calculo)
    conteo = int()
    for route in js['routes']:
        print('\n' + route['summary'])
        for leg in route['legs']:
            durations.append(re.findall('\d+', leg['duration']['text'])[0])
            for step in leg['steps']:
                # print('origin is' + json.dumps(step['start_location']) + 'end is= ' + json.dumps(step['end_location']))
                lat2 = step['end_location']['lat']
                lon2 = step['end_location']['lng']
                for barrio in js_barrios['barrios']:
                    lat1 = barrio['latitud']
                    lon1 = barrio['longitud']
                    print('=======' + barrio['nombre'])
                    dist = math.acos(
                        math.sin(lat1) * math.sin(lat2) +
                        math.cos(lat1) * math.cos(lat2) * math.cos(lon1 - lon2)) * 6371
                    print(dist)
                    if dist > tresh:
                        conteo = conteo + 1
        calculo['rutas'].append({'nombre': route['summary'], 'conteo': conteo})
        conteo = 0
    print(json.dumps(calculo, indent=4))
    # print(json.dumps(js_barrios, indent=4))

    cont_min = 9999
    for ruta in calculo['rutas']:
        if ruta['conteo'] < cont_min:
            cont_min = ruta['conteo']
            nombre = ruta['nombre']
    for route in js['routes']:
        if nombre in route['summary']:
            return route
async def lab(self, ctx):
    labs_subset = get_labs_subset(LABS_OPTIONS)
    output = "Select a lab by typing what is in `this text`:"
    options = {}
    for lab in labs_subset:
        output += "\n\n- `{}` **{}** \n\t\t*{}*".format(
            lab["option"], lab["name"], lab["description"])
        options[lab["option"]] = lab

    def check(m):
        return m.author == ctx.author and (m.content in options)

    await ctx.send('', embed=discord.Embed(description=output, colour=discord.Color.greyple()))
    msg = await self.bot.wait_for('message', check=check, timeout=120)

    lab = options[msg.content]
    outcome = random.choice(lab["outcomes"])
    output = "*{}*".format(outcome["description"])

    base_sc = SC_LAB
    multiplier = random.uniform(outcome["min_sc"], outcome["max_sc"])
    item_boost = items.get_player_boost(ctx.author.id, "labs")
    sc_add = round(base_sc * multiplier * (1 + item_boost))

    player_sc = get_data(ctx.author.id, "sc", default_val=0)
    add_data(ctx.author.id, "sc", player_sc + sc_add)

    output += "\n\nYour demonstrator gave you {} **{}**.".format(SC_EMOJI, sc_add)
    if item_boost:
        output += "\n_**{:.1f}%** boost from_ **Labs** _items in your inventory._".format(
            item_boost * 100)
    output += "\n\nYou get **`{}`<:xp:699934983074349086>**.".format(XP_LAB)

    lab_disp = discord.Embed(description=output, colour=discord.Color.greyple())
    lab_disp.set_author(name=lab["name"], url='', icon_url=ctx.author.avatar_url)
    await ctx.send('', embed=lab_disp)
    await give_xp(ctx, ctx.author.id, XP_LAB)
    await ctx.send(tips.get_random_tip())
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)
    rs = (1 / (4 * math.log(2))) * ((prices['Adj High'] / prices['Adj Low']).apply(np.log))**2

    def f(v):
        return math.sqrt(252 * v.mean())

    result = pandas.rolling_apply(rs, window, f)
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def predict1():
    config = img_config()
    config.seq_len = 2
    img = utils.load_image(
        "/home/tusimple/junechen/ml_data/data/train2014/COCO_train2014_000000318556.jpg")
    #img = utils.load_image("/home/tusimple/junechen/ml_data/data/train2014/COCO_train2014_000000160629.jpg")
    #/home/tusimple/junechen/ml_data/data/train2014/COCO_train2014_000000318556.jpg
    #img = utils.load_image("./test_data/tiger.jpeg")
    img = img.reshape((1, 224, 224, 3))

    w2d, d2w = data.get_word_to_id()
    config.vob_size = len(w2d)
    print "read w2d size:", len(w2d)
    if len(w2d) == 0:
        f, image, label, word, target, w2d, d2w = data.get_data(
            FLAGS.caption_path,
            FLAGS.image_path,
            max_len=config.num_steps + 1,
            batch_size=config.batch_size)

    images = tf.placeholder("float", [None, 224, 224, 3], name="image")
    word = tf.placeholder(tf.int32, [None, None], name="word_seq")
    image_caption = IMAGE_ATT_CAP(images, word, None, config, is_training=True)

    config_proto = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config_proto) as sess:
        sv = load_session(sess, FLAGS.save_path)
        word = [3, 1]
        words = np.array(word).reshape(1, -1)
        output, c, h = sess.run(
            [image_caption.logits, image_caption.c, image_caption.h],
            feed_dict={
                image_caption.image: img,
                image_caption.word: words
            })
        print(output.shape)
        print "c:", c
        print "h:", h
        print "output:", output
        predict = output[0][-1]
        sort_idx = predict.argsort()[::-1]
        print sort_idx
        print [d2w[p] for p in word + [sort_idx[0]]]
def get_rainfall():
    dict_class = get_data()

    # tests if both start/end and limit/offset used together
    if ('start' in request.args or 'end' in request.args) and ('limit' in request.args or 'offset' in request.args):
        return jsonify({'msg': 'Please do not use start/end with limit/offset'}), 400

    # if start/end provided, returns appropriate data
    if 'start' in request.args or 'end' in request.args:
        start = None
        end = None
        if 'start' in request.args:
            try:
                start = int(request.args.get('start'))
            except:
                return jsonify({'msg': 'Please Enter a Valid Start'}), 400
        if 'end' in request.args:
            try:
                end = int(request.args.get('end'))
            except:
                return jsonify({'msg': 'Please Enter a Valid End'}), 400
        return jsonify(dict_class.in_between(start=start, end=end).data)

    # if limit/offset provided, returns appropriate data
    if 'limit' in request.args or 'offset' in request.args:
        limit = None
        offset = None
        if 'limit' in request.args:
            try:
                limit = int(request.args.get('limit'))
            except:
                return jsonify({'msg': 'Please Enter a Valid Limit'}), 400
        if 'offset' in request.args:
            try:
                offset = int(request.args.get('offset'))
            except:
                return jsonify({'msg': 'Please Enter a Valid Offset'}), 400
        return jsonify(dict_class.limset(limit=limit, offset=offset).data)

    return jsonify(dict_class.data)
def main():
    # get data
    X_train, y_train, X_test = get_data()

    # hyper-parameters
    params = dict(n_estimators=400,
                  max_depth=4,
                  eta=0.09,
                  gamma=0,
                  min_child_weight=0,
                  subsample=0.8,
                  colsample_bytree=0.8,
                  colsample_bylevel=0.6,
                  colsample_bynode=0.2)

    # fit parameters
    #fit_params = {'early_stopping_rounds': 10, 'verbose': True}
    fit_params = {'verbose': True}

    # cross validation
    print('=============================================')
    cv(X=X_train, y=y_train, k=10, verbose=True, mode='r',
       model_params=params, fit_params=fit_params)

    # find result
    def res():
        df_test = pd.read_csv('data/test.csv', skipinitialspace=True, verbose=True)
        model = XGBRegressor(**params)
        model.fit(X_train, y_train, **fit_params)
        predictions = model.predict(X_test)
        print(predictions)
        df_test['SalePrice'] = np.exp(predictions)
        result = df_test[['Id', 'SalePrice']]
        result.to_csv('data/new/xgb.csv', index=False)
        print('Done!')

    res()
def get_layout_for_app(customer_list, app_id):
    form_layout = QFormLayout()
    for cust_id in customer_list:
        status_button = QPushButton("...")
        check_box = QCheckBox()
        row = QHBoxLayout()

        # creates a mapping [cust_id, app_id]: status_button
        mapping_key = cust_id + "," + app_id
        execute.status_button_list.setdefault(mapping_key, [status_button])
        execute_for_customer.setdefault(mapping_key, check_box)

        status_button.clicked.connect(partial(open_status_dialog, mapping_key))
        row.addWidget(check_box)
        row.addWidget(status_button)
        form_layout.addRow(QLabel(data.get_data("customers")[cust_id]["name"]), row)
    return form_layout
def random_forest(cfg):
    # Load data
    train_df, valid_df, test_df = get_data(cfg)
    df = pd.concat([train_df, valid_df])

    # Remove columns and split data into (X, y)
    df = df.drop([
        'State_AL', 'State_NC', 'isNaN_rep_income', 'State_FL', 'State_LA',
        'isNaN_uti_card_50plus_pct', 'State_SC', 'State_GA', 'State_MS',
        'auto_open_36_month_num', 'card_open_36_month_num', 'ind_acc_XYZ'
    ], axis=1)
    X = df.drop("Default_ind", axis=1).values
    y = df["Default_ind"].values

    # Below 2 lines needed for cross-validation in RandomizedSearchCV
    split_index = [-1] * len(train_df) + [0] * len(valid_df)
    pds = PredefinedSplit(test_fold=split_index)

    # Create classifier and the hyperparameter search space
    classifier = RandomForestClassifier(n_jobs=-1, verbose=1)
    param_grid = {
        "n_estimators": np.arange(50, 1000, 100),
        "max_depth": np.arange(1, 20),
        "criterion": ["gini", "entropy"],
        "min_samples_split": np.arange(2, 10),
        "max_features": [0.8, "sqrt", "log2"],
        "min_samples_leaf": np.arange(1, 5),
        "bootstrap": [True, False],
    }
    model = RandomizedSearchCV(
        estimator=classifier,
        param_distributions=param_grid,
        scoring="f1",
        n_iter=700,
        verbose=1,
        n_jobs=1,
        cv=pds,
    )
    model.fit(X, y)
    print(model.best_score_)
    print(model.best_estimator_.get_params())

    with open("rf.pkl", "wb") as f:
        pickle.dump(model.best_estimator_, f)
def gen(**kwargs):
    """
    Command-line interface for generating poems.
    """
    for k, v in kwargs.items():
        print(k, v)
        setattr(opt, k, v)

    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)

    if opt.use_gpu:
        model.cuda()

    # Python 2 / Python 3 string compatibility
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
            prefix_words = opt.prefix_words.encode(
                'ascii', 'surrogateescape').decode('utf8') if opt.prefix_words else None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode(
            'utf8') if opt.prefix_words else None

    prefix_words = prefix_words.replace(',', u',') \
        .replace('.', u'。') \
        .replace('?', u'?')
    start_words = start_words.replace(',', u',') \
        .replace('.', u'。') \
        .replace('?', u'?')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    with open('result.txt', 'w') as f:
        f.writelines(result)
    print(''.join(result))
def train():
    config = img_config()
    config.batch_size = 128
    f, image, label, word, target, w2d, d2w = data.get_data(
        FLAGS.caption_path,
        FLAGS.image_path,
        max_len=config.seq_len + 1,
        batch_size=config.batch_size)
    epoch_size = 10000
    config.vob_size = len(w2d)
    print("vb size:", len(w2d))

    image_caption = IMAGE_ATT_CAP(image, word, target, config)
    #summary_op = tf.merge_all_summaries()
    #sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    #with sv.managed_session(config=config_proto) as sess:
    with tf.Session(config=config_proto) as sess:
        sv = load_session(sess, FLAGS.save_path)
        threads = tf.train.start_queue_runners(sess)
        summary_writer = tf.summary.FileWriter(FLAGS.log_path, sess.graph)
        for i in range(config.max_max_epoch):
            x_lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
            print("lr:", x_lr_decay)
            image_caption.assign_lr(sess, config.learning_rate * x_lr_decay)
            for j in range(10000):
                loss, lr, sum_str = image_caption.run_epoch(
                    sess, x_lr_decay, epoch_size, summary_writer, sv)
                if j % 10 == 0:
                    print("step %d per %f, lr %f" % (i, loss, lr))
                    summary = tf.Summary()
                    summary.value.add(tag='loss', simple_value=loss)
                    i_global = sess.run(tf.train.get_or_create_global_step())
                    print("cost %f global step %d" % (loss, i_global))
                    summary_writer.add_summary(summary, i_global)  # write eval to tensorboard
                    summary_writer.add_summary(sum_str, i_global)
                if j % 100 == 0:
                    save_model(sess, sv, FLAGS.save_path, i_global)
def kmeans(d, headers, K, whiten=True, categories=''):
    '''Takes in a Data object, a set of headers, and the number of clusters to create.
    Computes and returns the codebook, codes and representation errors.
    If given an Nx1 matrix of categories, it uses the category labels
    to calculate the initial cluster means.
    '''
    A = d.get_data(headers)
    if whiten:
        W = vq.whiten(A)
    else:
        W = A
    codebook = kmeans_init(W, K, categories)
    codebook, codes, errors = kmeans_algorithm(W, codebook)
    return [codebook, codes, errors]
def get_real_data(self):
    self.country_real = {}
    c_remove = []
    for country in self.countries:
        time, cases, deaths, recovered = data.get_data(country, self.all_data)
        if time == []:
            c_remove.append(country)
        else:
            self.country_real[country] = {
                "time": time,
                "cases": cases,
                "deaths": deaths,
                "recovered": recovered
            }
    # remove all the unwanted countries - less data
    for rem in c_remove:
        self.countries.remove(rem)
def main():
    _, x_test = data.get_data()
    # x_test = x_test.reshape((len(x_test), 28, 28, 1))
    decoder = load_model('models/decoder.h5')
    encoded_imgs = np.load('data/encoded_imgs.npy')
    reconstructions = decoder.predict(encoded_imgs)
    reconstructions = reconstructions.reshape((len(x_test), 28, 28))
    reconstructions = 255 * reconstructions
    for i in range(len(reconstructions)):
        temp = np.expand_dims(reconstructions[i], axis=2)
        temp = np.repeat(temp.astype(np.uint8), 3, 2)
        save_img('data/' + str(i) + '.png', temp)
def run():
    to_kill = []

    # start coordinator
    cmd = ['pypy', dht_util.SETUP_BIN, dht_util.CMD_START_COORDINATOR]
    print '\nStarting discovery coordinator'
    proc = subprocess.Popen(cmd, shell=False)
    to_kill.append(proc)
    time.sleep(2)

    # start nodes
    for i in range(0, len(conf.NODE_HOST_PORT_PAIRS) - 1):
        print 'Starting node %s of %s ' % (str(i + 1), str(len(conf.NODE_HOST_PORT_PAIRS)))
        host_port_pair = conf.NODE_HOST_PORT_PAIRS[i]
        encoded_node_host_port_pair = dht_util.encode_node_start_args(host_port_pair)
        cmd = (['pypy', dht_util.SETUP_BIN, dht_util.CMD_START_NODE] +
               encoded_node_host_port_pair)
        proc = subprocess.Popen(cmd, shell=False)
        to_kill.append(proc)
        time.sleep(5)

    print ('Starting node %s of %s ' %
           (str(len(conf.NODE_HOST_PORT_PAIRS)), str(len(conf.NODE_HOST_PORT_PAIRS))))
    local_node_host_port_pair = conf.NODE_HOST_PORT_PAIRS[-1]
    local_dht_node = dht_lib.add_single_dht_node(local_node_host_port_pair)
    time.sleep(5)

    data_to_load = data.get_data(conf.NUMBER_DATA_ITEMS)
    print 'Starting loading %s data items' % str(len(data_to_load))
    dht_lib.load_data(local_dht_node, data_to_load)

    print 'Waiting period'
    time.sleep(10)

    print 'Querying data (once for each loaded item)'
    dht_lib.query_loaded_data([local_dht_node], data_to_load)

    print 'Shutting down'
    for proc_to_kill in to_kill:
        proc_to_kill.kill()
def get_estimator(symbol, start, end, window=30, clean=True):
    prices = data.get_data(symbol, start, end)
    log_hl = (prices['High'] / prices['Low']).apply(np.log)
    log_co = (prices['Close'] / prices['Open']).apply(np.log)
    rs = 0.5 * log_hl**2 - (2 * math.log(2) - 1) * log_co**2

    def f(v):
        return math.sqrt(252 * v.mean())

    result = rs.rolling(window=window, center=False).apply(func=f)
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)
    log_ho = (prices['Adj High'] / prices['Adj Open']).apply(np.log)
    log_lo = (prices['Adj Low'] / prices['Adj Open']).apply(np.log)
    log_co = (prices['Adj Close'] / prices['Adj Open']).apply(np.log)
    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)

    def f(v):
        return math.sqrt(252 * v.mean())

    result = pandas.rolling_apply(rs, window, f)
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def blogdata(clustered=False):
    def dict_clusters(clust):
        count = dict(zip(cols, matrix[clust.id]))
        count.update({'Blog': rows[clust.id]})
        return count

    rows, cols, matrix = data.get_data('blogdata.txt')
    if clustered:
        print "Calculating clusters...",
        clusters = groups.cluster_list(groups.cluster_hierarchy(matrix))
        print " DONE."
        counts = map(dict_clusters, clusters)
    else:
        counts = []
        for i, vector in enumerate(matrix):
            d = dict(zip(cols, vector))
            d.update({'Blog': rows[i]})
            counts.append(d)
    return {"cols": cols, "counts": counts}