Example #1
def data_range(data, headers):
	result = []
	for header in headers:
		colmax = np.amax(data.get_data((header,)))
		colmin = np.amin(data.get_data((header,)))
		result.append((colmax, colmin))
	return result
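Most snippets on this page assume a Data wrapper whose get_data(headers) returns the selected columns as a 2-D NumPy array or matrix. A minimal stand-in for that assumption (the Data class below is hypothetical, not any of the original projects' implementations), used here to exercise data_range:

import numpy as np

class Data:
    """Hypothetical stand-in: maps header names to columns of a 2-D array."""
    def __init__(self, headers, values):
        self.headers = list(headers)
        self.values = np.asarray(values, dtype=float)

    def get_data(self, headers):
        cols = [self.headers.index(h) for h in headers]
        return self.values[:, cols]

d = Data(['a', 'b'], [[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
print(data_range(d, ['a', 'b']))  # max/min per column: a -> (3.0, 1.0), b -> (30.0, 10.0)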
Example #2
	def get_data(self, blocks):
		""" Data - return data """
		subdata = data.get_data( blocks)
		if subdata is None:
			self._response_not_found()
		else:
			self._response_json( subdata)
Example #3
def normalize_columns_together(data, headers):
	temp_matrix = data.get_data(headers)
	rows = len(temp_matrix)
	homogenous_coordinates = np.ones(shape=(rows, 1))
	temp_matrix = np.hstack((temp_matrix, homogenous_coordinates))
	min_max = data_range(data, headers)
	mins = []
	for i in range(len(headers)):
		mins.append(min_max[i][1])
	totmin = min(float(num) for num in mins)
	maxes = []
	for i in range(len(headers)):
		maxes.append(min_max[i][0])
	totmax = max(float(num) for num in maxes)
	totrange = totmax - totmin
	
	Tx = np.eye(len(headers)+1)
	for i in range(len(headers)):
		Tx[i, len(headers)] = -totmin
	Ss = np.eye(len(headers)+1)
	for i in range(len(headers)):
		Ss[i, i] = 1/totrange
	result = None
	for i in range(rows):
		temp_row = np.matrix(temp_matrix[i, :]).T
		row = Tx * temp_row
		row = Ss * row
		if result is None:
			result = row.T
		else:
			result = np.vstack((result, row.T))
	return result[:,range(len(headers))]
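The homogeneous-coordinate transform above (translate by -totmin, then scale by 1/totrange) amounts to rescaling the whole selected block into [0, 1] by its global minimum and range. A vectorized sketch of the same computation, assuming get_data(headers) returns a plain NumPy array:

import numpy as np

def normalize_columns_together_vectorized(data, headers):
    # (A - global_min) / global_range, applied to every element at once
    A = np.asarray(data.get_data(headers), dtype=float)
    totmin, totmax = A.min(), A.max()
    return (A - totmin) / (totmax - totmin)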
Example #4
	def delete_data(self, blocks):
		# Check that the resource exists
		subdata = data.get_data( blocks)
		if subdata is None:
			self._response_not_found()
		else:
			self._response_forbidden()
Example #5
def load_data(dht_node_list):
    '''
    @param {List} dht_node_list --- Each element is a dht node

    @returns {List} --- Each element is a data.DataItem, which is a thin
    wrapper for data key and value.
    '''
    start = time.time()
    data_items = data.get_data(conf.NUMBER_DATA_ITEMS)
    if len(dht_node_list) == 0:
        dht_util.dht_assert(
            'No dht nodes passed in when loading data')
    dht_load_node = dht_node_list[0]

    for counter in range(0, len(data_items)):
        # if (counter % 50) == 0:
        #     print 'Loading data ' + str(counter) + ' of ' + str(len(data_items))
        print 'Loading data ' + str(counter) + ' of ' + str(len(data_items))
        
        data_item = data_items[counter]
        dht_load_node.add_data(data_item.key,data_item.val)

    elapsed = time.time() - start
    print '\nLoad time: ' + str(elapsed)
    print '\n'
        
    return data_items
Example #6
def normalize_columns_together(data, header_names_list):
	target = data.get_data(header_names_list)
	minA = np.min(target)
	maxA = np.max(target)
	rangeA = maxA - minA
	new_matrix = 1 - ((maxA - target)/rangeA)
	return new_matrix
Example #7
def main():
    """Driver routine"""
    # Global params
    with open('input/params.json') as params:
        input = json.load(params)

    base_dir = input['base_dir']
    data_dir = input['data_dir']
    db_cred_file = input['db_cred_file']
    machines = input['machines']
    tstart = input['tstart']
    tend = input['tend']
    one_hot = bool(input['one_hot'])
    train_fraction = float(input['train_fraction'])
    validation_fraction = float(input['validation_fraction'])
    test_fraction = float(input['test_fraction'])

    for machine in machines:
        queue, completed = data.get_data(machine, base_dir, data_dir, db_cred_file, tstart, tend)
        hotdf = data.create_df(queue, completed, one_hot)
        test.create_all_sets(hotdf, train_fraction, validation_fraction, test_fraction)
Example #8
def get_csv(database, table, date_start=None, date_end=None):
    timezone = database.tables[table]["timezone"]
    data = get_data(database, table, date_start, date_end)
    with StringIO() as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)
        return csvfile.getvalue()
Example #9
def normalize_columns_separately(data, header_names_list):
	target = data.get_data(header_names_list)
	minC = np.min(target, axis=0)
	maxC = np.max(target, axis=0)
	rangeC = maxC - minC
	new_matrix = 1- ((maxC - target)/rangeC)
	return new_matrix
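A quick check of the per-column formula above on a small array (assuming get_data returns a plain NumPy array): 1 - (max - x)/range is just (x - min)/range, so every column is mapped onto [0, 1].

import numpy as np

target = np.array([[1.0, 10.0],
                   [2.0, 20.0],
                   [3.0, 30.0]])
minC, maxC = target.min(axis=0), target.max(axis=0)
print(1 - (maxC - target) / (maxC - minC))  # each column becomes 0.0, 0.5, 1.0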
Example #10
def get_estimator(ticker, start, end, window=30, clean=True):

    prices = data.get_data(ticker, start, end)

    log_return = (prices["Adj Close"] / prices["Adj Close"].shift(1)).apply(np.log)

    vol = pandas.rolling_std(log_return, window=window) * math.sqrt(252)
    adj_factor = math.sqrt(
        (
            1.0
            / (
                1.0
                - (window / (log_return.count() - (window - 1.0)))
                + (window ** 2 - 1.0) / (3.0 * (log_return.count() - (window - 1.0)) ** 2)
            )
        )
    )

    result = vol * adj_factor
    result[: window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
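pandas.rolling_std was deprecated in pandas 0.18 in favour of the Series.rolling API and removed in later releases. A sketch of the same close-to-close estimator against modern pandas (prices is assumed to be a DataFrame with an 'Adj Close' column, as above):

import math
import numpy as np

def close_to_close_vol(prices, window=30, clean=True):
    # Log returns of adjusted closes
    log_return = np.log(prices["Adj Close"] / prices["Adj Close"].shift(1))
    # Rolling standard deviation, annualized over 252 trading days
    vol = log_return.rolling(window=window).std() * math.sqrt(252)
    n = log_return.count()
    adj_factor = math.sqrt(
        1.0 / (1.0 - (window / (n - (window - 1.0)))
               + (window ** 2 - 1.0) / (3.0 * (n - (window - 1.0)) ** 2)))
    result = vol * adj_factor
    result[:window - 1] = np.nan
    return result.dropna() if clean else result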
Example #11
def http_json(tables, start, end):
    db = Database(DATABASE)
    table_list = tables.split("+")
    table_set = set()
    table_data = dict()
    for table in table_list:
        if table in db.tables:
            table_set.add(table)

            name = get_cache_name(table, start, end)
            cached = cache.get(name)
            if cached is not None:
                table_data[table] = cached
            else:
                data = get_data(db, table, start, end)
                table_data[table] = data
                cache.set(name, data)


    # We don't fail if at least one table is found. While the client should
    # never request an unknown table, it will not error if it doesn't receive
    # a requested table, and will just draw those given.
    if len(table_set) == 0:
        abort(404)
    
    return json.jsonify(table_data)
Example #12
def gen(**kwargs):
    """
    Command-line interface for generating a poem.
    """

    for k, v in kwargs.items():
        setattr(opt, k, v)
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)

    if opt.use_gpu:
        model.cuda()
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode('ascii', 'surrogateescape').decode('utf8')
            prefix_words = opt.prefix_words.encode('ascii', 'surrogateescape').decode(
                'utf8') if opt.prefix_words else None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode('utf8') if opt.prefix_words else None

    start_words = start_words.replace(',', u',') \
        .replace('.', u'。') \
        .replace('?', u'?')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    print(''.join(result))
Example #13
def get_estimator(ticker, start, end, window=30, clean=True):
    
    prices = data.get_data(ticker, start, end)
    
    log_ho = (prices['Adj High'] / prices['Adj Open']).apply(np.log)
    log_lo = (prices['Adj Low'] / prices['Adj Open']).apply(np.log)
    log_co = (prices['Adj Close'] / prices['Adj Open']).apply(np.log)
    
    log_oc = (prices['Adj Open'] / prices['Adj Close'].shift(1)).apply(np.log)
    log_oc_sq = log_oc**2
    
    log_cc = (prices['Close'] / prices['Close'].shift(1)).apply(np.log)
    log_cc_sq = log_cc**2
    
    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
    
    close_vol = pandas.rolling_sum(log_cc_sq, window=window) * (1.0 / (window - 1.0))
    open_vol = pandas.rolling_sum(log_oc_sq, window=window) * (1.0 / (window - 1.0))
    window_rs = pandas.rolling_sum(rs, window=window) * (1.0 / (window - 1.0))
    
    result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * math.sqrt(252)
    
    result[:window-1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
Example #14
def get_estimator(symbol, start, end, window=30, clean=True):
    
    prices = data.get_data(symbol, start, end)
    
    log_ho = (prices['High'] / prices['Open']).apply(np.log)
    log_lo = (prices['Low'] / prices['Open']).apply(np.log)
    log_co = (prices['Close'] / prices['Open']).apply(np.log)
    
    log_oc = (prices['Open'] / prices['Close'].shift(1)).apply(np.log)
    log_oc_sq = log_oc**2
    
    log_cc = (prices['Close'] / prices['Close'].shift(1)).apply(np.log)
    log_cc_sq = log_cc**2
    
    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
    
    close_vol = log_cc_sq.rolling(window=window,center=False).sum() * (1.0 / (window - 1.0))
    open_vol = log_oc_sq.rolling(window=window,center=False).sum() * (1.0 / (window - 1.0))
    window_rs = rs.rolling(window=window,center=False).sum() * (1.0 / (window - 1.0))
    
    result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * math.sqrt(252)
    
    result[:window-1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
Example #15
def normalize_columns_separately(data, headers):
	temp_matrix = data.get_data(headers)
	rows = len(temp_matrix)
	homogenous_coordinates = np.ones(shape=(rows, 1))
	temp_matrix = np.hstack((temp_matrix, homogenous_coordinates))
	min_max = data_range(data, headers)
	Tx = np.eye(len(headers)+1)
	for i in range(len(headers)):
		Tx[i, len(headers)] = -min_max[i][1]
	Ss = np.eye(len(headers)+1)
	for i in range(len(headers)):
		colrange = min_max[i][0] - min_max[i][1]
		Ss[i, i] = 1/colrange
	result = None
	for i in range(rows):
		temp_row = np.matrix(temp_matrix[i, :]).T
		row = Tx * temp_row
		row = Ss * row
		if result is None:
			result = row.T
		else:
			result = np.vstack((result, row.T))
	return result[:, range(len(headers))]
Example #16
def run():
    coordinator_master = start_coordinator()
    dht_node_list = add_dht_nodes()
    print '\nAbout to load data'
    data_to_load = data.get_data(dht_util.NUMBER_DATA_ITEMS)
    load_data(dht_node_list[0],data_to_load)
    print '\nAbout to query data'    
    query_loaded_data(dht_node_list,data_to_load)
Example #17
def data_range(data, header_names_list):
	target = data.get_data(header_names_list)
	#print target
	minlist = target.min(0)
	maxlist = target.max(0)
	#print "maslist and minlist are", maxlist, minlist
	result = np.concatenate((minlist.T, maxlist.T), axis=1)
	return result.tolist()
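A quick check of the layout returned above (assuming get_data returns an np.matrix, so .T and concatenate produce one [min, max] row per column):

import numpy as np

target = np.matrix([[1.0, 10.0],
                    [2.0, 20.0],
                    [3.0, 30.0]])
minlist, maxlist = target.min(0), target.max(0)
print(np.concatenate((minlist.T, maxlist.T), axis=1).tolist())  # [[1.0, 3.0], [10.0, 30.0]]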
Example #18
def dendrodata():
    rows, cols, matrix = data.get_data('blogdata.txt')

    print "Calculating clusters...",
    tree = groups.cluster_dict(groups.cluster_hierarchy(matrix), rows)
    print " DONE."

    return {"tree": tree}
Example #19
def fuzzyCmeans(data, headers, C):
	A = data.get_data(headers)
	
	centroids,partitionMatrix = fuzzyCinit(A, C, headers)
	partitionMatrix,centroids = fuzzyC_algorithm(A,centroids,partitionMatrix)
	#print centroids
	#print partitionMatrix
	return partitionMatrix, centroids
Example #20
def kmeans(data, headers, K, whiten = True, categories = None):
	A = data.get_data(headers)
	if whiten:
		W = vq.whiten(A)
	else:
		W = A
	codebook = kmeans_init(W, K, categories)
	codebook,codes,errors = kmeans_algorithm(W, codebook)
	return codebook, codes, errors
Example #21
def get_estimator(symbol, start, end, window=30, clean=True):
    
    prices = data.get_data(symbol, start, end)
    
    log_return = (prices['Close'] / prices['Close'].shift(1)).apply(np.log)
    
    result = log_return.rolling(window=window,center=False).skew()
    
    result[:window-1] = np.nan
    
    if clean:
        return result.dropna()
    else:
        return result
Example #22
def main():
    """
    """
    placeholders = ['input', 'label']
    train_ops = ['train']
    log_ops = ['accuracy']
    files = get_data(config.DATA_DIRECTORY)
    queue_graph = create_image_queue_graph(files, config.PIXEL_DEPTH,
                                           config.HEIGHT, config.WIDTH,
                                           config.CHANNELS,
                                           config.BATCH_SIZE, config.CAPACITY)
    model_graph = create_model_graph(config.HEIGHT, config.WIDTH,
                                     config.CHANNELS, config.NUM_LABELS)
    train_model(queue_graph, model_graph, placeholders, train_ops, log_ops)
Example #23
File: Skew.py Project: caitouwh/kod
def get_estimator(ticker, start, end, window=30, clean=True):
    
    prices = data.get_data(ticker, start, end)
    
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)

    result = pandas.rolling_skew(log_return, window=window)
    
    result[:window-1] = np.nan
    
    if clean:
        return result.dropna()
    else:
        return result
Example #24
File: Skew.py Project: verawatk/kod
def get_estimator(ticker, start, end, window=30, clean=True):

    prices = data.get_data(ticker, start, end)

    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(
        np.log)

    result = pandas.rolling_skew(log_return, window=window)

    result[:window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
Example #25
def main(args):
    x, fx = get_data(args)

    device = torch.device("cuda" if args.cuda else "cpu")
    train_data, val_data = split_data(args, x, fx)

    if args.save_splits:
        save_splits(train_data, val_data)

    train_loader, val_loader = get_loaders(train_data, val_data)

    model = get_model(args)

    trainer = get_trainer(model, train_loader, val_loader, device, args)
    trainer.train()
Example #26
def range_(headers, data):
    """ Takes in a list of column headers and the Data object and returns a list of 2-element 
		lists with the minimum and maximum values for each column. 
		The function is required to work only on numeric data types."""
    column_matrix = data.get_data(headers).getT(
    )  # get columns as rows, as this makes analysis much easier by just performing operations on the column list directly
    if column_matrix == []:
        print "wrong headers, not present in data Object"
        return []
    column_max = column_matrix.max(1)
    column_min = column_matrix.min(1)
    final = np.concatenate((column_min, column_max), axis=1)

    rng = final.tolist()
    return rng
Example #27
def sort(headers, data):  # extension
    """ Return the numeric matrices with sorted columns	 """
    column_matrix = data.get_data(
        headers)  # get raw matrix data for numeric values
    print "\n before sorting \n "
    print column_matrix

    column_matrix = column_matrix.tolist()
    column_array = np.asarray(column_matrix)

    column_array.sort(axis=0)

    print "\n \n done sorting here is your matrix \n"

    return column_array
Example #28
def run_test_experiments(config):
    dir_path = '/path/to/working/dir'
    train_file = dir_path + '/data/ontonotes.development.ner'
    test_file = dir_path + '/data/ontonotes.test.ner'
    model_path = dir_path + '/models/MLPNet_' + config['para_option'] + '.pt'
    print('load data')
    train_data = get_data(train_file)
    test_data = get_data(test_file)
    print('get vocabulary and embeddings')
    word_to_ix, pos_to_ix, ner_to_ix = get_vocabulary(train_data, config)
    config['ner_to_ix'] = ner_to_ix
    config['pos_to_ix'] = pos_to_ix
    config['word_to_ix'] = word_to_ix
    config['output_size'] = len(ner_to_ix)
    print('ner_to_ix', ner_to_ix)
    vocab_embeddings = get_vocab_embeddings(word_to_ix)
    print('process data')
    test_input_ids, test_sent_ids, test_pos_ids, test_ner_ids = process_data(
        test_data, word_to_ix, pos_to_ix, ner_to_ix)
    print('get test input features')
    test_input_features = get_word_features(test_input_ids, test_sent_ids,
                                            vocab_embeddings)
    test_data = {
        'inputs': test_input_features,
        'sent_ids': test_sent_ids,
        'labels': test_ner_ids,
        'confidences': [1.0] * len(test_input_features)
    }
    print('test words', len(test_input_features))
    print('build model')
    model, loss_function, optimizer = build_model(config)
    print('load model')
    model.load_state_dict(torch.load(model_path))
    print('test model')
    test_accuracy = evaluate(test_data, model, ner_to_ix, config)
    print('test accuracy', test_accuracy)
Example #29
File: child.py Project: ND-SCL/NAQS
    def fit(self, validate=False, quantize=False, verbosity=0, epochs=40):
        train_data, val_data = data.get_data(self.dataset,
                                             self.device,
                                             shuffle=True,
                                             batch_size=128,
                                             augment=True)
        loss, acc = backend.fit(
            self.model,
            self.optimizer,
            train_data=train_data,
            val_data=None if validate is False else val_data,
            epochs=epochs,
            verbosity=verbosity,
            quan_paras=None if quantize is False else self.quan_paras)
        return loss, acc

    def train(self, batch_size=128, epochs=40,
              verbosity=True, validate=False):
        train_data, val_data = data.get_data(
            self.dataset, self.device,
            shuffle=True,
            batch_size=batch_size,
            augment=True)
        acc = backend.fit(
            self.model, self.optimizer,
            train_data=train_data,
            val_data=None if validate is False else val_data,
            epochs=epochs,
            verbosity=verbosity
            )
        return acc
Example #31
def main(args):
    X, y = get_data()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=args.test_size, random_state=args.random_state)

    models = create_models(None)
    models = dict(models)

    for name, clf in models.items():
        clf.fit(X_train, y_train)
    scores = pd.Series(
        {name: clf.score(X_test, y_test)
         for name, clf in models.items()},
        name="Accuracy")
    print(scores)
Example #32
def char_enc(fn, cmap):
    data = get_data(fn)
    result = []
    out_fn = fn[:-4] + '_enc.txt'

    print('Starting.')
    for line in data:
        new_line = ''
        for char in line:
            new_line += cmap[char]
        result.append(new_line)

    with open(out_fn, 'w') as f:
        f.write('\n'.join(result))

    print('Done. Output file:', out_fn)
Example #33
def get_word_sentiment(word):
    """Return a number between -1 and +1 representing the degree of positive or
    negative feeling in the given word. 

    Return None if the word is not in the sentiment dictionary.
    (0 represents a neutral feeling, not an unknown feeling.)
    
    >>> get_word_sentiment('good')
    0.875
    >>> get_word_sentiment('bad')
    -0.625
    >>> get_word_sentiment('winning')
    0.5
    >>> get_word_sentiment('Berkeley')  # Returns None
    """
    return get_data(word)
Example #34
def get_batch(size,BATCH_SIZE=20,SHUFFLE_BUFFER_SIZE = 1000):


    train,val,test,labels=get_data(size,'rock_paper_scissors',3)


    train_batches=train.shuffle(SHUFFLE_BUFFER_SIZE).repeat().batch(BATCH_SIZE)
    val_batches=val.shuffle(SHUFFLE_BUFFER_SIZE).repeat().batch(BATCH_SIZE)
    test_batches=test.batch(BATCH_SIZE).repeat()

    if SHUFFLE_BUFFER_SIZE % BATCH_SIZE != 0:
        parallel_steps = SHUFFLE_BUFFER_SIZE // BATCH_SIZE + 1
    else:
        parallel_steps = SHUFFLE_BUFFER_SIZE // BATCH_SIZE

    return train_batches,val_batches,test_batches,parallel_steps
Example #35
def main(args):
    np.random.seed()

    x1s_trn, x2s_trn, ys_trn, x1s_vld, x2s_vld, ys_vld = get_data()
    model = Model(64, 64, 1, model_id=args.model_id)
    model.train(x1s=x1s_trn,
                x2s=x2s_trn,
                ys=ys_trn,
                validation_x1s=x1s_vld,
                validation_x2s=x2s_vld,
                validation_ys=ys_vld,
                num_epochs=2000,
                embedding_dimension=128,
                mini_batch_size=50,
                learning_rate=0.00035,
                margin=0.5)
Example #36
def hande_new_post():
    if request.method == "POST":
        # if is_authed(session):
        body = request.json
        post_id = hashlib.sha256(json.dumps(body)).hexdigest()[:11]
        data.submit_post(body['user_id'], post_id, body)
        return jsonify({"id": post_id, "success": True})
        # else:
        #     return 400

    if request.method == "GET":
        post_id = request.args.get('post_id')
        user_id = request.args.get('user_id')
        return jsonify(data.get_data(user_id, post_id))
    else:
        return 400
Example #37
def get_estimator(ticker, start, end, window=30, clean=True):
    
    prices = data.get_data(ticker, start, end)

    rs = (1 / (4 * math.log(2))) * ((prices['Adj High'] / prices['Adj Low']).apply(np.log))**2

    def f(v):
        return math.sqrt(252 * v.mean())
    
    result = pandas.rolling_apply(rs, window, f)
    result[:window-1] = np.nan
    
    if clean:
        return result.dropna()
    else:
        return result
Example #38
def get_estimator(ticker, start, end, window=30, clean=True):
    
    prices = data.get_data(ticker, start, end)
    
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)

    vol = pandas.rolling_std(log_return, window=window) * math.sqrt(252)
    adj_factor = math.sqrt((1.0 / (1.0 - (window / (log_return.count() - (window - 1.0)))+(window**2 - 1.0)/(3.0 * (log_return.count() - (window - 1.0))**2))))

    result = vol * adj_factor
    result[:window-1] = np.nan
    
    if clean:
        return result.dropna()
    else:
        return result
Example #39
def execute_job(jid):
    job_dict = get_job_by_jid(jid)

    points = get_data()
    points = points.in_between(start=int(job_dict['start']),
                               end=int(job_dict['end'])).data
    years = [int(p['Year']) for p in points]
    rainfall = [p['Annual rainfall at fortaleza'] for p in points]
    plt.scatter(years, rainfall)
    plt.title(_create_job_key(jid))
    plt.xlabel('Year')
    plt.ylabel('Rainfall (mm)')

    tmp_file = '/tmp/{}.png'.format(jid)
    plt.savefig(tmp_file, dpi=150)
    finalize_job(jid, tmp_file)
Example #40
def update_film_table(index_name='films'):

    films, places = get_data('film')

    for k, f in films.iteritems():

        o = Film.get_by_id(f.get('id'))

        if not o:
            o = set_film_model(f)

            ModelSearch.add_document(
                ModelSearch.create_film_document(
                    doc_id=o.key.urlsafe(),
                    film=o
                ), index_name=index_name
            )
Example #41
def get_estimator(symbol, start, end, window=30, clean=True):

    prices = data.get_data(symbol, start, end)

    rs = (1 / (4 * math.log(2))) * (
        (prices['High'] / prices['Low']).apply(np.log))**2

    def f(v):
        return math.sqrt(252 * v.mean())

    result = rs.rolling(window=window, center=False).apply(func=f)
    result[:window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
Example #42
def input_gen():
    """
        Infinite sample generator
    """
    data_in = get_data()
    data = []
    for d in data_in:
        for i in range(3):
            d0 = np.zeros((STEPS, 12), np.float)
            length = min(STEPS, d[0].shape[0]-2)
            d0[:length,:] = d[0][i:length+i,:]
            d1 = d[1]
            data.append((d0, d1))
    index = 0
    while True:
        index = (index +1)%len(data)
        yield data[index]
Example #43
def get_ruta_data(url, js_barrios, tresh):

    durations = list()
    js = data.get_data(url)
    calculo = dict()

    calculo = {'rutas': []}
    print(calculo)

    conteo = int()

    for route in js['routes']:
        print('\n' + route['summary'])
        for leg in route['legs']:
            durations.append(re.findall(r'\d+', leg['duration']['text'])[0])
            for step in leg['steps']:
                # print ('origin is' + json.dumps(step['start_location']) + 'end is= ' + json.dumps(step['end_location']))
                lat2 = step['end_location']['lat']
                lon2 = step['end_location']['lng']

                for barrio in js_barrios['barrios']:
                    lat1 = barrio['latitud']
                    lon1 = barrio['longitud']
                    print('=======' + barrio['nombre'])

                    # Note: math.sin/math.cos expect radians; convert lat/lon
                    # with math.radians first if the inputs are in degrees.
                    dist = math.acos(
                        math.sin(lat1) * math.sin(lat2) + math.cos(lat1) *
                        math.cos(lat2) * math.cos(lon1 - lon2)) * 6371
                    print(dist)
                    if dist > tresh:
                        conteo = conteo + 1

        calculo['rutas'].append({'nombre': route['summary'], 'conteo': conteo})
        conteo = 0

    print(json.dumps(calculo, indent=4))
    # print(json.dumps(js_barrios, indent=4))
    cont_min = 9999
    for ruta in calculo['rutas']:
        if ruta['conteo'] < cont_min:
            cont_min = ruta['conteo']
            nombre = ruta['nombre']

    for route in js['routes']:
        if nombre in route['summary']:
            return route
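The law-of-cosines step above needs latitudes and longitudes in radians. A small standalone helper using the haversine form instead (a sketch, independent of the snippet's data source), taking degrees and returning kilometres:

import math

def great_circle_km(lat1, lon1, lat2, lon2):
    # Haversine formula; inputs in degrees, Earth radius 6371 km
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlmb = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlmb / 2) ** 2
    return 2 * 6371 * math.asin(math.sqrt(a))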
Example #44
    async def lab(self, ctx):

        labs_subset = get_labs_subset(LABS_OPTIONS)

        output = "Select a lab by typing what is in `this text`:"
        options = {}
        for lab in labs_subset:
            output += "\n\n- `{}` **{}** \n\t\t*{}*".format(
                lab["option"], lab["name"], lab["description"])
            options[lab["option"]] = lab

        def check(m):
            return m.author == ctx.author and (m.content in options)

        await ctx.send('',
                       embed=discord.Embed(description=output,
                                           colour=discord.Color.greyple()))

        msg = await self.bot.wait_for('message', check=check, timeout=120)
        lab = options[msg.content]
        outcome = random.choice(lab["outcomes"])
        output = "*{}*".format(outcome["description"])

        base_sc = SC_LAB
        multiplier = random.uniform(outcome["min_sc"], outcome["max_sc"])
        item_boost = items.get_player_boost(ctx.author.id, "labs")
        sc_add = round(base_sc * multiplier * (1 + item_boost))
        player_sc = get_data(ctx.author.id, "sc", default_val=0)
        add_data(ctx.author.id, "sc", player_sc + sc_add)

        output += "\n\nYour demonstrator gave you {} **{}**.".format(
            SC_EMOJI, sc_add)
        if item_boost:
            output += "\n_**{:.1f}%** boost from_ **Labs** _items in your inventory._".format(
                item_boost * 100)
        output += "\n\nYou get **`{}`<:xp:699934983074349086>**.".format(
            XP_LAB)
        lab_disp = discord.Embed(description=output,
                                 colour=discord.Color.greyple())
        lab_disp.set_author(name=lab["name"],
                            url='',
                            icon_url=ctx.author.avatar_url)
        await ctx.send('', embed=lab_disp)
        await give_xp(ctx, ctx.author.id, XP_LAB)

        await ctx.send(tips.get_random_tip())
Example #45
def get_estimator(ticker, start, end, window=30, clean=True):

    prices = data.get_data(ticker, start, end)

    rs = (1 / (4 * math.log(2))) * (
        (prices['Adj High'] / prices['Adj Low']).apply(np.log))**2

    def f(v):
        return math.sqrt(252 * v.mean())

    result = pandas.rolling_apply(rs, window, f)
    result[:window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
Example #46
def predict1():
    config = img_config()
    config.seq_len = 2
    img = utils.load_image(
        "/home/tusimple/junechen/ml_data/data/train2014/COCO_train2014_000000318556.jpg"
    )
    #img = utils.load_image("/home/tusimple/junechen/ml_data/data/train2014/COCO_train2014_000000160629.jpg")
    #/home/tusimple/junechen/ml_data/data/train2014/COCO_train2014_000000318556.jpg
    #img = utils.load_image("./test_data/tiger.jpeg")
    img = img.reshape((1, 224, 224, 3))
    w2d, d2w = data.get_word_to_id()
    config.vob_size = len(w2d)
    print "read w2d size:", len(w2d)
    if len(w2d) == 0:
        f, image, label, word, target, w2d, d2w = data.get_data(
            FLAGS.caption_path,
            FLAGS.image_path,
            max_len=config.num_steps + 1,
            batch_size=config.batch_size)

    images = tf.placeholder("float", [None, 224, 224, 3], name="image")
    word = tf.placeholder(tf.int32, [None, None], name="word_seq")

    image_caption = IMAGE_ATT_CAP(images, word, None, config, is_training=True)
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config_proto) as sess:
        sv = load_session(sess, FLAGS.save_path)

        word = [3, 1]
        words = np.array(word).reshape(1, -1)
        output, c, h = sess.run(
            [image_caption.logits, image_caption.c, image_caption.h],
            feed_dict={
                image_caption.image: img,
                image_caption.word: words
            })

        print(output.shape)
        print "c:", c
        print "h:", h
        print "output:", output
        predict = output[0][-1]
        sort_idx = predict.argsort()[::-1]
        print sort_idx

        print[d2w[p] for p in word + [sort_idx[0]]]
Example #47
def get_rainfall():

	dict_class = get_data()

	# tests if both start/end and limit/offset used together
	if ('start' in request.args or 'end' in request.args) and ('limit' in request.args or 'offset' in request.args):
		return jsonify({'msg':'Please do not use start/end with limit/offset'}), 400

	# if start/end provided, returns appropriate data
	if 'start' in request.args or 'end' in request.args:
		start = None
		end = None
		if 'start' in request.args:
			try:
				start = int(request.args.get('start'))
			except:
				return jsonify({'msg':'Please Enter a Valid Start'}), 400
		
		if 'end' in request.args:
			try:
				end = int(request.args.get('end'))
			except:
				return jsonify({'msg':'Please Enter a Valid End'}), 400

		return jsonify(dict_class.in_between(start=start,end=end).data)

	# if limit/offset provided, returns appropriate data
	if 'limit' in request.args or 'offset' in request.args:
		limit = None
		offset = None

		if 'limit' in request.args:
			try:
				limit = int(request.args.get('limit'))
			except:
				return jsonify({'msg':'Please Enter a Valid Limit'}), 400

		if 'offset' in request.args:
			try:
				offset = int(request.args.get('offset'))
			except:
				return jsonify({'msg':'Please Enter a Valid Offset'}), 400

		return jsonify(dict_class.limset(limit=limit,offset=offset).data)

	return jsonify(dict_class.data)
Example #48
def main():
    # get data
    X_train, y_train, X_test = get_data()

    # hyper - parameters
    params = dict(n_estimators=400,
                  max_depth=4,
                  eta=0.09,
                  gamma=0,
                  min_child_weight=0,
                  subsample=0.8,
                  colsample_bytree=0.8,
                  colsample_bylevel=0.6,
                  colsample_bynode=0.2)

    # fit parameters
    #fit_params = {'early_stopping_rounds': 10, 'verbose': True}
    fit_params = {'verbose': True}

    # cross validation
    print('=============================================')

    cv(X=X_train,
       y=y_train,
       k=10,
       verbose=True,
       mode='r',
       model_params=params,
       fit_params=fit_params)

    # find result
    def res():
        df_test = pd.read_csv('data/test.csv',
                              skipinitialspace=True,
                              verbose=True)

        model = XGBRegressor(**params)
        model.fit(X_train, y_train, **fit_params)
        predictions = model.predict(X_test)
        print(predictions)
        df_test['SalePrice'] = np.exp(predictions)
        result = df_test[['Id', 'SalePrice']]
        result.to_csv('data/new/xgb.csv', index=False)
        print('Done!')

    res()
Example #49
def get_layout_for_app(customer_list, app_id):
    form_layout = QFormLayout()
    for cust_id in customer_list:
        status_button = QPushButton("...")
        check_box = QCheckBox()
        row = QHBoxLayout()
        # creates a mapping [cust_id,"app_id]: status_button
        mapping_key = cust_id + "," + app_id
        execute.status_button_list.setdefault(mapping_key, [status_button])
        execute_for_customer.setdefault(mapping_key, check_box)
        status_button.clicked.connect(partial(open_status_dialog, mapping_key))

        row.addWidget(check_box)
        row.addWidget(status_button)
        form_layout.addRow(QLabel(data.get_data("customers")[cust_id]["name"]),
                           row)
    return form_layout
Example #50
def random_forest(cfg):
    # Load data
    train_df, valid_df, test_df = get_data(cfg)
    df = pd.concat([train_df, valid_df])

    # Remove columns and split data into (X,y)
    df = df.drop([
        'State_AL', 'State_NC', 'isNaN_rep_income', 'State_FL', 'State_LA',
        'isNaN_uti_card_50plus_pct', 'State_SC', 'State_GA', 'State_MS',
        'auto_open_36_month_num', 'card_open_36_month_num', 'ind_acc_XYZ'
    ],
                 axis=1)
    X = df.drop("Default_ind", axis=1).values
    y = df["Default_ind"].values

    # Below 2 lines needed for cross-validation in RandomizedSearchCV
    split_index = [-1] * len(train_df) + [0] * len(valid_df)
    pds = PredefinedSplit(test_fold=split_index)

    # Create classifier and the hyperparameter search space
    classifier = RandomForestClassifier(n_jobs=-1, verbose=1)
    param_grid = {
        "n_estimators": np.arange(50, 1000, 100),
        "max_depth": np.arange(1, 20),
        "criterion": ["gini", "entropy"],
        "min_samples_split": np.arange(2, 10),
        "max_features": [0.8, "sqrt", "log2"],
        "min_samples_leaf": np.arange(1, 5),
        "bootstrap": [True, False],
    }

    model = RandomizedSearchCV(
        estimator=classifier,
        param_distributions=param_grid,
        scoring="f1",
        n_iter=700,
        verbose=1,
        n_jobs=1,
        cv=pds,
    )

    model.fit(X, y)
    print(model.best_score_)
    print(model.best_estimator_.get_params())
    with open("rf.pkl", "wb") as f:
        pickle.dump(model.best_estimator_, f)
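A minimal illustration of the PredefinedSplit trick used above: rows marked -1 stay in the training fold for every split, and rows marked 0 form the single validation fold that RandomizedSearchCV scores against.

from sklearn.model_selection import PredefinedSplit

# Two training rows (-1) followed by two validation rows (0)
pds = PredefinedSplit(test_fold=[-1, -1, 0, 0])
for train_idx, val_idx in pds.split():
    print(train_idx, val_idx)  # [0 1] [2 3]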
Example #51
def gen(**kwargs):
    """
    Command-line interface for generating a poem.
    """

    for k, v in kwargs.items():
        print(k, v)
        setattr(opt, k, v)
    data, word2ix, ix2word = get_data(opt)
    model = PoetryModel(len(word2ix), 128, 256)
    map_location = lambda s, l: s
    state_dict = t.load(opt.model_path, map_location=map_location)
    model.load_state_dict(state_dict)

    if opt.use_gpu:
        model.cuda()

    # Python 2 / Python 3 string compatibility
    if sys.version_info.major == 3:
        if opt.start_words.isprintable():
            start_words = opt.start_words
            prefix_words = opt.prefix_words if opt.prefix_words else None
        else:
            start_words = opt.start_words.encode(
                'ascii', 'surrogateescape').decode('utf8')
            prefix_words = opt.prefix_words.encode(
                'ascii',
                'surrogateescape').decode('utf8') if opt.prefix_words else None
    else:
        start_words = opt.start_words.decode('utf8')
        prefix_words = opt.prefix_words.decode(
            'utf8') if opt.prefix_words else None

    if prefix_words:
        prefix_words = prefix_words.replace(',', u',') \
            .replace('.', u'。') \
            .replace('?', u'?')
    start_words = start_words.replace(',', u',') \
        .replace('.', u'。') \
        .replace('?', u'?')

    gen_poetry = gen_acrostic if opt.acrostic else generate
    result = gen_poetry(model, start_words, ix2word, word2ix, prefix_words)
    with open('result.txt', 'w') as f:
        f.writelines(result)
    print(''.join(result))
Example #52
def train():
    config = img_config()
    config.batch_size = 128
    f, image, label, word, target, w2d, d2w = data.get_data(
        FLAGS.caption_path,
        FLAGS.image_path,
        max_len=config.seq_len + 1,
        batch_size=config.batch_size)
    epoch_size = 10000
    config.vob_size = len(w2d)
    print("vb size:", len(w2d))
    image_caption = IMAGE_ATT_CAP(image, word, target, config)

    #summary_op = tf.merge_all_summaries()

    #sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    config_proto = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=True)
    #with sv.managed_session(config=config_proto) as sess:
    with tf.Session(config=config_proto) as sess:
        sv = load_session(sess, FLAGS.save_path)
        threads = tf.train.start_queue_runners(sess)

        summary_writer = tf.summary.FileWriter(FLAGS.log_path, sess.graph)

        for i in range(config.max_max_epoch):
            x_lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
            print("lr:", x_lr_decay)
            image_caption.assign_lr(sess, config.learning_rate * x_lr_decay)

            for j in range(10000):
                loss, lr, sum_str = image_caption.run_epoch(
                    sess, x_lr_decay, epoch_size, summary_writer, sv)
                if j % 10 == 0:
                    print("step %d per %f, lr %f" % (i, loss, lr))
                    summary = tf.Summary()
                    summary.value.add(tag='loss', simple_value=loss)
                    i_global = sess.run(tf.train.get_or_create_global_step())
                    print("cost %f global step %d" % (loss, i_global))
                    summary_writer.add_summary(
                        summary, i_global)  #write eval to tensorboard
                    summary_writer.add_summary(sum_str, i_global)

                    if j % 100 == 0:
                        save_model(sess, sv, FLAGS.save_path, i_global)
Example #53
def kmeans(d, headers, K, whiten=True, categories = ''):
	'''Takes in a Data object, a set of headers, and the number of clusters to create
	Computes and returns the codebook, codes and representation errors. 
	If given an Nx1 matrix of categories, it uses the category labels 
	to calculate the initial cluster means.
	'''
	
	A = d.get_data(headers)
	if whiten:
		W = vq.whiten(A)
	else:
		W = A
  
	codebook = kmeans_init(W,K,categories)

	codebook, codes, errors = kmeans_algorithm(W,codebook)
	
	return [codebook,codes, errors]			
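kmeans_init and kmeans_algorithm are project helpers not shown on this page. A sketch of the same idea using only scipy.cluster.vq (whiten, then kmeans2), returning a codebook, per-row codes, and per-row representation errors:

import numpy as np
from scipy.cluster import vq

def kmeans_scipy(A, K, whiten=True):
    # A: (N, d) array of observations
    W = vq.whiten(np.asarray(A, dtype=float)) if whiten else np.asarray(A, dtype=float)
    codebook, codes = vq.kmeans2(W, K, minit='++')
    # Representation error: distance from each point to its assigned centroid
    errors = np.linalg.norm(W - codebook[codes], axis=1)
    return codebook, codes, errors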
Example #54
    def get_real_data(self):
        self.country_real = {}
        c_remove = []
        for country in self.countries:
            time, cases, deaths, recovered = data.get_data(
                country, self.all_data)
            if time == []:
                c_remove.append(country)
            else:
                self.country_real[country] = {
                    "time": time,
                    "cases": cases,
                    "deaths": deaths,
                    "recovered": recovered
                }
        # remove all the unwanted countries - less data
        for rem in c_remove:
            self.countries.remove(rem)
Example #55
def main():
    _, x_test = data.get_data()
    # x_test = x_test.reshape((len(x_test), 28,28,1))

    decoder = load_model('models/decoder.h5')

    encoded_imgs = np.load('data/encoded_imgs.npy')

    reconstructions = decoder.predict(encoded_imgs)
    reconstructions = reconstructions.reshape((len(x_test), 28, 28))
    reconstructions = 255 * reconstructions

    for i in range(len(reconstructions)):

        temp = np.expand_dims(reconstructions[i], axis=2)

        temp = np.repeat(temp.astype(np.uint8), 3, 2)
        save_img('data/' + str(i) + '.png', temp)
Example #56
def run():
    to_kill = []
    
    # start coordinator
    cmd = ['pypy', dht_util.SETUP_BIN,dht_util.CMD_START_COORDINATOR]
    print '\nStarting discovery coordinator'
    proc = subprocess.Popen(cmd,shell=False)
    to_kill.append(proc)
    time.sleep(2)
    
    # start nodes
    for i in range(0,len(conf.NODE_HOST_PORT_PAIRS) -1 ):
        print 'Starting node %s of %s ' % (str(i + 1), str(len(conf.NODE_HOST_PORT_PAIRS)))
        host_port_pair = conf.NODE_HOST_PORT_PAIRS[i]
        encoded_node_host_port_pair = dht_util.encode_node_start_args(
            host_port_pair)
        cmd = (
            ['pypy',dht_util.SETUP_BIN,dht_util.CMD_START_NODE] +
            encoded_node_host_port_pair)
        proc = subprocess.Popen(cmd,shell=False)
        to_kill.append(proc)        
        time.sleep(5)

    print (
        'Starting node %s of %s ' %
        (str(len(conf.NODE_HOST_PORT_PAIRS)),str(len(conf.NODE_HOST_PORT_PAIRS))))
             
    local_node_host_port_pair = conf.NODE_HOST_PORT_PAIRS[-1]
    local_dht_node = dht_lib.add_single_dht_node(local_node_host_port_pair)
    time.sleep(5)


    data_to_load = data.get_data(conf.NUMBER_DATA_ITEMS)
    print 'Starting loading %s data items' % str(len(data_to_load))
    dht_lib.load_data(local_dht_node,data_to_load)
    print 'Waiting period'
    time.sleep(10)
    print 'Querying data (once for each loaded item)'
    dht_lib.query_loaded_data([local_dht_node],data_to_load)

    print 'Shutting down'
    for proc_to_kill in to_kill:
        proc_to_kill.kill()
Example #57
def get_estimator(symbol, start, end, window=30, clean=True):
    
    prices = data.get_data(symbol, start, end)
    
    log_hl = (prices['High'] / prices['Low']).apply(np.log)
    log_co = (prices['Close'] / prices['Open']).apply(np.log)

    rs = 0.5 * log_hl**2 - (2*math.log(2)-1) * log_co**2
    
    def f(v):
        return math.sqrt(252 * v.mean())
    
    result = rs.rolling(window=window, center=False).apply(func=f)
    result[:window-1] = np.nan
    
    if clean:
        return result.dropna()
    else:
        return result
Example #58
def get_estimator(ticker, start, end, window=30, clean=True):
    
    prices = data.get_data(ticker, start, end)
    
    log_ho = (prices['Adj High'] / prices['Adj Open']).apply(np.log)
    log_lo = (prices['Adj Low'] / prices['Adj Open']).apply(np.log)
    log_co = (prices['Adj Close'] / prices['Adj Open']).apply(np.log)
    
    rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)

    def f(v):
        return math.sqrt(252 * v.mean())
    
    result = pandas.rolling_apply(rs, window, f)
    result[:window-1] = np.nan
    
    if clean:
        return result.dropna()
    else:
        return result
Example #59
def blogdata(clustered=False):
    def dict_clusters(clust):
        count = dict(zip(cols, matrix[clust.id]))
        count.update({'Blog': rows[clust.id]})
        return count

    rows, cols, matrix = data.get_data('blogdata.txt')

    if clustered:
        print "Calculating clusters...",
        clusters = groups.cluster_list(groups.cluster_hierarchy(matrix))
        print " DONE."
        counts = map(dict_clusters, clusters)
    else:
        counts = []
        for i, vector in enumerate(matrix):
            d = dict(zip(cols, vector))
            d.update({'Blog': rows[i]})
            counts.append(d)

    return {"cols": cols, "counts": counts}