Example No. 1
def main(name, ordering, csv=False):
    years = range(2010, 2015)
    out = open(name + '.' + ('csv' if csv else 'arff'), 'w')
    if not csv:
        out.write('@relation basketball\n')
    datas = [get_data(year) for year in years]
    attrs = get_attrs(datas[0])
    print 'Using', attrs
    if csv:
        csvHead(attrs, out)
    else:
        arffHead(attrs, out)

    i = 0
    Is = range(265)
    for year in years:
        data = get_data(year)
        results = get_results(year)
        for ((id1, name1), (id2, name2), sc1, sc2) in results:
            if id1 not in data:
                print 'Team not found', id1
                continue
            if id2 not in data:
                print 'Team not found', id2
                continue

            i = random.choice(Is)
            Is.remove(i)

            if ordering == False:
                line_norm = [str(i), 'NORM'] + oneline(data, attrs, id1, name1, sc1, id2, name2, sc2, noOrder)
                out.write(('\t' if csv else ',').join(line_norm) + '\n')
                line_rev = [str(i), 'REV'] + oneline(data, attrs, id2, name2, sc2, id1, name1, sc1, noOrder)
                out.write(('\t' if csv else ',').join(line_rev) + '\n')
            else:
                line_norm = [str(i), 'NORM'] + oneline(data, attrs, id1, name1, sc1, id2, name2, sc2, ordering)
                out.write(('\t' if csv else ',').join(line_norm) + '\n')

    out.close()
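
The exporter above leans on helpers from its own module (get_data, get_results, get_attrs, oneline, noOrder). A minimal, purely hypothetical invocation might look like the following; the output base name and flag values are assumptions, not part of the original.

# Hypothetical driver; file name and flags are illustrative only.
if __name__ == '__main__':
    main('basketball_2010_2014', ordering=False, csv=True)   # writes basketball_2010_2014.csv
    main('basketball_2010_2014', ordering=False)             # writes basketball_2010_2014.arff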
Example No. 2
def sidebyside():
    """Plot the Hyades and Praesepe samples side by side and save the figure."""
    hdat, hobs, hobs_nr, hobs_r = get_data.get_data('H')
    pdat, pobs, pobs_nr, pobs_r = get_data.get_data('P')

#    plt.figure(figsize=(18,7))
    plt.figure(figsize=(13,5.5))
    axh = plt.subplot(122)
    axp = plt.subplot(121)

    plot_stars(hdat,axh)
    axh.set_ylim(18,3)
    texty = 2.75
    for i in range(klen):
        axh.text(kh_rpmK[i],texty,kh_spt[i],fontsize='large')
    axh.tick_params(which='both',top=False)
    axh.text(5.75,3.75,'Hyades',fontsize='large')

    plot_stars(pdat,axp)
    axp.set_ylim(21,6)
    axp.legend(numpoints=1,prop={'size':12},markerscale=1.5,
         handletextpad=0.3,handlelength=1,borderaxespad=0.2)
    texty = 5.75
    for i in range(klen):
        axp.text(kh_rpmK[i],texty,kh_spt[i],fontsize='large')
    axp.tick_params(which='both',top=False)
    axp.text(5.4,10.4,'Praesepe',fontsize='large')

    plt.savefig('paper_rsource.eps',orientation='landscape',bbox_inches='tight')
    plt.savefig('paper_rsource.png')
Example No. 3
    def get(self, wikipage_address):
        cookie_p = self.request.cookies.get('user_id')
        if cookie_p:
            user_id_check = (security.check_secure_val
                             (cookie_p))
            if user_id_check:
                params["username"] = user_id_check
                get_data(wikipage_address)
                self.render("editpage.html", **params)
        else:
            self.redirect("/signup")
Example No. 4
def main():
    data = get_data()
    data = data[::-1]
    data_len = len(data)
    weekDayName = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    
    closeVals = [float(d['Close']) for d in data]
    
    changes = [(closeVals[i+1]/closeVals[i]-1) * 100
               for i in range(data_len-1)]
    
    dates = [datetime.strptime(data[i]['Date'], '%Y-%m-%d').date()
               for i in range(data_len-1)]
    
    for i in range(data_len-10, data_len-1):
        print('%s (%s): %s==>%s, %.2f' % (dates[i], weekDayName[dates[i].weekday()], 
                                     closeVals[i], closeVals[i+1], changes[i]))

    nTotal = [0] * 5
    nPos = [0] * 5
    for i in range(data_len-1):
        k = dates[i].weekday()
        nTotal[k] = nTotal[k] + 1
        if changes[i] > 0:
            nPos[k] = nPos[k] + 1
            
    for k in range(5):
        print('%s, nTotal=%d, nPos=%d (%.2f)' % 
              (weekDayName[k], nTotal[k], nPos[k], nPos[k] / nTotal[k] * 100))
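
For context, the snippet above only needs get_data() to return trading-day records with 'Date' (YYYY-MM-DD) and 'Close' fields, newest first (it reverses the list itself). A hypothetical stub like the one below, with invented prices, is enough to exercise it.

# Hypothetical stand-in for the real get_data(); the dates and prices are invented.
from datetime import date, timedelta
import random

def get_data(n=30):
    d = date(2020, 3, 2)
    rows, price = [], 100.0
    while len(rows) < n:
        if d.weekday() < 5:  # trading days only, matching the real price feed
            price *= 1 + random.uniform(-0.01, 0.01)
            rows.append({'Date': d.isoformat(), 'Close': '%.2f' % price})
        d -= timedelta(days=1)
    return rows  # newest first, as the snippet above expects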
Example No. 5
    def post(self, wikipage_address):
        logging.error(wikipage_address)
        user_id_check = (security.check_secure_val
                         (self.request.cookies.get('user_id')))
        if user_id_check:
            params["username"] = user_id_check
        else:
            self.redirect("/login")
        page_content = self.request.get("content")
        if get_data(wikipage_address):
            parent_p = params["wikipage_query"].key
        else:
            parent_p = None
        page_data = WikiData(
            id=wikipage_address,
            parent=parent_p,
            content=page_content,
            author=user_id_check)
        page_data.put()
        memcache.set(page_data.key.id(), page_data)

        # next few lines reset params
        params["content_err"] = ""
        params["content"] = ""

        self.redirect(wikipage_address)
        return
Example No. 6
def main():
    years = range(1976, 2016)
    data = get_data()[::-1]  # reverse so the latest data is last
    dates = [datetime.datetime.strptime(a['Date'], '%Y-%m-%d') 
             for a in data]
    annDates = [datetime.datetime(y, 2, 1) for y in years]
    indexes = []
    k = 0
    for d in annDates:
        for k1, d1 in enumerate(dates[k:]):
            if d1 >= d:
                k += k1
                indexes.append(k)
                break
    
    n = len(indexes) - 1
    rates = []
    for i in range(n):
        k0 = indexes[i]
        k1 = indexes[i+1]
        bgnVal = float(data[k0]['Close'])
        bgnDate = data[k0]['Date']
        endVal = float(data[k1]['Close'])
        endDate = data[k1]['Date']
        ret = (endVal - bgnVal) / bgnVal * 100
        ret1 = min(4.75, max(0, ret))
        rates.append((ret, ret1))
        print('From %s to %s [%d to %d], %.2f to %.2f: %.2f' % 
              (bgnDate, endDate, k0, k1, bgnVal, endVal, ret1))

    print('Full returns: %.2f, %.2f' % accRates(rates, 0, n-1))
    
    x = []
    y1 = []
    y2 = []
    for i in range(n-10):
        acc = accRates(rates, i, i+10)
        print('Between %r and %r: (%.2f, %.2f)' % 
              (annDates[i].strftime('%Y-%m-%d'), 
               annDates[i+10].strftime('%Y-%m-%d'), acc[0], acc[1]))
        year = annDates[i].year
        x.append(year)
        y1.append(acc[0])
        y2.append(acc[1])

    x = np.array(x)
    y1 = np.array(y1)
    y2 = np.array(y2)
               
    fig = plt.figure()
    axes = fig.add_subplot(111)
    axes.plot(x, y1, '*-')
    # axes.hold('on') is obsolete; recent matplotlib overplots on the same axes by default
    axes.plot(x, y2, '*-')
    plt.title('%.2f vs %.2f' % (y1.mean(), y2.mean()))
    plt.grid(True)
    
    plt.show()
Example No. 7
    def test_bucketing(self):
        loader = ["get_splitted_data",
                  {"n_folds": 3,
                   "seed":777,
                   "valid_size": self.valid_size,
                   "test_size":0.0}]

        preprocess_fncs = [["to_binary", {"all_below": True}]]
        data = get_data(self.comps, loader, preprocess_fncs)
        folds = data.values()[0][0]
Example No. 8
    def test_bucketing_with_test_data(self):
        loader = ["get_splitted_data",
                  {"n_folds": 2,
                   "seed":777,
                   "valid_size": self.valid_size,
                   "test_size":0.2}]

        preprocess_fncs = [["to_binary", {"all_below": True}]]
        data = get_data(self.comps, loader, preprocess_fncs)
        folds = data.values()[0][0]
        X_test, y_test = data.values()[0][1][0]["X"]["data"], data.values()[0][1][0]["Y"]["data"]

        self.assertEqual(folds[0]['X_train']["data"].shape[1], X_test.shape[1])
Example No. 9
    def test_spliting_with_test_data(self):
        loader = ["get_splitted_data",
                  {"n_folds": self.n_folds,
                   "seed":777,
                   "valid_size": self.valid_size,
                   "test_size":0.2}]
        preprocess_fncs = []

        data = get_data(self.comps, loader, preprocess_fncs)
        # Great test..
        X_test, y_test = data.values()[0][1][0]["X"]["data"], data.values()[0][1][0]["Y"]["data"]
        folds = data.values()[0][0]

        self.assertTrue(X_test.shape[0] > 0)
        self.assertTrue(X_test.shape[0] == y_test.shape[0])
        # With test_size=0.2, the test split is ~20% and train+valid ~80%, i.e. roughly 4x the test size.
        self.assertTrue(4 * X_test.shape[0] - (folds[0]['X_train']["data"].shape[0] + folds[0]['X_valid']["data"].shape[0]) < 10)
Example No. 10
def run(experiment_sub_name, batch_size, fingerprint, protein, preprocess_fncs, loader_function, loader_args, seed, _log, _config):

    time.sleep(2) # Please don't remove, important for tests ..
    loader = [loader_function, loader_args]
    comp = [[protein, fingerprint]]
    loader[1]['seed'] = seed

    sgd = partial(SGDClassifier, random_state=seed)
    strat = random_query
    model = partial(ActiveLearningExperiment, param_grid={"alpha": [1]}, strategy=strat, base_model_cls=sgd, batch_size=batch_size)

    folds, _, _ = get_data(comp, loader, preprocess_fncs).values()[0]

    metrics, _ = fit_AL_on_folds(model, folds)


    return ExperimentResults(results=metrics, monitors={}, misc={}, dumps={}, name=ex.name, config=_config)
Example No. 11
def main():
    HOST, PORT = '', 9898
    
    listen_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listen_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    listen_socket.bind((HOST, PORT))
    listen_socket.listen(1)
    print('Serving HTTP on port %s ...' % PORT)
    reqNum = 0
    data = get_data()
    while True:
        client_connection, client_address = listen_socket.accept()
        request = client_connection.recv(1024)
        print(request.decode())
        http_response = genHttp(data, reqNum)
        reqNum += 1
        client_connection.sendall(http_response.encode())
        client_connection.close()
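
The server above leaves get_data() and genHttp() to the surrounding module. A purely illustrative pair, assuming a plain-text payload, might be:

# Hypothetical helpers for the server above; the payload format is an assumption.
def get_data():
    return ['alpha', 'beta', 'gamma']

def genHttp(data, reqNum):
    body = 'request #%d: %s\n' % (reqNum, ', '.join(data))
    return ('HTTP/1.1 200 OK\r\n'
            'Content-Type: text/plain\r\n'
            'Content-Length: %d\r\n'
            '\r\n'
            '%s' % (len(body), body))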
Example No. 12
    def test_splitted_data(self):
        loader = ["get_splitted_data",
                  {"n_folds": self.n_folds,
                   "seed":777,
                   "valid_size": self.valid_size,
                   "test_size":0.0}]
        preprocess_fncs = []

        data = get_data(self.comps, loader, preprocess_fncs)
        print data.values()[0][1]
        folds = data.values()[0][0]
        test_data = data.values()[0][1]
        data_desc = data.values()[0][2]

        self.assertEqual(len(data), 1)
        self.assertEqual(len(test_data), 0)
        self.assertEqual(len(folds), self.n_folds)
        self.assertEqual(len(data_desc.values()), 2)
Example No. 13
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../lyrics/data/lyrics/train/'
    
        # X_train is a list of all examples. Each example is a 2-element list, and each element is a list of words in the lyrics.
        # word_counts is a dictionary that maps each word to its count.
        X_train, l_train, self.word_counts, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold=100, n_class=self.config.n_class)
        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
    
        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())

        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       
Example No. 14
def main():
    from get_data import get_data

    words = get_data()
    game = Game(words, max_rank=None)

    print('しりとり')  # "Shiritori" (the word-chain game)
    print('1)ひらがなやカタカナで書いてください')  # "1) Please write in hiragana or katakana"
    print('2)quitを入れたら止めます')  # "2) Enter 'quit' to stop"
    print('-' * 10)

    # First draw
    word = game.next_word()
    print('最初は。。 {} ({})'.format(word.kana, word.kanji))  # "The first word is.. kana (kanji)"
    while True:
        try:
            # Get answer/command from player
            answer = input('答えは? ')  # "Your answer?"
            answer = answer.strip().replace(' ', '')
            if not answer:
                print('何かを入力をしてください')  # "Please enter something"
                continue
            if answer == 'quit':
                break
            # Check word
            old_word = game.send_word(answer)
            # Get new word from game
            word = game.next_word()
            # "So it's kana (kanji)? Then the next word is.. kana (kanji)"
            print('{} ({})ですか?じゃ。。 {} ({})'.format(old_word.kana,
                  old_word.kanji, word.kana, word.kanji))

        except UnknownWordException as e:
            print('{} ({})'.format(e, word.kana))
            continue

        except InvalidWordException as e:
            print('{}'.format(e))
            break

    # Show score and the word chain
    print('-' * 10)
    print('遊んでくれてありがとうございました')  # "Thank you for playing"
    print('1)スコアは {} になります'.format(game.player_score))  # "1) Your score is {}"
    print('2) 言葉: {}'.format(' -> '.join(w.kana for w in game.seen_words)))  # "2) Words:" followed by the chain
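
The loop above relies on a Game/Word API imported from elsewhere in this project. The sketch below is only a stand-in that satisfies the calls used here (next_word, send_word, player_score, seen_words and the two exception types); the real shiritori chaining rules are not reproduced.

# Minimal stand-in for the imported Game API; the real chain-matching rules are omitted.
from collections import namedtuple

Word = namedtuple('Word', ['kana', 'kanji'])

class UnknownWordException(Exception):
    pass

class InvalidWordException(Exception):
    pass

class Game:
    def __init__(self, words, max_rank=None):
        self.words = {w.kana: w for w in words}  # assumed lookup by kana reading
        self.seen_words = []
        self.player_score = 0

    def next_word(self):
        for w in self.words.values():
            if w not in self.seen_words:
                self.seen_words.append(w)
                return w
        raise InvalidWordException('no words left')

    def send_word(self, answer):
        if answer not in self.words:
            raise UnknownWordException('unknown word')
        word = self.words[answer]
        self.seen_words.append(word)
        self.player_score += 1
        return word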
Example No. 15
def create_picture(symbol):
    daysFmt = matplotlib.dates.DateFormatter('%H:%M')
    fig, ax1 = matplotlib.pyplot.subplots()
    fig.set_size_inches(10, 5)
    date_list, close_list = get_data.get_data(symbol)
    if not close_list:
        return
    high, low = close_list[0], close_list[0]
    for close in close_list:
        if close > high:
            high = close
        elif close < low:
            low = close
    ax1.plot(date_list, close_list, color="gray")
    ax1.xaxis.set_major_formatter(daysFmt)
    # ax1.set_ylim(low - 0.01, high + 0.01)
    ax1.autoscale_view()
    matplotlib.pyplot.setp(matplotlib.pyplot.gca().get_xticklabels(), rotation=45, horizontalalignment='right')
    matplotlib.pyplot.show()
Example No. 16
def gpc(fut_code="TY"):
    """Plot a candle graph for the security given by fut_code (default "TY")."""
    import get_data as gd
    from matplotlib.finance import quotes_historical_yahoo


    #quotes contains a list of tuples:
    #(date, open, close, high, low, volume)
    quotes = gd.get_data(fut_code, "candle")
    if len(quotes) == 0:
        today = datetime.datetime.now()
        
        date2 = (today.year, today.month, today.day)
        date1 = ( today.year -1, today.month, 1)
        quotes = quotes_historical_yahoo(fut_code, date1, date2)
    if len(quotes) == 0:
        raise SystemExit

    gpcs(quotes)
Example No. 17
    def setUp(self):

        comps = [['5ht7', 'ExtFP']]

        loader = ["get_splitted_data", {
                "seed": 666,
                "valid_size": 0.25,
                "n_folds": 1,
                "percent": 0.5}]

        preprocess_fncs = []

        data = get_data(comps, loader, preprocess_fncs).values()[0][0][0]
        self.X = data['X_train']['data']
        self.y = data['Y_train']['data']

        self.X_test = data['X_valid']['data']
        self.y_test = data['Y_valid']['data']

        self.elm_param_grid = {'C': list(np.logspace(0, 5, 6)),
                               'h': [100, 200, 500, 1000]}

        self.svm_param_grid = {'C': list(np.logspace(-3, 4, 8))}
        self.nb_param_grid = {'h': [100, 200, 500, 1000]}
Example No. 18
import backtrader as bt
import pandas as pd

from strategies.GoldenCross import GoldenCross
from strategies.BuyHold import BuyHold
#from strategies.MacdStratTalib import MacdStrat
from strategies.MacdStrat import MacdStrat

from strategies.MovingAverage import MovingAverage
from strategies.Sar import Sar
from strategies.AdxMacd import AdxMacd

from get_data import get_data

cerebro = bt.Cerebro()

symbol = 'TSLA'  #CLDR'

prices = get_data(symbol, "01-01-2020", "09-15-2020")
print(prices.head())

prices = pd.read_csv(f'../datasets/data_1yr_sep/{symbol}.csv',
                     index_col='Date',
                     parse_dates=True)

print(prices.head())

# initialize the Cerebro engine
cerebro = bt.Cerebro()
cerebro.broker.setcash(100000)

# add OHLC data feed
feed = bt.feeds.PandasData(dataname=prices)
cerebro.adddata(feed)
Example No. 19
def train():
    # get data
    tr_d, tr_l, ts_d, ts_l = get_data()
    tr_d = np.concatenate([tr_d, tr_d, tr_d], 3)
    ts_d = np.concatenate([ts_d, ts_d, ts_d], 3)

    # input data
    img = tf.placeholder(tf.float32, [None, height, width, num_channel],
                         name="img")
    label = tf.placeholder(tf.float32, [None, num_label], name="label")

    # classifier
    c_params, c_net, c_prediction = classifier(img)

    # loss function and accuracy
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=c_net, labels=label))
    acc = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(c_prediction, 1), tf.argmax(label, 1)),
                tf.float32))

    # optimizer
    optimizer = tf.train.AdamOptimizer(0.001)
    trainer = optimizer.minimize(loss)

    # shuffle the training set
    tr_idx = np.arange(len(tr_d))
    np.random.shuffle(tr_idx)
    tr_d = tr_d[tr_idx]
    tr_l = tr_l[tr_idx]

    # saver
    saver = tf.train.Saver()

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        # tf board
        tf.summary.scalar('loss', loss)
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(FLAGS.tb_folder, sess.graph)
        for i in range(FLAGS.max_epoch):
            tr_acc = 0
            for j in range(len(tr_d) // FLAGS.batch_num):
                batch_img = tr_d[j * FLAGS.batch_num:(j + 1) * FLAGS.batch_num]
                batch_label = tr_l[j * FLAGS.batch_num:(j + 1) *
                                   FLAGS.batch_num]
                summary, tr_acc_b, _ = sess.run([merged, acc, trainer],
                                                feed_dict={
                                                    img: batch_img,
                                                    label: batch_label
                                                })
                tr_acc += tr_acc_b * FLAGS.batch_num
                #train_writer.add_summary(summary,i*len(tr_d)+j*FLAGS.batch_num);
            ts_acc = sess.run([acc], feed_dict={
                img: ts_d,
                label: ts_l
            })[0]
            print(
                str(i + 1) + " Epoch Training Acc: " +
                str(tr_acc / len(tr_d)) + ", Test Acc: " + str(ts_acc))
            # shuffle
            tr_idx = np.arange(len(tr_d))
            np.random.shuffle(tr_idx)
            tr_d = tr_d[tr_idx]
            tr_l = tr_l[tr_idx]
        saver.save(sess, os.path.join(FLAGS.save_folder, FLAGS.save_file_name))
Example No. 20
def data():
    get_data.get_data()
Example No. 21
        ax1.errorbar(rmK[good_stars],avg_ll[good_stars],
#            xerr=rmKerr[good_stars],
            yerr=unc_ll[good_stars]*0.0,
            mfc=plot_color,
            marker=plot_marker,mec='None',label=plot_label,lw=0,elinewidth=1,
            ms=msize,capsize=0,ecolor=plot_color)

# r-K color
color_f = plt.figure(figsize=(9,8))
ax = plt.subplot(111)
ax.set_xlabel('(r\'-K)',fontsize='x-large')
ax.set_xlim(1.5,6.1)
ax.tick_params(which='both',width=2,labelsize='x-large',top=False)
ax.set_ylabel(r'H$\alpha$ EqW ($\AA$)',fontsize='x-large')

pdat,pobs,pobs_nr,pobs_r = get_data.get_data('P')
plotit(pdat,pobs,ax,
    'DarkBlue',#'#0099CC',
    'o','Praesepe','ADAMSPT','NUM_SPECTRA',0,'color')
hdat,hobs,hobs_nr,hobs_r = get_data.get_data('H')
plotit(hdat,hobs,ax,
    'OrangeRed',#'#FFAA00',
    'D','Hyades','MDM_SPEC_ADAMSPT','MDM_SPECMATCH',0,'color')
ax.legend(numpoints=1,handletextpad=0.2,
    handlelength=1,borderaxespad=0.2,loc=2,frameon=False)
ax.set_ylim(1.5,-15)
texty = -15.25
for i in range(klen):
    ax.text(kh_rpmK[i],texty,kh_spt[i],fontsize='large')
color_f.savefig('papereqws.png')
color_f.savefig('papereqws.eps',bbox_inches='tight')
Example No. 22
from sklearn.model_selection import cross_validate, train_test_split, GroupKFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler, FunctionTransformer, PolynomialFeatures
import matplotlib.pyplot as plt
# local files
import get_data

plt.style.use('seaborn')

# x_data, y_data = get_data.get_data_as726x_serial("mango", "as7262", int_times=[150],
#                                                  positions=[1, 2, 3],
#                                                  led_currents=["25 mA"])
x_data, y_data = get_data.get_data("mango",
                                   "as7262",
                                   int_time=[100, 200],
                                   position=[2, 3],
                                   led_current=["25 mA"])
x_data, y_data = get_data.get_data("mango",
                                   "as7265x",
                                   int_time=[100, 200],
                                   led="White",
                                   position=[2, 3],
                                   led_current=["25 mA"])

print(x_data)
print('==')
print(y_data.to_string())
print('======')

y_data = y_data['Avg Total Chlorophyll (µg/cm2)']
Example No. 23
        # compare the reachability vector for this point
        rd_temp = update(cd[seed_trial], distance_mat, seed_trial, seeds)
        # compare the current reachability matrix with an updated rd
        # if the updated rd is less than the current rd
        rd_index = np.where(rd[seeds] > rd_temp)[0]
        # pdb.set_trace()
        rd[seeds[rd_index]] = rd_temp[rd_index]
        index = np.argmin(rd[seeds])
    processed.append(seeds[0])
    rd[0] = 0
    return rd, cd, processed

if __name__ == '__main__':
        #X = np.load("zhang2.dat.npy")
    min_cluster_size = 100 
    X = np.array(get_data.get_data()[14000:21000])
    rd, cd, processed = optics(np.array(X), min_cluster_size, "haversine")
   
    ## create 2 plots 
    RPlot = []
    RPoints = []
    for item in processed:
        RPlot.append(rd[item])  # Reachability Plot
        RPoints.append([X[item][0], X[item][1]])
    
    ## create some plot
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(X[:, 0], X[:, 1], 'b.', ms=2)
    ax.set_title('Crime in SF (March 2012)')
    plt.savefig('Graph.png', dpi=None, facecolor='w', edgecolor='w')
Example No. 24
from graph import save
from get_data import get_data
from indicators import CalculateIndicators
import datetime
import time

# Disable UserWarnings on linux prompt: export TF_CPP_MIN_LOG_LEVEL=2

## *********************************************************************************
## 1) *** Download data ***

ticker = 'TSLA'
start_date = '20000101'
end_date = str(datetime.datetime.fromtimestamp(time.time()).strftime('%Y%m%d'))

df = get_data(ticker=ticker, start_date=start_date, end_date=end_date)

## *********************************************************************************
## 2) *** Calculate indicators ***

# The first part of the dataset will be cut, depending on the indicator parameters, to prevent empty values.
ci = CalculateIndicators(df)

# Parameters
ci.set_RSI_parameter(n=14)
ci.set_MACD_parameter(fast=12, slow=26, signal=9)
ci.set_SO_parameter(period=14)
ci.set_moving_average_1(window=12)
ci.set_moving_average_2(window=26)

data = ci.calculate_indicators()
Example No. 25
def main():
    with open('config.json') as json_data_file:
        data = json.load(json_data_file)
    langs = data["languages"]

    args = _init_parser()

    work_dir = data["work_dir"]
    translation_files = data['translation_files']

    # sentence max length, Tx for first language, Ty for the other
    seq_lens = [data["hyper_parameters"]["Tx"], data["hyper_parameters"]["Ty"]]
    batch_size = data["hyper_parameters"]["batch_size"]
    num_layers = data["hyper_parameters"]["num_layers"]
    lstm_units = data["hyper_parameters"]["lstm_units"]
    beam_width = data["hyper_parameters"].get("beam_width", 1)
    test_set_pct = data["hyper_parameters"]["test_set_pct"] # % of all lines
    skip_words_treshold = data["skip_words_treshold"]

    action = args.cmd
    if args.reverse:
        langs.reverse()
        seq_lens.reverse()
    languages = tuple(langs)
    Tx = seq_lens[0]
    Ty = seq_lens[1]

    print("Configured for {} --> {}".format(languages[0], languages[1]))

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)
    if action == 'dictionary':
        file_pairs = get_data(translation_files)
        create_dictionary(languages, file_pairs, skip_words_treshold, work_dir)
    elif action == 'glove':
        dictionaries, _ = read_dictionary(languages, work_dir)
        fetch_glove_vectors(languages, dictionaries, work_dir, data)
    elif action == 'prepare':
        file_pairs = get_data(translation_files)
        convert_words_to_indexes(languages, file_pairs, work_dir, Tx, Ty, test_set_pct)
    elif action == 'build_all':
        # note: the TensorFlow Keras utils do not extract all data types, so this may fail, at least on Windows.
        # Recommended: build in pieces and check every step.
        file_pairs = get_data(translation_files)
        create_dictionary(languages, file_pairs, skip_words_treshold, work_dir)
        dictionaries, _ = read_dictionary(languages, work_dir)
        fetch_glove_vectors(languages, dictionaries, work_dir, data)
        convert_words_to_indexes(languages, file_pairs, work_dir, Tx, Ty, test_set_pct)
    elif action == 'train':
        kwargs={}
        if args.learning_rate:
            kwargs["lr"] = args.learning_rate
        if args.batch_size is not None:
            batch_size = args.batch_size
        instance = RunNMT(langs, Tx, Ty, num_layers,
            lstm_units, batch_size, work_dir, **kwargs)
        kwargs={}
        if args.load:
            kwargs["load_checkpoint"] = True
        instance.train(2, **kwargs)
    elif action == 'save_weights':
        instance = RunNMT(langs, Tx, Ty, num_layers,
            lstm_units, 1, work_dir)
        instance.save_weights()
    elif action == 'translate':
        if args.beam_width is not None:
            beam_width = args.beam_width
        instance = RunNMT(langs, Tx, Ty, num_layers,
            lstm_units, 1, work_dir, beam_width=beam_width)
        instance.translate_interactive()
    elif action == 'validate':
        batch_size = VALIDATION_BATCH_SIZE
        instance = RunNMT(langs, Tx, Ty, num_layers,
            lstm_units, batch_size, work_dir, beam_width=1) # tf beam search is buggy
        instance.validate()
    else:
        print("not implemented yet.")
Example No. 26
def test_get_data_is_inst():
    value = get_data()
    assert isinstance(value, pd.DataFrame)
Example No. 27
def test_get_data_dim():
    value = get_data().shape
    assert value[0] > 0
    assert value[1] > 0
Example No. 28
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
from get_data import get_data
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

if __name__ == '__main__':

    X_train_lst, X_test_lst, y_train_lst, y_test_lst = get_data()

    scores = []

    # For each subject
    for i in range(len(X_test_lst)):

        # Train model
        clf = KNeighborsClassifier(n_neighbors=5)
        clf.fit(X_train_lst[i], y_train_lst[i])

        # Test model and append evaluation
        scores.append(round(clf.score(X_test_lst[i], y_test_lst[i]), 3))

    # Print each subject score and mean score
    print('Subjects scores: {}'.format(scores))
    print('Mean score: {}'.format(np.mean(scores)))


Example No. 29
def update():
    # get the new data
    get_data()
    # generate the charts and other output
    create_charts()
Example No. 30
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import mplfinance as mpf
import matplotlib.dates as mdates
import pandas as pd
import pandas_datareader.data as web
from get_data import get_data

data = get_data()
data.index = pd.to_datetime(data.index)

mpf.plot(data[-50:], type='candlestick', show_nontrading=True)
Example No. 31
import get_data
import bs4
import csv
import config

sure = input("Are you sure you want to overwrite data.csv?(y/n?)\n")
if (sure == 'y'):
    data = get_data.get_data()
    names = []
    apartments = []
    for li in data:
        names.append(li.contents[0])
        apartments += li.contents[1]
    with open(config.path + 'Domo_analytics/data.csv', 'w') as csvfile:
        filewriter = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
        filewriter.writerow(['names'] + names)
        filewriter.writerow(['apartments'] + apartments)
else:
    print("Canceled")
Example No. 32
from tkinter import *
from tkinter import ttk
from get_data import get_data
from treeview import to_view

if __name__ == "__main__":
    app = Tk()
    app.title("Parser")
    app.geometry('550x550')
    # app.resizable(width=1000, height=1000)
    ttk.Label(app, text="Treeview").pack()
    treeview = ttk.Treeview(app, selectmode='browse')
    treeview.column('#0', width=500, stretch='no')
    treeview.pack()
    verscrlbar = ttk.Scrollbar(app, orient="vertical", command=treeview.yview)
    verscrlbar.pack(side='right', fill='y')

    # Configure the treeview to update the vertical scrollbar
    treeview.configure(yscrollcommand=verscrlbar.set)
    data = get_data('./testfiles/sweets.yaml', 'yaml')
    to_view(data, treeview, 'json', '')

    app.mainloop()
Example No. 33
import pickle
import sys, os, string
from numpy import atleast_1d
import matplotlib.pyplot as plt
from matplotlib import rcParams
from scipy.optimize import leastsq
import sigfig
from astropy.io.ascii import read
from A_lamb import A_lamb
import STANstats
import config      # local module providing config.config(), assumed present in this project
import get_data    # local module providing get_data.get_data(), used below

plt.style.use('serif')
rcParams['font.size'] = 14

cfgfile = sys.argv[1]
base = os.path.dirname(cfgfile)

cf = config.config(cfgfile)
data = get_data.get_data(cf)
pfile = cf.Sampler.outfile

ch = STANstats.STANchains(pfile)

names = data.names
Np = len(names)
d0 = 1.0

a = ch.get_trace('a', merge=True) - 19
b = ch.get_trace('b', merge=True)
c = ch.get_trace('c', merge=True)

evar = atleast_1d(ch.get_trace('evar', merge=True))
Rl = atleast_1d(ch.get_trace('Rl', merge=True))
dz = ch.median('vpec') * 100 / 3e5
Example No. 34
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR, LinearSVR
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler, FunctionTransformer, PolynomialFeatures
import matplotlib.pyplot as plt
import pandas as pd
# local files
import get_data

plt.style.use('seaborn')

# x_data, y_data = get_data.get_data_as726x_serial("mango", "as7262", int_times=[150],
#                                                  positions=[1, 2, 3],
#                                                  led_currents=["25 mA"])
x_data, y_data = get_data.get_data("mango",
                                   "as7262",
                                   int_time=[150],
                                   position=[2],
                                   led_current=["25 mA"])

print(x_data)
print('==')
print(y_data.to_string())
print('======')

y_data = y_data['Avg Total Chlorophyll (µg/cm2)']
print(x_data.shape, y_data.shape)
x_data = StandardScaler().fit_transform(x_data)
x_scaled_np = PolynomialFeatures(degree=2).fit_transform(x_data)

x_data = pd.DataFrame(x_scaled_np)
Example No. 35
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer

from tqdm import trange

from get_data import get_data, get_val_data
from evaluation import score

# Get data
print 'Reading and preprocessing data'
users, items, interactions, target_users, active_items = get_data()

users.career_level = users.career_level.fillna(3)
users.career_level = users.career_level.replace(0, 3)

# Get stuff for validation
interactions_train, interactions_val, target_val, users_val = (get_val_data(
    users, items, interactions))

print 'Count Vectorizer'
count_vec = CountVectorizer(binary=True, max_features=5000)
count_vec.fit(users_val.jobroles)
active_items_title_t = count_vec.transform(active_items.title)
active_items_tags_t = count_vec.transform(active_items.tags)
users_jobroles_t = count_vec.transform(users_val.jobroles)

print 'Getting sparse intersection matrices'
jobroles_title = users_jobroles_t.dot(active_items_title_t.T)
jobroles_tags = users_jobroles_t.dot(active_items_tags_t.T)
Example No. 36
import datetime
import csv
from xgboost import XGBRegressor as XGBR
import requests
import demjson
import os
import xgboost as xgb
from sklearn.model_selection import KFold, cross_val_score as CVS, train_test_split as TTS
from time import time
from sklearn.metrics import mean_squared_error as MSE, r2_score
import pickle
from ajll_datapreprocessing import all_dealdata as nl_datapreprossion
from llsummerhourmodeling import Model_construcion as summerModel_construcion
from get_data import deal_data as get_data
import pandas as pd

os.chdir(DIR)

if __name__ == '__main__':
    get_data()
    nl_datapreprossion()
    for root, dirs, files in os.walk('../ll'):
        for name in files:
            if name.startswith("deal"):
                yc = name[len('deal'):-len('.csv')]  # strip the 'deal' prefix and '.csv' suffix
                data = pd.read_csv('../config/sbdy_config.csv')
                for i in range(len(data['id'])):
                    if (data['bh'][i] == yc):
                        nameid = str(data['id'][i])
                summerModel_construcion(input_path=os.path.join(root, name),
                                        model_path=os.path.join(
                                            root, nameid + '.dat'))
Example No. 37
def main(fname_fits, datasrc, datapath=None):
    '''
    Plot dip depth against blip depth for each pass in a BLS output FITS file,
    with interactive point selection against the raw Kepler light curve.
    '''
    # Load the FITS file using the custom class to wrap the data.
    fits = BLSOutput(fname_fits)
    kic = fits.kic

    # Use the existing get_data functionality to load the raw Kepler data.
    dataspec = StringIO('%s\t*\tllc' % kic)
    outstream1 = StringIO()
    outstream2 = StringIO()
    get_data(datasrc, datapath, instream=dataspec, outstream=outstream1)
    outstream1.seek(0)
    join_quarters(instream=outstream1, outstream=outstream2)
    outstream2.seek(0)
    for _, _, t, f, e in read_mapper_output(outstream2, uri=False):
        time, flux, fluxerr = t, f, e
    time = np.array(time)
    flux = np.array(flux)
    fluxerr = np.array(fluxerr)

    for i in xrange(fits.num_passes):
        # Get the detrended light curve for this pass.
        lc = fits.lightcurves[i]
        dtime = lc['Time']
        dflux = lc['Flux']
        dfluxerr = lc['Flux error']

        # Get the BLS output for this pass.
        bls = fits.dipblips[i]
        mask = (bls['srsq_dip'] > 0.) & (bls['srsq_blip'] > 0.)
        duration_dip = bls['duration_dip'][mask]
        depth_dip = -1. * bls['depth_dip'][mask]
        midtime_dip = bls['midtime_dip'][mask]
        duration_blip = bls['duration_blip'][mask]
        depth_blip = bls['depth_blip'][mask]
        midtime_blip = bls['midtime_blip'][mask]
        segstart = bls['segstart'][mask]
        segend = bls['segend'][mask]

        # This is needed for the plot interaction.
        data = np.column_stack((depth_dip,depth_blip))
        kdtree = scipy.spatial.cKDTree(data)

        # Set up the canvas.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.set_aspect('equal')
        cid = fig.canvas.mpl_connect('button_press_event',
            lambda e: __onclick(e, ax, kdtree, segstart, segend, duration_dip,
                depth_dip, midtime_dip, duration_blip, depth_blip, midtime_blip,
                time, flux, fluxerr, dtime, dflux, dfluxerr))

        # Plot the dip and blip depths.
        ax.scatter(depth_dip, depth_blip, marker='x', color='k')

        # Draw a dashed y = x line.
        ax.plot([0.,1.], [0.,1], transform=plt.gca().transAxes, ls='--',
            color='r')

        # The limits of the plot are set by the maximum absolute depth. Use the
        # same dimension in both directions so y = x has slope 1 when displayed
        # on the screen.
        size = max(np.amax(depth_dip), np.amax(depth_blip))
        ax.set_xlim(0., size)
        ax.set_ylim(0., size)
        ax.set_title('KIC ' + kic)
        ax.set_xlabel('Dip depth')
        ax.set_ylabel('Blip depth')
        ax.set_title('Pass #' + str(fits.num_passes - i))

        # Show the plot; halts execution until the user exits.
        plt.tight_layout()
        plt.show()
Example No. 38
"""

Run the chatbot locally in the console without the Will framework (WIP, just code samples so far).

"""

if __name__ == '__main__':

    import pandas as pd
    import pickle

    from get_data import get_data
    from predict.MarkovTextGenerator import MarkovTextGenerator
    from predict.ExpertPredictor import ExpertPredictor

    data_full = get_data()
    data_resampled = get_data(max_msgs_per_user=5000,
                              undersampling_method='recent',
                              boost_users_in_range=(1, 1500),
                              boost_factor=2)
    data_dummy = pd.DataFrame()

    # Training and saving MarkovTextGenerator data
    markov_text_generator = MarkovTextGenerator(data=data_full)
    markov_text_generator.save(save_dir='models')

    # Training and saving ExpertPredictor data
    expert_predictor = ExpertPredictor(data=data_resampled,
                                       min_df=5,
                                       max_df=0.05)
    expert_predictor.save(save_dir='models')
Example No. 39
    to_categorical,
    show,
)
import pandas as pd
import numpy as np
import scipy as sp
import datetime
import torch

LEARNING_RATE = 1e-2
NUM_HIDDEN_UNITS = 8
LOG_INTERVAL = 500  # epochs
NUM_EPOCHS = 3500
BATCH_SIZE = 4

[items, attributes, df] = get_data()

# show(plot_distance_matrix(df.transpose()))
# show(plot_svd(df.transpose()))
# show(plot_singular_dimensions(df.transpose()))
# show(plot_covariance_matrix(df.transpose()))

NUM_ITEMS = len(items)
NUM_ATTRIBUTES = len(attributes)

# Create feature and target tensors
features = torch.tensor(to_categorical(range(NUM_ITEMS)), dtype=torch.float32)
targets = torch.tensor(df.values, dtype=torch.float32)

NUM_RUNS = 5
Example No. 40
        matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
    ax.yaxis.set_ticks_position('both')

    plt.suptitle(f"{region} new daily {lbl}", fontsize='x-large')

    if not os.path.exists(outdir):
        os.mkdir(outdir)
    outfilename = os.path.join(outdir, f"{region}_new_{lbl}.png")
    plt.savefig(outfilename, bbox_inches='tight', dpi=200)
    print(f"Saved {outfilename}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(dest="region", help="Name of state or nation to plot")
    parser.add_argument("-d",
                        "--deaths",
                        action="store_true",
                        default=False,
                        help="Switch to plot deaths instead of cases")
    args = parser.parse_args()

    data_usa, pops_usa = get_data.get_data("usa", deaths=args.deaths)
    data_world, pops_world = get_data.get_data("world", deaths=args.deaths)
    plot_by_region(args.region,
                   data_world,
                   data_usa,
                   pops_world,
                   pops_usa,
                   deaths=args.deaths)
Example No. 41
from flask import Flask
from flask import render_template
from jinja2 import Environment
from get_data import get_data

app = Flask(__name__)
recalls_data = get_data()

def date_format(date_in):
	return date_in[:10]

def injury(inj):
	if len(inj) > 0:
		return True
	return False

app.jinja_env.filters['date_format'] = date_format
app.jinja_env.tests['injury'] = injury

@app.route("/")
def homepage():
	html = render_template('homepage.html',
		                   recalls=recalls_data,
		                   recalls_count=len(recalls_data))
	return html

if __name__ == '__main__':
	app.run(use_reloader=True, debug=True)
Example No. 42
def handle_endpoint(endpoint):
    df = reformat_dates(get_data(endpoint))
    df = handle_jsons(df)
    write_to_db(endpoint, df)
    return True
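
A hypothetical driver for the helper above; the endpoint names are placeholders, and reformat_dates, handle_jsons and write_to_db are assumed to come from the surrounding module.

# Placeholder endpoint names; adjust to whatever the source API actually exposes.
if __name__ == '__main__':
    for endpoint in ('users', 'orders', 'events'):
        handle_endpoint(endpoint)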
Example No. 43
    def load_data(self):
        pair_fname  = '../lastfm_train_mappings.txt'
        lyrics_path = '../data/lyrics/train/'
    
        # X_train is a list of all examples. Each example is a 2-element list, and each element is a list of words in the lyrics.
        # word_counts is a dictionary that maps each word to its count.
        if self.config.debug:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, '../glove.6B.50d.txt', threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100)
        else:
            X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold_down=100, threshold_up=4000, npos=10000, nneg=10000)

        self.labels_train = np.zeros((len(X_train),self.config.n_class))
        self.labels_train[range(len(X_train)),l_train] = 1
        
        x = collections.Counter(l_train)
        for k in x.keys():
            print 'class:', k, x[k]
        print ''

        self.vocab = Vocab()
        self.vocab.construct(self.word_counts.keys())
        self.wv = self.vocab.get_wv('../glove.6B.50d.txt')

        with open('word_hist.csv', 'w') as f:
            for w in self.word_counts.keys():
                f.write(w+','+str(self.word_counts[w])+'\n')
            
        self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. 
        self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps))
        for i in range(len(X_train)):
            self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]]       
            self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]]       
        self.sequence_len1 = np.array(seq_len1)
        self.sequence_len2 = np.array(seq_len2)
Example No. 44
def gbtgridder(args):
    if not args.SDFITSfiles:
        return

    verbose = args.verbose
    chanStart, chanStop = parse_channels(args.channels,verbose=verbose)
    if (chanStart is not None and chanStart < 0) or (chanStop is not None and chanStop < 0):
        return

    if chanStart is None:
        chanStart = 0

    average = args.average

    minTsys = args.mintsys
    maxTsys = args.maxtsys

    scanlist = args.scans
    if args.scans is not None:
        scanlist = parse_scans(scanlist)

    sdfitsFiles = args.SDFITSfiles
    for sdf in sdfitsFiles:
        if not os.path.exists(sdf):
            if verbose > 1:
                print sdf + ' does not exist'
            return

    # extract everything from the SDFITS files
    # in the long run this needs to change so that only one SDFITS file is opened at a time
    # and a reasonable amount of data is read and then gridded - repeat until done
    # right now, all of the data must be read first, then passed in one call
    # to the gridder.  In that case, there will be 2 passes through the SDFITS files
    # since the full extent of the data on the sky must be known before gridding can start.

    xsky = None
    ysky = None
    wt = None
    data = None
    nchan = None
    frest = None
    faxis = None
    source = None
    dataUnits = None
    calibType = None
    veldef = None
    specsys = None
    coordType = (None,None)
    radesys = None
    equinox = None
    observer = None
    telescop = None
    frontend = None
    dateObs = None
    uniqueScans = None
    ntsysFlagCount = 0
    outputFiles = {}

    if verbose > 3:
        print "Loading data ... "
    for thisFile in sdfitsFiles:
        try:
            if verbose > 3:
                print "   ",thisFile
            dataRecord = get_data(thisFile,nchan,chanStart,chanStop,average,scanlist,
                                  minTsys,maxTsys,verbose=verbose)
            if dataRecord is None:
                # there was a problem that should not be recovered from
                # reported by get_data, no additional reporting necessary here
                sys.exit(1)

            if len(dataRecord) == 0:
                # empty file, skipping
                continue

            if xsky is None:
                xsky = dataRecord["xsky"]
                ysky = dataRecord["ysky"]
                wt = dataRecord["wt"]
                data = dataRecord["data"]
                nchan = dataRecord["nchan"]
                chanStart = dataRecord["chanStart"]
                chanStop = dataRecord["chanStop"]
                frest = dataRecord["restfreq"]
                faxis = dataRecord["freq"]
                source = dataRecord["source"]
                dataUnits = dataRecord["units"]
                calibType = dataRecord["calibtype"]
                veldef = dataRecord["veldef"]
                specsys = dataRecord["specsys"]
                coordType = (dataRecord["xctype"],dataRecord["yctype"])
                radesys = dataRecord["radesys"]
                equinox = dataRecord["equinox"]
                telescop = dataRecord["telescop"]
                frontend = dataRecord["frontend"]
                observer = dataRecord["observer"]
                dateObs = dataRecord["date-obs"]
                uniqueScans = numpy.unique(dataRecord["scans"])

                # this also checks that the output files are OK to write
                # given the value of the clobber argument
                outputFiles = set_output_files(source, frest, args, ["cube","weight","line","cont"],
                                               verbose=verbose)
                if len(outputFiles) == 0:
                    if verbose > 1:
                        print "Unable to write to output files"
                    return
                
            else:
                xsky = numpy.append(xsky,dataRecord["xsky"])
                ysky = numpy.append(ysky,dataRecord["ysky"])
                wt = numpy.append(wt,dataRecord["wt"])
                data = numpy.append(data,dataRecord["data"],axis=0)
                uniqueScans = numpy.unique(numpy.append(uniqueScans,dataRecord["scans"]))

            ntsysFlagCount += dataRecord["ntsysflag"]

        except(AssertionError):
            if verbose > 1:
                print "There was an unexpected problem processing %s" % thisFile
            raise

    if xsky is None:
        if verbose > 1:
            print "No data was found in the input SDFITS files given the data selection options used."
            print "Can not continue."
        return

    if args.restfreq is not None:
        # Use user supplied rest frequency, convert from MHz to Hz
        frest = args.restfreq * 1.0e6

    # grid_otf.py already sets the weights to 1 if wt=None
    # Added a flag here called --eqweight 
    # print args.eqweight
    if args.eqweight is True: 
        #if verbose > 1:
        #    print "Setting all weights to 1."
        wt = None

    # characterize the center of the image

    # the beam_fwhm is needed in various places
    # currently we use the same equation used in idlToSdfits
    # there's about a 2% difference between the two

    # this equation comes from Adam's IDL code, where do the 747.6 and 763.8 values come from?
    # beam_fwhm = (747.6+763.8)/2.0/numpy.median(faxis/1.e9)/3600.
    # This is what idlToSdfits does (next 2 lines of code)
    # telescop diameter, in meters
    diam = 100.0
    beam_fwhm = 1.2 * constants.c * (180.0/constants.pi) / (diam * numpy.median(faxis))
    # the 747.6 and 763.8 values above are equivalent to diam of 99.3 and 97.2 m in this equation, respectively
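    # Worked example (informal, not part of the original code): at 1.4 GHz with diam = 100 m,
    #   beam_fwhm = 1.2 * c * (180/pi) / (100 * 1.4e9) ~= 0.147 deg ~= 8.8 arcmin,
    # which is the expected order of magnitude for a 100 m dish at L-band.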

    refXsky = None
    refYsky = None
    centerYsky = None
    pix_scale = None
    xsize = None
    ysize = None
    refXpix = None
    refYpix = None

    if args.clonecube is not None:
        # use the cloned values
        cubeInfo = get_cube_info(args.clonecube,verbose=verbose)
        if cubeInfo is not None:
            if (cubeInfo["xtype"] != coordType[0]) or \
                    (cubeInfo["ytype"] != coordType[1]) or \
                    (cubeInfo['proj'] != args.proj) or \
                    (radesys is not None and (cubeInfo['radesys'] != radesys)) or \
                    (equinox is not None and (cubeInfo['equinox'] != equinox)):
                if verbose > 2:
                    print "Sky coordinates of data are not the same type found in %s" % args.clonecube
                    print "Will not clone the coordinate information from that cube"
                    if verbose > 4:
                        print "xtype : ", cubeInfo["xtype"], coordType[0]
                        print "ytype : ", cubeInfo["ytype"], coordType[1]
                        print "proj : ", cubeInfo['proj'], args.proj
                        print "radesys : ", cubeInfo['radesys'], radesys
                        print "equinox : ", cubeInfo['equinox'], equinox
            else:
                refXsky = cubeInfo["xref"]
                refYsky = cubeInfo["yref"]
                pix_scale = cubeInfo["pix_scale"]
                xsize = cubeInfo["xsize"]
                ysize = cubeInfo["ysize"]
                refXpix = cubeInfo["xrefPix"]
                refYpix = cubeInfo["yrefPix"]


    # this is needed ONLY when the cube center and size are not given 
    # on the command line in one way or another
    centerUnknown = ((refXsky is None or refYsky is None) and args.mapcenter is None)
    sizeUnknown = ((xsize is None or ysize is None) and args.size is None)
    nonZeroXY = None
    if centerUnknown or sizeUnknown:
        # this masks out antenna positions exactly equal to 0.0 - unlikely to happen
        # except when there is no valid antenna pointing for that scan.
        nonZeroXY = (xsky!=0.0) & (ysky!=0.0)

        # watch for the pathological case where there is no good antenna data
        # which can not be gridded at all
        if numpy.all(nonZeroXY == False):
            # always print this out, independent of verbosity level
            print "All antenna pointings are exactly equal to 0.0, can not grid this data"
            return

        if verbose > 3 and numpy.any(nonZeroXY == False):
            print "%d spectra will be excluded because the antenna pointing is exactly equal to 0.0 on both axes - unlikely to be a valid position" % (nonZeroXY == False).sum()

    # need to watch for coordinates near 0/360  OR near +- 180
    # this technique will miss the difficult case of a mixture of +- 180 and 0:360 X coordinates
    # assumes that Y doesn't have this problem, likely is +- 90
    xskyMin = xsky[nonZeroXY].min()
    xskyMax = xsky[nonZeroXY].max()
    newXsky = None

    if (xskyMin > 0.0) :
        # all coordinates > 0, watch for 0/360 coordinates
        if (xskyMax - xskyMin) > 180.0:
            # probably a problem, subtract 360 for coordinates > 180.0 so that they run from -180 to +180 continuously through 0.0
            rangeBefore = xskyMax - xskyMin
            xskyMask = xsky>180.0
            newXsky = xsky.copy()
            newXsky[xskyMask] -= 360.0
    else:
        # some coordinates are < 0, watch for +- 180.0
        # same criteria
        if (xskyMax - xskyMin) > 180.0:
            # probably a problem, add 360 to all negative coordinates so they run from 0 through 360
            rangeBefore = xskyMax - xskyMin
            xskyMask = xsky<0.0
            newXsky = xsky.copy()
            newXsky[xskyMask] += 360.0

    if newXsky is not None:
        # see if that's an improvement
        newXskyMin = newXsky[nonZeroXY].min()
        newXskyMax = newXsky[nonZeroXY].max()
        if (newXskyMax-newXskyMin) < rangeBefore:
            # this is an improvement, use it
            xsky = newXsky.copy()
            xskyMin = newXskyMin
            xskyMax = newXskyMax

    if refXsky is None:
        if args.mapcenter is not None:
            # use user-supplied value
            refXsky = args.mapcenter[0]
        else:
            # set the reference sky position using the mean x and y positions
            # still need to worry about points clearly off the grid
            #   e.g. a reference position incorrectly included in the data to be gridded.
            #   not sure what an appropriate heuristic for that is

            # idlToSdfits rounds the center from the mean to the nearest second/arcsecond
            # for RA or HA, divide by 15
            if coordType[0] in ['RA','HA']:
                refXsky = round(numpy.mean(xsky[nonZeroXY])*3600.0/15)/(3600.0/15.0)
            else:
                refXsky = round(numpy.mean(xsky[nonZeroXY])*3600.0)/3600.0

    if refYsky is None:
        if args.mapcenter is not None:
            # use user-supplied value
            refYsky = args.mapcenter[1]
        else:
            # nonZeroXY MUST have already been set above to get here
            # do not check that it's set or set it here
            # assume that the Y coordinate is +- 90 and there's no problem
            # with 360/0 or +- 180 confusion as there may be with the X coordinate
            refYsky = round(numpy.mean(ysky[nonZeroXY])*3600.0)/3600.0

    if pix_scale is None:
        if args.pixelwidth is not None:
            # use user-supplied value, convert to degrees
            pix_scale = args.pixelwidth / 3600.0
        else:
            # find the cell size, first from the beam_fwhm
            # Need to decide on the number of cells per beam.  Adam's code uses 4, idlToSdfits uses 6
            # idlToSdfits also rounds up to nearest arcsecond
            pixPerBeam = 6.0
            if args.kernel == "nearest":
                # assume it's nyquist sampled, use 2 pixels per beam
                pixPerBeam = 2.0

            pix_scale = math.ceil(3600.0*beam_fwhm/pixPerBeam)/3600.0

    if xsize is None or ysize is None:
        # set both together
        if args.size is not None:
            # use user-supplied value
            xsize = args.size[0]
            ysize = args.size[1]
        else:
            xRange = xskyMax-xskyMin
            yRange = ysky[nonZeroXY].max()-ysky[nonZeroXY].min()

            # image size, idlToSdfits method
            # padding around border
            # imPadding = math.ceil(45./(pix_scale*3600.0))
            # add in padding and truncate to an integer
            # xsize = int((xRange*1.1/pix_scale)+2*imPadding)
            # ysize = int((yRange*1.1/pix_scale)+2*imPadding)
            # image.py then does this ... 
            # xsize = int((2*round(xsize/1.95)) + 20)
            # ysize = int((2*round(ysize/1.95)) + 20)
            # But idlToSdfits only sees one SDFITS file at a time, so the extra padding makes sense there.
            # With all the data, I think just padding by 10% + 20 pixels is sufficient
            xsize = int(math.ceil(xRange*1.1/pix_scale))+20
            ysize = int(math.ceil(yRange*1.2/pix_scale))+20

    # used only for informational purposes
    centerYsky = refYsky
    if refXpix is None or refYpix is None:
        # both should be set together or unset together
        if args.proj == "TAN":
            # this is how Adam does things in his IDL code
            # the reference pixel is in the center
            refXpix = xsize/2.0
            refYpix = ysize/2.0
        else:
            # must be SFL
            # this is how idlToSdfits+AIPS does things for GLS==SFL
            refXpix = xsize/2.0
            # for the Y axis, this is where we want refYsky to be
            centerYpix = ysize/2.0 + 1.0
            # but by definition, refYsky must be 0.0, so set refYpix
            # so that the current refYsky ends up at centerYpix
            refYpix = centerYpix - refYsky/pix_scale
            # then reset refYsky
            refYsky = 0.0
            
    # gaussian size to use in gridding.
    # this is what Adam used:  gauss_fwhm = beam_fwhm/3.0
    # this duplicates the aparm(2)=1.5*cellsize used by AIPS in the default pipeline settings
    # the following is about 0.41*beam_fwhm vs 0.33*beam_fwhm from Adam - so wider
    gauss_fwhm = (1.5*pix_scale)*2.354/math.sqrt(2.0)

    if verbose > 4:
        print "Data summary ..."
        print "   scans : ", format_scans(uniqueScans)
        print "   channels : %d:%d" % (chanStart, chanStop)
        if args.mintsys is None and args.maxtsys is None:
            print "   no tsys selection"
        else:
            tsysRange = ""
            if args.mintsys is not None:
                tsysRange += "%f" % args.mintsys
            tsysRange += ":"
            if args.maxtsys is not None:
                tsysRange += "%f" % args.maxtsys
            print "   tsys range : ", tsysRange
            print "   flagged outside of tsys range : ", ntsysFlagCount
        # number of spectra actually gridded if wt is being used
        if wt is not None:
            print "   spectra to grid : ", (wt != 0.0).sum()
        else:
            print "   spectra to grid : ", len(xsky)
            print "   using equal weights"

        print ""
        print "Map info ..."
        print "   beam_fwhm : ", beam_fwhm, "(", beam_fwhm*60.0*60.0, " arcsec)"
        print "   pix_scale : ", pix_scale, "(", pix_scale*60.0*60.0, " arcsec)"
        print "  gauss fwhm : ", gauss_fwhm, "(", gauss_fwhm*60.0*60.0, " arcsec)"
        print "    ref Xsky : ", refXsky
        print "    ref Ysky : ", refYsky
        print " center Ysky : ", centerYsky
        print "       xsize : ", xsize
        print "       ysize : ", ysize
        print "    ref Xpix : ", refXpix
        print "    ref Ypix : ", refYpix
        print "          f0 : ", faxis[0]
        print "    delta(f) : ", faxis[1]-faxis[0]
        print "      nchan  : ", len(faxis)
        print "      source : ", source
        print " frest (MHz) : ", frest/1.e6

    # build the initial header object
    # only enough to build the WCS object from it + BEAM size info
    # I had trouble with embedded HISTORY cards and the WCS constructor
    # so those are omitted for now
    hdr = make_header(refXsky, refYsky, xsize, ysize, pix_scale, refXpix, refYpix, coordType, radesys, equinox, frest, faxis, beam_fwhm, veldef, specsys, proj=args.proj, verbose=verbose)

    # relax is turned on here for compatibility with previous images produced by AIPS from the gbtpipeline
    # there may be a better solution
    # even so, it does not like the "-LSR" tag to the CTYPE3 value for the frequency axis
    wcsObj = wcs.WCS(hdr,relax=True)

    if verbose > 3:
        print "Gridding"

    try:
        (cube, weight, beam_fwhm) = grid_otf(data, xsky, ysky, wcsObj, len(faxis), xsize, ysize, pix_scale, weight=wt, beam_fwhm=beam_fwhm, kern=args.kernel, gauss_fwhm=gauss_fwhm, verbose=verbose)
    except MemoryError:
        if verbose > 1:
            print "Not enough memory to create the image cubes necessary to grid this data"
            print "   Requested image size : %d x %d x %d " % (xsize, ysize, len(faxis))
            print "   find a beefier machine, consider restricting the data to fewer channels or using channel averaging"
            print "   or use AIPS (with idlToSdfits) to grid all of this data"
        return

    if cube is None or weight is None:
        if verbose > 1:
            print "Problem gridding data"
        return

    if verbose > 3:
        print "Writing cube"

    # Add in the degenerate STOKES axis
    cube.shape = (1,)+cube.shape
    weight.shape = cube.shape    

    # start writing stuff to disk
    # add additional information to the header
    hdr['object'] = source
    hdr['telescop'] = telescop
    hdr['instrume'] = frontend
    hdr['observer'] = observer
    hdr['date-obs'] = (dateObs,'Observed time of first spectra gridded')
    hdr['date-map'] = (time.strftime("%Y-%m-%dT%H:%M:%S",time.gmtime()),"Created by gbtgridder")
    hdr['date'] = time.strftime("%Y-%m-%d",time.gmtime())
    hdr['obsra'] = refXsky
    hdr['obsdec'] = centerYsky

    if args.kernel == 'gauss':
        hdr.add_comment('Convolved with Gaussian convolution function.')
        hdr['BMAJ'] = beam_fwhm
        hdr['BMIN'] = beam_fwhm
    elif args.kernel == 'gaussbessel':
        hdr.add_comment('Convolved with optimized Gaussian-Bessel convolution function.')
        hdr['BMAJ'] = (beam_fwhm,'*But* not Gaussian.')
        hdr['BMIN'] = (beam_fwhm,'*But* not Gaussian.')
    else:
        hdr.add_comment('Gridded to nearest cell')
        hdr['BMAJ'] = beam_fwhm
        hdr['BMIN'] = beam_fwhm
    hdr['BPA'] = 0.0
    # need to change this to get the actual units from the data
    # could add additional notes to the comment field
    # if Jy, make this Jy/Beam
    if dataUnits == 'Jy':
        dataUnits = 'Jy/Beam'
    hdr['BUNIT'] = (dataUnits,calibType)

    # This suppresses runtime NaN warnings if the cube is empty
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        hdr['DATAMAX'] = numpy.nanmax(cube)

    nanCube = False
    if numpy.isnan(hdr['DATAMAX']):
        nanCube = True
        # this could possibly be done inside the above with block
        # if the warnings catch was more sophisticated
        if verbose > 2:
            print "Entire data cube is not-a-number, this may be because a few channels are consistently bad"
            print "consider restricting the channel range"
        # remove it
        hdr.remove('DATAMAX')
    else:
        hdr['DATAMIN'] = numpy.nanmin(cube)

    # note the parameter values - this must be updated as new parameters are added
    hdr.add_history("gbtgridder version: %s" % gbtgridderVersion)
    if args.channels is not None:
        hdr.add_history("gbtgridder channels: "+args.channels)
    else:
        hdr.add_history("gbtgridder all channels used")
    hdr.add_history("gbtgridder clobber: "+str(args.clobber))
    if average is not None and average > 1:
        hdr.add_history("gbtgridder average: %s channels" % average)
    hdr.add_history("gbtgridder kernel: "+args.kernel)
    if args.output is not None:
        hdr.add_history("gbtgridder output: "+args.output)
    if args.scans is not None:
        hdr.add_history("gbtgridder scans: "+args.scans)
    if args.mintsys is None and args.maxtsys is None:
        hdr.add_history("gbtgridder no tsys selection")
    else:
        if args.mintsys is not None:
            hdr.add_history("gbtgridder mintsys: %f" % args.mintsys)
        if args.maxtsys is not None:
            hdr.add_history("gbtgridder maxtsys: %f" % args.maxtsys)
        hdr.add_history("gbtgridder N spectra outside tsys range: %d" % ntsysFlagCount)
 
    hdr.add_history("gbtgridder sdfits files ...")
    for thisFile in args.SDFITSfiles:
        # protect against long file names - don't use more than one HISTORY row to
        # document this.  80 chars total, 8 for "HISTORY ", 12 for "gbtgridder: "
        # leaving 60 for the file name
        if len(thisFile) > 60:
            thisFile = "*"+thisFile[-59:]
        hdr.add_history("gbtgridder: " + thisFile)

    hdr.add_comment("IEEE not-a-number used for blanked pixels.")
    hdr.add_comment("  FITS (Flexible Image Transport System) format is defined in 'Astronomy")
    hdr.add_comment("  and Astrophysics', volume 376, page 359; bibcode: 2001A&A...376..359H")

    phdu = pyfits.PrimaryHDU(cube, hdr)
    phdu.writeto(outputFiles["cube"])

    if not args.noweight:
        if verbose > 3:
            print "Writing weight cube"
        wtHdr = hdr.copy()
        wtHdr['BUNIT'] = ('weight','Weight cube')  # change from K -> weight
        wtHdr['DATAMAX'] = numpy.nanmax(weight)
        wtHdr['DATAMIN'] = numpy.nanmin(weight)

        phdu = pyfits.PrimaryHDU(weight, wtHdr)
        phdu.writeto(outputFiles["weight"])

    if not args.nocont:
        if verbose > 3:
            print "Writing 'cont' image"
        # "cont" map, sum along the spectral axis
        # SQUASH does a weighted average
        # As implemented here, this is equivalent if there are equal weights along the spectral axis
        # doing a weighted average using numpy.average and ignoring NaNs would be tricky here
        # some slices may be all NaNs (but an entire cube of NaNs was tested for earlier)
        # this suppresses that warning
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            cont_map = numpy.nanmean(cube,axis=1)

        contHdr = hdr.copy()
        # AIPS just changes the channel count on the frequency axis, leaving everything else the same
        contHdr['NAXIS3'] = 1
        # restore the now-degenerate frequency axis to the shape
        cont_map.shape = (1,)+cont_map.shape
        contHdr.add_history('gbtgridder: average of cube along spectral axis')
        contHdr['DATAMAX'] = numpy.nanmax(cont_map)
        contHdr['DATAMIN'] = numpy.nanmin(cont_map)
        phdu = pyfits.PrimaryHDU(cont_map, contHdr)
        phdu.writeto(outputFiles["cont"])

    if not args.noline:
        if verbose > 3:
            print "Writing line image"
        # "line" map, subtract the along the spectral axis from every plane in the data_cube
        # replace the 0 channel with the avg
        # first, find the average over the baseline region
        n = len(faxis)
        baseRegion = [int(round(0.04*n)),int(round(0.12*n)),int(round(0.81*n)),int(round(0.89*n))]
        # construct an index from  these regions
        baseIndx = numpy.arange(baseRegion[1]-baseRegion[0]+1)+baseRegion[0]
        baseIndx = numpy.append(baseIndx,numpy.arange(baseRegion[3]-baseRegion[2]+1)+baseRegion[2])
        # this should probably be a weighted average
        avg_map = numpy.average(cube[:,baseIndx,:,:],axis=1)
        cube -= avg_map
        cube[:,0,:,:] = avg_map
        hdr['DATAMAX'] = numpy.nanmax(cube)
        hdr['DATAMIN'] = numpy.nanmin(cube)
        hdr.add_history('gbtgridder: subtracted an average over baseline region on freq axis')
        hdr.add_history('gbtgridder: average over channels: %d:%d and %d:%d' % tuple(baseRegion))
        hdr.add_history('gbtgridder: channel 0 replaced with averages')
        phdu = pyfits.PrimaryHDU(cube,hdr)
        phdu.writeto(outputFiles["line"])

    return
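
# A minimal sketch (not part of gbtgridder) of the baseline-region "line" step above,
# run on a toy cube; "toy_cube" and "nchan" are made-up names and sizes.
import numpy

nchan = 100
toy_cube = numpy.random.randn(1, nchan, 8, 8)              # (stokes, freq, y, x)

# baseline region: 4-12% and 81-89% of the channels, as in the code above
region = [int(round(f*nchan)) for f in (0.04, 0.12, 0.81, 0.89)]
idx = numpy.concatenate((numpy.arange(region[0], region[1]+1),
                         numpy.arange(region[2], region[3]+1)))

avg = toy_cube[:, idx, :, :].mean(axis=1)                  # unweighted average over the baseline channels
line = toy_cube - avg                                      # subtract it from every plane
line[:, 0, :, :] = avg                                     # channel 0 keeps the average, as above
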
Exemplo n.º 45
0
            
            self.isbinary=False
            if self.isbinary:
                self.sigma_signal = 1 * self.imresize_factor
            else:
                self.sigma_signal = 50
            self.ll_type=['gaussian','gaussian_on_distancetransform'][self.isbinary]
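            # indexing the list with the boolean picks 'gaussian' when isbinary is False (0)
            # and 'gaussian_on_distancetransform' when it is True (1)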
                    
        
    inference_params = InferenceParams()  
     
    name='MNIST_one_00001_to_00015'
#    name =  'MNIST_four_00009_to_00013'
    name =  'MNIST_four_00013_to_00009'
#    name =  'MNIST_four_00022_to_00009'
    data = get_data(name=name,
                    imresize_factor=inference_params.imresize_factor)

    dname_results = os.path.join(HOME,'data/derived/MNIST/examples',name)
    FilesDirs.mkdirs_if_needed(dname_results)
#    fname_results = os.path.join(dname_results ,get_time_stamp()+'.pkl')
    
    fname_results = os.path.join(dname_results ,'result.pkl')


    dispOn = False or 1
    reg,inference_record,theta_est = main(data,inference_params,dispOn=dispOn)


    

Exemplo n.º 46
0
from mk_display import mk_display
from get_data import get_data

#path where the retrieved csv files will be saved
file_path=""

if __name__ == '__main__':

    id_list,appId=mk_display()
    
    for id in id_list:
        get_data(appId,id,file_path)
while True:

    # Wake up periodically to check time
    while goal_time > time.time():
        time.sleep(0.1)

    # Record the time of the next iteration
    cur_time = goal_time
    goal_time += granularity_in_seconds
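    # note: advancing goal_time by a fixed step (rather than "now + granularity")
    # keeps the sampling cadence from drifting when an iteration runs long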

    #if __debug__:
    print "\nTrying time", dt.datetime.now().strftime(DATE_FORMAT)

    # Retrieve sensor data from ZServer
    try:
        new_data = get_data(ZServer)
    except Exception:
        logging.error("ZServer Connection Lost. Ending analysis.")
        exit(1)
    #new_data[0] contains timestamp which is not used
    #new_data[0] gets replaced by Audio Sensor Data
    try:
        new_data[0] = get_sound()
    except Exception:
        logging.error("Audio Sensor Connection Lost. Ending analysis")
        exit(1)    

    #get current energy reading
    cur_row = (row_count) % matrix_length
    og_row = row_count % Avg_over
    T_Power =  float(get_power(config_dict))
Exemplo n.º 48
0
from itertools import islice
import copy
import numpy as np
import get_data

#split list 'lis' into rows of 'n' elements each (integer division, so any remainder is dropped)
def solve(lis, n):
  it = iter(lis)
  return [list(islice(it,n)) for _ in xrange(len(lis)/n)]

#concatenate the sublists of 'list_' into one flat list
def concate_list(list_):
  total = []
  for i in list_:
    total += i
  return total
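
#usage examples (illustrative):
#  solve([1, 2, 3, 4, 5, 6], 2) -> [[1, 2], [3, 4], [5, 6]]
#  concate_list([[1, 2], [3, 4]]) -> [1, 2, 3, 4]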

data = np.array(get_data.get_data('testdir'))
files = copy.deepcopy(data)
#unfold ranges into list of integers
for i in range(0,len(data)):
  ranges = data[i]
  for j in range(0,len(ranges)):
    ranges[j] = range(int(ranges[j][0]),int(ranges[j][1])+1)

#replace each range with its start value repeated (end - start) times, then append the file index (i+1)
for i in range(0,len(files)):
  ranges = files[i]
  for j in range(0,len(ranges)):
    size = ranges[j][1] - ranges[j][0]
    ranges[j] = [int(ranges[j][0])]*size
    ranges[j].append(i+1)
Exemplo n.º 49
0
def cal_payment():
    file = get_data(time_df)
    period_hour = {}
    parent_hour = {}
    
    shift_pay = []
    hour_day_list = {}
    
    for row in file.iterrows():
        _parent = row[1]["Parent ID"]
        _id          = row[1]["ContactID"]
        _key_parent = str(_id) + " " + str(_parent)
        _hour = row[1]["Quantity"]  
        if _key_parent in hour_day_list:
            hour_day_list[_key_parent] += _hour
        else:
            hour_day_list[_key_parent]= _hour
        
    for row in file.iterrows():
        _id          = row[1]["ContactID"]
        _period      = row[1]["Start Period"] 
        _key_period  = str(_id) + " " + str(_period)
        _hour = row[1]["Quantity"]   
        _rate = row[1]["Factor"]
        _ot1_rate = row[1]["Rate OT1"]
        _ot2_rate = row[1]["Rate OT2"]
        _type = row[1]["Type"]
        _parent = row[1]["Parent ID"]
        _object_id = row[1]["Object ID"]
        _key_parent = str(_id) + " " + str(_parent)
        base_rate = row[1]['Amount']
        payment = 0
        maxot_1 = get_maxot1(_type) 
        

        total_hour_parent = parent_hour[_key_parent] if _key_parent in parent_hour else 0      

        total_hour_period = period_hour[_key_period] if _key_period in period_hour else 0

        if (total_hour_parent + _hour <= maxot_1): 
            _hour_new = _hour + ((3 - hour_day_list[_key_parent]) if list(file[file["Parent ID"] ==_parent]['Object ID'])[-1] == _object_id and hour_day_list[_key_parent] < 3 else 0)

            if total_hour_period + _hour <= 38:
                payment = base_rate * _hour_new * _rate
                
            elif total_hour_period >= 40 :
              
                payment = base_rate * _hour_new * _ot2_rate
                
            elif total_hour_period >=38 and total_hour_period +  _hour < 40:
               
                payment = base_rate * _hour_new * _ot1_rate

            elif total_hour_period >=38 and total_hour_period +  _hour >= 40:  
                payment = base_rate*((40-total_hour_period)*_ot1_rate + (_hour_new + total_hour_period - 40)*_ot2_rate)

            elif total_hour_period + _hour_new  >  38 and total_hour_period < 38:
                ot_hour = (total_hour_period + _hour) - 38 

                base_hour = 38 - total_hour_period 
            

                base_pay = base_rate*(base_hour * _rate)

                ot_pay = base_rate*(min(2,ot_hour)*_ot1_rate + max(0,ot_hour-2)*_ot2_rate)

                payment = base_pay + ot_pay

            total_hour_period += _hour
            total_hour_parent += _hour
            
        else:   
            
            if total_hour_period + min(_hour,maxot_1) <= 38:   

                _hour_new = maxot_1 - total_hour_parent
                _hour_ot = total_hour_parent + _hour - maxot_1
                payment = (_hour_new * _rate + min(2, _hour_ot) * _ot1_rate + max(_hour_ot - 2, 0) * _ot2_rate)*base_rate
                
            elif total_hour_period >= 40 :              
                payment = base_rate*(max(_hour,3)*_ot2_rate)

            elif total_hour_period >=38 and total_hour_period + _hour <40:               
                payment = base_rate*(3*_ot1_rate)

            elif total_hour_period >=38 and total_hour_period + _hour >=40:    
                payment = base_rate*((40-total_hour_period)*_ot1_rate + (max(_hour,3) + total_hour_period-40)*_ot2_rate)

            elif total_hour_period + min(_hour,maxot_1)  >  38 and total_hour_period < 38:
                ot_hour = (total_hour_period + _hour) - 38 

                base_hour = 38 - total_hour_period 
            
                base_pay = base_rate*(base_hour *_rate)

                ot_pay = base_rate*(min(2,ot_hour)*_ot1_rate + max(0,ot_hour-2)*_ot2_rate)

                payment = base_pay + ot_pay
            
            total_hour_period += (maxot_1 - total_hour_parent) if total_hour_parent < maxot_1 else 0
            total_hour_parent += min(_hour,maxot_1) 
        
        parent_hour[_key_parent] = total_hour_parent
      
        period_hour[_key_period] = total_hour_period 
    
        shift_pay.append(payment)
    
    file["Payment"] = shift_pay
    
    return file.drop(['Rate OT1', 'Rate OT2'], axis='columns')
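
# A simplified sketch of the hour-band split used above (not the full cal_payment logic,
# which also handles the per-parent maxot_1 cap and the 3-hour minimum top-up); all
# names here are hypothetical.
def split_shift_pay(hours_so_far, shift_hours, base_rate, factor, ot1_rate, ot2_rate):
    end = hours_so_far + shift_hours
    base_hours = max(0.0, min(end, 38.0) - hours_so_far)              # portion below 38 h
    ot1_hours = max(0.0, min(end, 40.0) - max(hours_so_far, 38.0))    # portion between 38 h and 40 h
    ot2_hours = max(0.0, end - max(hours_so_far, 40.0))               # portion above 40 h
    return base_rate * (base_hours * factor + ot1_hours * ot1_rate + ot2_hours * ot2_rate)

# e.g. a 5 h shift starting at 36 h already worked: 2 h at factor, 2 h at OT1, 1 h at OT2
# split_shift_pay(36, 5, 20.0, 1.0, 1.5, 2.0) == 20*(2*1.0 + 2*1.5 + 1*2.0) == 140.0
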
Exemplo n.º 50
0
def run(experiment_detailed_name, warm_start_percentage, strategy_kwargs, id_folds, strategy_projection_h,
        batch_size, fingerprint, strategy, protein,\
        base_model, base_model_kwargs, param_grid, \
        preprocess_fncs, loader_function, loader_args, seed, _config):

    logger = get_logger(experiment_detailed_name)

    assert preprocess_fncs != 0, "Please pass preprocess_fncs"
    assert loader_function != 0, "Please pass loader_function"
    assert loader_args != 0, "Please pass loader_args"
    assert protein != 0, "Please pass protein"
    assert fingerprint != 0, "Please pass fingerprint"
    assert seed != -1, "Please pass seed"

    strategy_kwargs = copy.deepcopy(strategy_kwargs)
    loader_args = copy.deepcopy(loader_args)
    loader_function = copy.deepcopy(loader_function)
    base_model_kwargs = copy.deepcopy(base_model_kwargs)

    ## Prepare data loader ##
    loader = [loader_function, loader_args]
    comp = [[protein, fingerprint]]


    adaptive_grid = False
    # Construct model with fixed projection
    if "_" in base_model:

        logger.info(base_model.split("_")[0])
        base_model_cls = globals()[base_model.split("_")[0]]
        if base_model.split("_")[1] == "adaptivegrid":
            adaptive_grid = True
        else:
            raise ValueError("Unrecognized base model format")
        base_model = base_model.split("_")[0]
    else:
        if base_model not in globals():
            raise ValueError("Not imported base_model class into global namespace. Aborting")

        base_model_cls = globals()[base_model]

    if "h" in param_grid:
        projector_cls = partial(FixedProjector, h_max=max(param_grid["h"]), projector=RandomProjector())
    else:
        projector_cls = None

    strategy = find_obj(strategy)

    logger.error("Strategy_projection_h="+str(strategy_projection_h))

    model_cls = partial(ActiveLearningExperiment, logger=logger, adaptive_grid=adaptive_grid,
                        strategy=strategy, batch_size=batch_size,strategy_projection_h=strategy_projection_h,
                        strategy_kwargs=strategy_kwargs, param_grid=param_grid)

    folds, _, _ = get_data(comp, loader, preprocess_fncs).values()[0]

    logger.info("Fitting on loader "+str(loader) + " preprocess_fncs="+str(preprocess_fncs))
    logger.info(folds[0]["X_train"]["data"].shape)



    metrics, monitors = fit_AL_on_folds(model_cls=model_cls, base_model_cls=base_model_cls, base_model_kwargs=base_model_kwargs, \
                                        projector_cls=projector_cls,
                                        folds=folds, logger=logger, id_folds=id_folds,
                                        base_seed=seed, warm_start_percentage=warm_start_percentage)
    misc = {}
    if id_folds == -1 or len(id_folds) == len(folds):
        mean_monitor = {k: np.zeros(len(v)) for k, v in monitors[0].iteritems() if isinstance(v, list)}

        for fold_monitor in monitors:
            for key in mean_monitor.keys():
                mean_monitor[key] += np.array(fold_monitor[key])

        for key, values in dict(mean_monitor).iteritems():
            mean_monitor[key] = values / len(monitors)
            metrics['auc_' + key] = auc(np.arange(values.shape[0]), values)

        misc = {'mean_monitor': mean_monitor}


    misc['X_train_size'] = folds[0]["X_train"]["data"].shape
    misc['X_valid_size'] = folds[0]["X_valid"]["data"].shape

    logger.info("Logging following keys in monitors: "+str(monitors[0].keys()))

    return ExperimentResults(results=dict(metrics), misc=misc, monitors=monitors, dumps={}, \
                             config=_config, name=experiment_detailed_name)
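
# A minimal sketch (assumed, not from the experiment code) of the fold-averaging done above:
# element-wise mean of one monitored curve across folds, then the area under the mean curve.
# "auc" is assumed to be sklearn.metrics.auc, since this snippet's own imports are not shown.
import numpy as np
from sklearn.metrics import auc

fold_curves = [np.array([0.50, 0.60, 0.70]),     # hypothetical per-fold monitor values
               np.array([0.55, 0.65, 0.75])]

mean_curve = np.mean(fold_curves, axis=0)        # element-wise mean across folds
auc_of_curve = auc(np.arange(mean_curve.shape[0]), mean_curve)   # trapezoid rule over the iteration index
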
Exemplo n.º 51
0
                         rule_list):
    for sub_set in sub_set_list:
        if sub_set.issubset(freq_set):
            conf = item_support[freq_set] / item_support[freq_set - sub_set]
            rule = (freq_set - sub_set, sub_set, conf)
            if conf >= confidence and rule not in rule_list:
                # print freq_set-sub_set, " => ", sub_set, "conf: ", conf
                rule_list.append(rule)
    sub_set_list.append(freq_set)
    return rule_list
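
# e.g. (illustrative numbers): if item_support[{A, B}] = 0.4 and item_support[{A}] = 0.5,
# the rule A => B gets conf = 0.4/0.5 = 0.8 and is kept for any confidence threshold <= 0.8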


if __name__ == "__main__":
    starttime = datetime.datetime.now()
    print "Apriori algorithm begin" + "=" * 28
    data_set = g.get_data()
    # data_set = g.getSimpleTestData2()
    support = 0.4
    confidence = 0.5
    L, item_support, rule_list = apriori(data_set, support, confidence)
    for Lk in L:
        if len(Lk) > 0:
            print "=" * 50
            print "frequent " + str(len(list(Lk[0]))) + "-itemsets\t\tsupport"
            print "=" * 50
            for freq_set in Lk:
                print freq_set, "\t\t", item_support[freq_set]
    print
    print "Association rules" + "=" * 33
    for item in rule_list:
        print item[0], "=>", item[1], "confidence: ", item[2]
Exemplo n.º 52
0
# -*- coding: utf-8 -*-
from get_data import get_data
import pandas as pd

data = get_data(2021, 1)
s = pd.Series(data.split()).astype(int)

part1 = (s > s.shift()).sum()
print(part1)

rolling = s.rolling(3).sum()
part2 = (rolling > rolling.shift()).sum()
print(part2)
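
# With the well-known sample depths 199 200 208 210 200 207 240 269 260 263,
# part1 counts 7 single-step increases and part2 counts 5 increases of the 3-measurement sums.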
Exemplo n.º 53
0
from get_data import get_data
import traceback

dataset = "genomes"
goal = 0.7
precisions = []
topk = "R1"
max_time = 2
for method, data in get_data(dataset):
	max_recall = 0
	try:
		best_precision = 0		
		for measure, time, recall, precision in data:
			if measure.split("@")[0] == topk and recall > goal and time < max_time:
				max_recall = max(recall, max_recall)
				best_precision = max(best_precision, precision)

		if best_precision != 0:
			precisions.append(best_precision)
			print(method, best_precision)	
			print(max_recall)
	except Exception as e:
		traceback.print_exc() 
		pass

precisions.sort()
print(f"At {goal} recall with max time of {max_time}ms, the best method on {dataset} is {precisions[-1] / precisions[-2]} better on precision")
Exemplo n.º 54
0
            #print out a message to the screen that we have collected a tweet
            print("Tweet collected at " + str(created_at))

            #insert the data into the mongoDB into a collection called twitter_search
            #if twitter_search doesn't exist, it will be created.
            db.twitter_search.insert(datajson)
        except Exception as e:
            print(e)


#Set up the listener. The 'wait_on_rate_limit=True' is needed to help with Twitter API rate limiting.

filterList = []
lastTable = ["cliente"]
clientesIdList = get_data.get_data(
    """ SELECT "ID" FROM public."Cliente" ORDER BY "ID" ASC """)
'''
TO-DO:
Full window focus, i.e. don't let me switch to another window while I'm editing something.
All of the internals are still missing. I need to read more about psycopg; to build them I NEED to create the tables first.
'''
'''
FUNCTIONS
(Functions exclusive to the GUI, aka button commands and hotkeys.)

Functions related to actual inner workings are imported from their respective libraries
'''

#Open a window to choose filters and display rows

Exemplo n.º 55
0
    ax3.get_xaxis().set_visible(False)
    ax3.get_yaxis().set_visible(False)

    ax4 = fig.add_subplot(gs[2:, :2], zorder=1)
    for key, spine in ax4.spines.items():
        spine.set_visible(False)
    ax4.get_xaxis().set_visible(False)
    ax4.get_yaxis().set_visible(False)

    ax1.set_ylabel("Fraction of reflectance", size=12)
    ax1.set_xlabel("Wavelength", size=12)

    plt.tight_layout()

x, y = get_data.get_data("mango", "as7265x", int_time=150,
                         position=2, led="b'White'",
                         led_current="25 mA")
print(y.columns)
y = y['Avg Total Chlorophyll (µg/cm2)']

print(x)


def runner(i):
    j = i - 20
    print('i = ', i, j)


    if j == 0:
        make_axis()
Exemplo n.º 56
0
def load_and_save(config_path):
    config = read_params(config_path)
    df = get_data(config_path)
    new_cols = [col.replace(" ", "_") for col in df.columns]
    raw_data_path = config["load_data"]["raw_dataset_csv"]
    df.to_csv(raw_data_path, index=False, header=new_cols)
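
# e.g. a hypothetical column named "fixed acidity" would be written to the raw CSV as "fixed_acidity"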
import get_data,convertmass
import matplotlib.pyplot as plt
import numpy as np


pdat,pobs,pobsnr,pobsr = get_data.get_data('P')
hdat,hobs,hobsnr,hobsr = get_data.get_data('H')

pbinary = ((pdat.field('RPRIME_K')<=4.) & (pdat.field('BINARY')>0))
hbinary = ((hdat.field('RPRIME_K')<=4.) & (hdat.field('BINARY')>0))

pmass = pdat.field('KH_MASS')
hmass = hdat.field('KH_MASS')

kun = hdat.field('KUNDERT_PROT')
delh = hdat.field('DELORME_LITP')
delp = pdat.field('SWASP_PERIOD')
sch = pdat.field('SCHOLZ_PERIOD')
ptf = pdat.field('PTF_PERIOD')
pperiods = pdat.field('PERIOD')
hperiods = hdat.field('PERIOD')
pflag = pdat.field('PERIOD_FLAG')
hflag = hdat.field('PERIOD_FLAG')
ptf_flag = pdat.field('PTF_FLAG')

bad = np.where((kun>0) & (delh>0) & (abs(kun-delh)>0.1))[0]
print bad
delh[bad] = -99.

plt.figure(figsize=(9,8))
ax = plt.subplot(111)
Exemplo n.º 58
0
interface   : FastEthernet0/4
switch      : sw1
----------------------------------------

$ python get_data1.py
--------------------------------------------------------------------------------
Active values:
--------------------------------------------------------------------------------
00:09:BB:3D:D6:58  10.1.10.2         10    FastEthernet0/1    sw1         1
00:04:A3:3E:5B:69  10.1.5.2          5     FastEthernet0/10   sw1         1
00:05:B3:7E:9B:60  10.1.5.4          5     FastEthernet0/9    sw1         1
00:07:BC:3F:A6:50  10.1.10.6         10    FastEthernet0/3    sw1         1
00:09:BC:3F:A6:50  192.168.100.100   1     FastEthernet0/7    sw1         1
00:B4:A3:3E:5B:69  10.1.5.20         5     FastEthernet0/5    sw2         1
00:C5:B3:7E:9B:60  10.1.5.40         5     FastEthernet0/9    sw2         1
00:A9:BC:3F:A6:50  10.1.10.60        20    FastEthernet0/2    sw2         1
--------------------------------------------------------------------------------
Inactive values:
--------------------------------------------------------------------------------
00:A9:BB:3D:D6:58  10.1.10.20        10    FastEthernet0/7    sw2         0

'''

from get_data import get_data

get_data('ip', '10.1.10.2')
get_data('vlan', '10')
get_data('vlan')
get_data('10')
get_data()
Exemplo n.º 59
0
from __future__ import print_function

from argparse import ArgumentParser

from get_data import get_data
from setup_data import setup_data
from process_data import process_data
from run_walsh_alg import run

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument('-s', '--settings', help='Settings file')
    parser.add_argument('-r', '--remove', help='Remove', action='store_true')
    args = parser.parse_args()

    if not args.settings:
        import settings.default as settings
    else:
        raise Exception('not impl')

    if settings.GET_DATA:
        get_data(settings)
    if settings.SETUP_DATA:
        setup_data(settings)
    if settings.PROCESS_DATA:
        process_data(settings)
    if settings.RUN_WALSH:
        run()
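
# A guess at the shape of settings/default.py implied above (flag names come from the
# code, the values and any other settings are assumed):
#
#   GET_DATA = True       # fetch the raw data
#   SETUP_DATA = True     # arrange it for processing
#   PROCESS_DATA = True   # run the processing step
#   RUN_WALSH = True      # run the Walsh algorithm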


Exemplo n.º 60
0
from flask import render_template, request

from get_data import get_data  # assumed to follow the get_data module pattern used above


def send():
    heading = ['年月', '参加回数', '平均', '最高', '最低']  # year/month, contests entered, average, highest, lowest
    user = request.form['User']
    url = "https://atcoder.jp/users/" + user + "/history/json"
    data_list = get_data(url)
    return render_template('index.html', heading=heading, data_list=data_list)
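
# A sketch (assumed, not the author's get_data) of fetching the history JSON that the
# route above renders; how it gets aggregated into the year/month rows is not shown.
import requests

def fetch_history(url):
    # the AtCoder user history endpoint returns a JSON list of contest results
    resp = requests.get(url)
    resp.raise_for_status()
    return resp.json()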