Example #1
def chaikin(comp_idx, comp_name_conv):
#    comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV
    cur_date = pg_query(PSQL.client, "SELECT max(date) FROM portfolio.day_chaikin;")[0].values[0]
    nxt_id, comp_cur = det_cur_chaikin(PSQL)
    total_stocks = len(comp_idx)
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status = comp_name_conv[rh_id])
        if rh_id in comp_cur.keys() and cur_date <= np.datetime64(comp_cur[rh_id]):
            continue
        
        comp_data = pg_query(PSQL.client, "SELECT date, adl FROM portfolio.adl_day where rh_id = '%s';" % (rh_id))
        comp_data.rename(columns = {0:'date', 1:'adl'}, inplace = True)
        comp_data.sort_values('date', ascending = True, inplace = True)     
        
        if len(comp_data) < 11:
            continue
        adl_3_ema = calc_ema(comp_data, 3, 'adl')
        adl_10_ema = calc_ema(comp_data, 10, 'adl')
        
        adl_ema = pd.merge(adl_3_ema, adl_10_ema, left_on = 'date', right_on = 'date')

        adl_ema['chaikin'] = adl_ema['adl_3_ema'] - adl_ema['adl_10_ema'] 

        for date, chaikin in adl_ema[['date', 'chaikin']].values:
            if rh_id in comp_cur.keys() and date <= np.datetime64(comp_cur[rh_id]):
                continue
            if chaikin != chaikin:  # NaN check (NaN is never equal to itself)
                continue
            if chaikin == np.inf or chaikin == -np.inf:
                continue
            script = "INSERT INTO portfolio.day_chaikin(day_chaikin_id, rh_id, date, chaikin) VALUES (%i, '%s', '%s', %.4f);" % (nxt_id, rh_id, datetime.strptime(str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), chaikin)
            pg_insert(PSQL.client, script)
            nxt_id += 1
            comp_cur[rh_id] = date
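# calc_ema() is defined elsewhere in this project; a minimal sketch of what it is
# assumed to return (a frame with 'date' and '<col>_<span>_ema' columns, so that the
# merge above yields 'adl_3_ema' and 'adl_10_ema'), using pandas' exponential weighting:
def calc_ema_sketch(frame, span, col):
    out = frame[['date']].copy()
    out['%s_%i_ema' % (col, span)] = frame[col].ewm(span=span, adjust=False).mean()
    return out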
Example #2
def test_solver(x, y, clf, prev_score, verbose=False):
    #    x, y, clf, scaler, prev_score, verbose = X[features], Y, log_clf, scale, log_checkpoint_score, False
    print('Searching for best solver.')
    model = deepcopy(clf)
    scores = {}
    all_solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
    cur_solver = 0
    for solve in all_solvers:
        if not verbose:
            progress(cur_solver, len(all_solvers))
        model.set_params(**{'clf__solver': solve})
        score = cross_validate(x,
                               y,
                               model,
                               only_scores=True,
                               verbose=verbose)
        #        score = test_scaler(model, x, y, verbose = verbose, prev_score = False, prev_scaler = False, skip = False, prog = False)
        if verbose:
            print('%s Score: %.5f' % (solve, score))
        scores[solve] = {'score': score}
        cur_solver += 1
    if not verbose:
        progress(cur_solver, len(all_solvers))
    best_solver = max(scores, key=lambda x: scores[x]['score'])
    if scores[best_solver]['score'] > prev_score:
        print('Using %s.' % (best_solver))
        return (model.set_params(**{'clf__solver': best_solver}),
                scores[best_solver]['score'])
    else:
        print('No Improvement, Using Default')
        return (clf, prev_score)
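# Hypothetical usage: the 'clf__solver' parameter name implies a scikit-learn Pipeline
# with a step named 'clf' holding a LogisticRegression (an assumption -- the pipeline
# is built elsewhere in this project), e.g.:
# from sklearn.pipeline import Pipeline
# from sklearn.linear_model import LogisticRegression
# log_clf = Pipeline([('scale', StandardScaler()), ('clf', LogisticRegression())])
# log_clf, log_checkpoint_score = test_solver(X[features], Y, log_clf, log_checkpoint_score)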
Example #3
def pop_times(psql, _last_time):
#    psql, _last_time = PSQL, last_time
    print('Updating times')
    nxt_time, cur_max_time, psql = _det_current_times_(psql)
    
#    for time_mult in range(0, 1440):
    insert_times = []
    if isinstance(cur_max_time, bool):
        for time_mult in range(0, 1440):
            insert_times.append(_last_time + timedelta(minutes = -30)*time_mult)
    else:
        for time_mult in range(0, 1440):
            if cur_max_time >= _last_time + timedelta(minutes = -30)*time_mult:
                break
            else:
                insert_times.append(_last_time + timedelta(minutes = -30)*time_mult)
                
    total_times = len(insert_times)
    for time_num, (time) in enumerate(reversed(insert_times)):
        progress(time_num, total_times, status = time)

        script = "insert into binance.times (time_id, q_date, q_time, full_time) VALUES (%i, '%s', '%s', '%s')" % (nxt_time, time.date(), time.time(), time)
        pg_insert(psql.client, script)
        nxt_time += 1

    print('\n')
Example #4
def pop_exchanges(psql, _last_time, _prices, mrkt_conv, current_mkt_data, time_conv, binance):
#    psql, _last_time, _prices = PSQL, last_time, prices
    print('Updating exchanges')
    total_exchanges = len(_prices)
    
    for exchange_num, (mkt) in enumerate([i['symbol'] for i in _prices]):
        progress(exchange_num, total_exchanges, status = mkt)
        bootstrap_time = False
        if mkt not in mrkt_conv.keys():
            continue
        if mrkt_conv[mkt] in current_mkt_data.keys():
            if current_mkt_data[mrkt_conv[mkt]] == time_conv[_last_time]:
                continue
            else:
                bootstrap_time = current_mkt_data[mrkt_conv[mkt]]
        klines = binance.get_historical_klines(mkt, '30m', "2 weeks ago UTC")
        if len(klines) > 0:
            mkt_hist = process_kline(klines, time_conv, bootstrap_time)
            for hist in mkt_hist:
                script = "insert into binance.exchanges (ex_time_id, ex_market_id, open, high, low, \
                close, volume, quote_asset_volume, number_of_trades, taker_buy_base_asset_volume, \
                taker_buy_quote_asset_volume) \
                VALUES (%i, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" % (hist['open_time'], mrkt_conv[mkt], hist['open'], hist['high'], hist['low'], hist['close'], hist['volume'], hist['quote_asset_volume'], hist['number_of_trades'], hist['taker_buy_base_asset_volume'], hist['taker_buy_quote_asset_volume'])
                pg_insert(psql.client, script)   
    print('\n')
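# process_kline() is defined elsewhere in this project; a minimal sketch (assumption)
# based on the Binance kline layout [open_time(ms), open, high, low, close, volume,
# close_time, quote_asset_volume, trades, taker_buy_base, taker_buy_quote, ...]:
def process_kline_sketch(klines, time_conv, bootstrap_time):
    rows = []
    for k in klines:
        open_time = datetime.utcfromtimestamp(k[0] / 1000)
        if open_time not in time_conv:
            continue
        if bootstrap_time and time_conv[open_time] <= bootstrap_time:
            continue  # this interval is already stored
        rows.append({'open_time': time_conv[open_time], 'open': k[1], 'high': k[2],
                     'low': k[3], 'close': k[4], 'volume': k[5],
                     'quote_asset_volume': k[7], 'number_of_trades': k[8],
                     'taker_buy_base_asset_volume': k[9],
                     'taker_buy_quote_asset_volume': k[10]})
    return rows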
Example #5
def feat_selector_optimization(x_, y_):
    #    x_, y_ = proc_data[drivers], proc_data[target_col]

    cross_val_scores = {}

    tot_runs = 10 * 7 * 5 * 6
    run_num = 0
    for trees in np.linspace(10, 100, 10):
        for depth in np.linspace(3, 21, 7):
            for samples in np.linspace(.1, .9, 5):
                for feats in np.linspace(.1, 1, 6):
                    run_num += 1
                    _x, _y = deepcopy(x_), deepcopy(y_)
                    estimator = ExtraTreesClassifier(max_features=feats,
                                                     min_samples_split=samples,
                                                     max_depth=int(depth),
                                                     n_estimators=int(trees),
                                                     random_state=53)
                    cross_val_scores[cross_validate(_x, _y, estimator,
                                                    StandardScaler(),
                                                    'logloss')] = {
                                                        'max_features': feats,
                                                        'min_samples_split':
                                                        samples,
                                                        'max_depth': depth,
                                                        'n_estimators': trees
                                                    }

                    progress(run_num, tot_runs)
    print('\n')
    best_params = cross_val_scores[max(cross_val_scores.keys())]

    return (best_params)
Example #6
def pull_tweets(psql, twitter):
    page = requests.get('https://coinmarketcap.com/all/views/all/')
    tree = html.fromstring(page.content)
    coin_name_conv = {v:k for k,v in zip(tree.xpath('//*[@id="currencies-all"]/tbody/tr/td[2]/a/text()'), tree.xpath('//*[@id="currencies-all"]/tbody/tr/td[3]/text()'))}

    coins = pg_query(psql.client, 'SELECT * FROM binance.coins')
    use_coins = {i: coin_name_conv[i] for i in coins[1].values if i in coin_name_conv.keys()}
    coin_id_conv = {v:k for k,v in coins.values}
        
    coin_num = 0
    total_coins = len(use_coins.keys())
    tweets = []
    for symbol, name in use_coins.items():
        progress(coin_num, total_coins, status = symbol)
        fetched_tweets = twitter.search(q = '%s|%s' % (symbol.lower(), name.lower()), count = 10, tweet_mode='extended')
        for tweet in fetched_tweets: 
            # empty dictionary to store required params of a tweet   
            parsed_tweet = {} 
            
            tweet_data = tweet._json
            
            if 'retweeted_status' in tweet_data.keys():
                tweet_data = tweet_data['retweeted_status']
            
            parsed_tweet['text'] = clean_tweet(tweet_data['full_text'])
            parsed_tweet['id'] = tweet_data['id']
            parsed_tweet['rt'] = tweet_data['retweet_count']
            parsed_tweet['fav'] = tweet_data['favorite_count']
            parsed_tweet['user'] = tweet_data['user']['id']
            parsed_tweet['verified'] = tweet_data['user']['verified']
            parsed_tweet['followers'] = tweet_data['user']['followers_count']
            parsed_tweet['datetime'] = datetime.strptime(''.join([' '.join(i.split(' ')[1:]) for i in tweet_data['created_at'].split('+')]), '%b %d %H:%M:%S %Y')        
            tweets.append(parsed_tweet)
        coin_num += 1
    return(tweets, coin_id_conv, use_coins)
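# clean_tweet() is defined elsewhere in this project; a minimal sketch (assumption)
# that lowercases the text and strips links, mentions, and punctuation, which is what
# the substring matching in nlu_insert() below appears to rely on:
import re
def clean_tweet_sketch(text):
    text = re.sub(r'http\S+|@\w+', ' ', text)             # drop links and mentions
    return re.sub(r'[^A-Za-z0-9 ]+', ' ', text).lower().strip()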
Example #7
def nlu_insert(nlu_tweet_data, nlu, _psql_, _coin_id_conv, _use_coins):
#    tweet_data = pg_query(PSQL.client, 'select tweet_id, content from binance.twitter_tweets;')
#    nlus = pg_query(PSQL.client, 'select tweet_id from binance.twitter_nlu;')
#    nlus = list(set(nlus[0].values))
#    tweet_idx = [i for i in tweet_data[0] if i not in nlus]
#    tweet_data = tweet_data.set_index(0).loc[tweet_idx].reset_index()
#    nlu_tweet_data, nlu, _psql_, _coin_id_conv, _use_coins = tweet_data[[0,5]], NLU, PSQL, coin_id_conv, use_coins
#    
    matches = {}
    for tweet_id, content in nlu_tweet_data[[0,5]].values:
        matches[tweet_id] = {}
        for symbol, name in _use_coins.items():
            sym_name_match = []
            if name.lower() in content:
                sym_name_match.append(name.lower())
            if symbol.lower() in content:
                sym_name_match.append(symbol.lower())
            if len(sym_name_match) > 0:
                matches[tweet_id][symbol] = sym_name_match
                 
    nlu_idx = _det_current_nlu(_psql_)            
    indexed_tweet_data = nlu_tweet_data.set_index(0)      
    tweet_num = 0
    total_tweets = len(matches.keys())
    for k,v in matches.items():
        progress(tweet_num, total_tweets)
        tweet_num += 1
        targets = []
        for vv in v.values():
            targets += vv
        try:
            nlu_output = nlu.analyze(text=indexed_tweet_data[5].loc[k], features = Features(emotion=EmotionOptions(targets=targets), sentiment=SentimentOptions(targets=targets))).result
        except:
            continue
    
        coin_output = {}
        all_sentiment = nlu_output['sentiment']['targets']
        try:
            all_emotion = nlu_output['emotion']['targets']
        except:
            continue
    
        for coin, names in v.items():
            coin_nlu = {}
            if len([i for i in all_sentiment if i['text'] in names]) == 0:
                continue
            if len([i for i in all_emotion if i['text'] in names]) == 0:
                continue
            coin_nlu['sentiment'] = np.mean([i['score'] for i in all_sentiment if i['text'] in names])
            coin_nlu['sadness'] = np.mean([i['emotion']['sadness'] for i in all_emotion if i['text'] in names])
            coin_nlu['joy'] = np.mean([i['emotion']['joy'] for i in all_emotion if i['text'] in names])
            coin_nlu['fear'] = np.mean([i['emotion']['fear'] for i in all_emotion if i['text'] in names])
            coin_nlu['disgust'] = np.mean([i['emotion']['disgust'] for i in all_emotion if i['text'] in names])
            coin_nlu['anger'] = np.mean([i['emotion']['anger'] for i in all_emotion if i['text'] in names])
            coin_output[coin] = coin_nlu  
            
        for nlu_coin, nlu_values in coin_output.items():
            script = "insert into binance.twitter_nlu (nlu_id, tweet_id, coin_id, sentiment, sadness, joy, fear, disgust, anger) VALUES (%i, %i, %i, %f, %f, %f, %f, %f, %f)" % (nlu_idx, k, _coin_id_conv[nlu_coin], nlu_values['sentiment'], nlu_values['sadness'], nlu_values['joy'], nlu_values['fear'], nlu_values['disgust'], nlu_values['anger'])
            pg_insert(_psql_.client, script)
            nlu_idx += 1
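# The Features / EmotionOptions / SentimentOptions classes and the nlu client are
# assumed to come from the IBM Watson NLU SDK, e.g. (assumption about the SDK in use):
# from ibm_watson import NaturalLanguageUnderstandingV1
# from ibm_watson.natural_language_understanding_v1 import (
#     Features, EmotionOptions, SentimentOptions)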
Example #8
def commentExtract(videoId, count=-1):
    print("\nComments downloading")
    # Close the HTTP connection and increase the number of reconnect attempts

    page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))
    while page_info.status_code != 200:
        if page_info.status_code != 429:
            print("Comments disabled")
            sys.exit()

        time.sleep(20)
        page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))

    page_info = page_info.json()
    # test
    # print(page_info)

    comments = []
    co = 0
    for i in range(len(page_info['items'])):
        # Keep only comments with 3000+ likes; change the threshold as needed
        if page_info['items'][i]['snippet']['topLevelComment']['snippet'][
                'likeCount'] >= 3000:
            comments.append(page_info['items'][i]['snippet']['topLevelComment']
                            ['snippet']['textOriginal'])
            co += 1
        if co == count:
            PB.progress(co, count, cond=True)
            return comments

    PB.progress(co, count)
    # INFINITE SCROLLING
    while 'nextPageToken' in page_info:
        temp = page_info
        page_info = requests.get(
            YOUTUBE_IN_LINK.format(videoId=videoId,
                                   key=key,
                                   pageToken=page_info['nextPageToken']))

        while page_info.status_code != 200:
            time.sleep(20)
            page_info = requests.get(
                YOUTUBE_IN_LINK.format(videoId=videoId,
                                       key=key,
                                       pageToken=temp['nextPageToken']))
        page_info = page_info.json()

        for i in range(len(page_info['items'])):
            comments.append(page_info['items'][i]['snippet']['topLevelComment']
                            ['snippet']['textOriginal'])
            co += 1
            if co == count:
                PB.progress(co, count, cond=True)
                return comments
        PB.progress(co, count)
    PB.progress(count, count, cond=True)
    print()

    return comments
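# YOUTUBE_LINK / YOUTUBE_IN_LINK are module-level constants defined elsewhere; a sketch
# based on the hard-coded URL visible in Example #12 (the exact definitions are an
# assumption):
YOUTUBE_LINK = ('https://www.googleapis.com/youtube/v3/commentThreads'
                '?part=snippet&maxResults=100&order=relevance'
                '&videoId={videoId}&key={key}')
YOUTUBE_IN_LINK = YOUTUBE_LINK + '&pageToken={pageToken}'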
Example #9
def rsi(comp_idx, comp_name_conv):
    #    comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV

    #    nxt_id = 0
    nxt_id, comp_cur = det_cur_rsi(PSQL)
    total_stocks = len(comp_idx)
    cur_date = pg_query(
        PSQL.client,
        "SELECT max(date) FROM portfolio.rsi_day_14;")[0].values[0]
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status=comp_name_conv[rh_id])

        if rh_id in comp_cur.keys() and cur_date <= np.datetime64(
                comp_cur[rh_id]):
            continue

        comp_data = pg_query(
            PSQL.client,
            "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s';"
            % (rh_id))
        comp_data.rename(columns={0: 'date', 1: 'close_price'}, inplace=True)
        comp_data.sort_values('date', ascending=True, inplace=True)
        comp_data['diff'] = comp_data['close_price'].diff()
        comp_data['gains'] = comp_data['diff'].apply(lambda x: x
                                                     if x > 0 else 0)
        comp_data['losses'] = comp_data['diff'].apply(lambda x: x
                                                      if x < 0 else 0)
        comp_data.dropna(inplace=True)
        all_avg_gain = []
        all_avg_loss = []
        dates = []
        all_avg_gain.append(comp_data.iloc[:14]['gains'].mean())
        all_avg_loss.append(comp_data.iloc[:14]['losses'].mean())
        dates.append(comp_data.iloc[14]['date'])
        comp_data = comp_data.iloc[15:]
        for _date, cur_gain, cur_loss in comp_data[['date', 'gains',
                                                    'losses']].values:
            dates.append(_date)
            all_avg_gain.append((all_avg_gain[-1] * 13 + cur_gain) / 14)
            all_avg_loss.append((all_avg_loss[-1] * 13 + cur_loss) / 14)

        for date, gain, loss in zip(dates, all_avg_gain, all_avg_loss):
            if gain == 0:
                rsi = 0
            elif loss == 0:
                rsi = 100
            else:
                # RSI = 100 - 100 / (1 + RS); losses are stored as negatives, hence abs()
                rsi = 100 - (100 / (1 + (gain / abs(loss))))

            if rh_id in comp_cur.keys() and comp_cur[rh_id] >= date:
                continue

            script = "INSERT INTO portfolio.rsi_day_14(rsi_day_14_id, rh_id, date, rsi) VALUES (%i, '%s', '%s', %.3f);" % (
                nxt_id, rh_id,
                datetime.strptime(
                    str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), rsi)
            pg_insert(PSQL.client, script)
            nxt_id += 1
            comp_cur[rh_id] = date
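# Quick sanity check of the Wilder RSI formula used above (illustrative numbers only):
# avg_gain = 1.0, |avg_loss| = 0.5  ->  RS = 2.0  ->  RSI = 100 - 100 / (1 + 2.0) ≈ 66.7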
Example #10
def random_search(x, y, model, params, trials=25, verbose=False):
    #    x, y, model, params, trials, verbose = x, y, pipe, param_dist, 5, False
    def _rand_shuffle(choices):
        """ Randomly select a hyperparameter for search """
        selection = {}
        for key, value in choices.items():
            selection[key] = random.choice(value)
        return (selection)

    search_scores = {}
    current_params = deepcopy(model).get_params()
    max_combinations = np.prod([len(i) for i in params.values()])
    if trials > max_combinations:
        trials = max_combinations
    for i in range(trials):
        progress(i, trials)
        iter_params = _rand_shuffle(params)
        iter_name = '_'.join("{!s}={!r}".format(k, v)
                             for (k, v) in iter_params.items())
        while iter_name in search_scores.keys():
            iter_params = _rand_shuffle(params)
            iter_name = '_'.join("{!s}={!r}".format(k, v)
                                 for (k, v) in iter_params.items())
        search_scores[iter_name] = {}
        estimator = deepcopy(model)
        for key, value in iter_params.items():
            estimator = estimator.set_params(**{key: value})
            search_scores[iter_name][key] = value
            if verbose:
                try:
                    print('%s: %.3f' % (key, value))
                except TypeError:
                    print('%s: %s' % (key, value))
        if estimator.get_params() == current_params:
            if verbose:
                print('Parameters already in default.')
            iter_score = -np.inf
        else:
            try:
                iter_score = cross_validate(x,
                                            y,
                                            estimator,
                                            only_scores=True,
                                            verbose=verbose)
            except:
                iter_score = -np.inf
        if verbose:
            print('- score: %.5f' % (iter_score))
        search_scores[iter_name]['score'] = iter_score
    progress(i + 1, trials)
    best_combination = max(search_scores,
                           key=lambda x: search_scores[x]['score'])
    return_results = {'score': search_scores[best_combination]['score']}
    return_results['parameters'] = {}
    best_params = dict(search_scores[best_combination])
    best_params.pop('score')
    return_results['parameters'] = best_params
    return (return_results)
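# Hypothetical usage (the parameter grid below is made up for illustration):
# param_dist = {'clf__C': [0.01, 0.1, 1, 10], 'clf__class_weight': [None, 'balanced']}
# best = random_search(x, y, pipe, param_dist, trials=10)
# best['score'], best['parameters']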
Example #11
def commentExtract(videoId, count=-1):
    print("Comments downloading")
    page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))
    while page_info.status_code != 200:
        # 		if page_info.status_code != 429:
        if page_info.status_code == 403:
            # 			return(page_info.status_code)
            # 			print ("Comments disabled")
            # 			sys.exit()
            return 'Comments disabled'

        time.sleep(20)
        page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))

    page_info = page_info.json()

    comments = []
    co = 0
    for i in range(len(page_info['items'])):
        comments.append(page_info['items'][i]['snippet']['topLevelComment']
                        ['snippet']['textOriginal'])
        co += 1
        if co == count:
            PB.progress(co, count, cond=True)
            print()
            return comments

    PB.progress(co, count)
    # INFINITE SCROLLING
    while 'nextPageToken' in page_info:
        temp = page_info
        page_info = requests.get(
            YOUTUBE_IN_LINK.format(videoId=videoId,
                                   key=key,
                                   pageToken=page_info['nextPageToken']))

        while page_info.status_code != 200:
            time.sleep(20)
            page_info = requests.get(
                YOUTUBE_IN_LINK.format(videoId=videoId,
                                       key=key,
                                       pageToken=temp['nextPageToken']))
        page_info = page_info.json()

        for i in range(len(page_info['items'])):
            comments.append(page_info['items'][i]['snippet']['topLevelComment']
                            ['snippet']['textOriginal'])
            co += 1
            if co == count:
                PB.progress(co, count, cond=True)
                print()
                return comments
        PB.progress(co, count)
    PB.progress(count, count, cond=True)
    print()

    return comments
Example #12
def commentExtract(videoId, count=-1):
    print("Comments downloading")
    page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))
    while page_info.status_code != 200:
        if page_info.status_code != 429:
            print("Comments disabled")
            sys.exit()

        time.sleep(20)
        page_info = requests.get(YOUTUBE_LINK.format(videoId=videoId, key=key))

    page_info = page_info.json()

    comments = []
    co = 0
    for i in range(len(page_info['items'])):
        comments.append(page_info['items'][i]['snippet']['topLevelComment']
                        ['snippet']['textOriginal'])
        co += 1
        if co == count:
            PB.progress(co, count, cond=True)
            print()
            return comments

    PB.progress(co, count)
    # INFINITE SCROLLING
    while 'nextPageToken' in page_info:
        temp = page_info
        page_info = requests.get(
            YOUTUBE_IN_LINK.format(videoId=videoId,
                                   key=key,
                                   pageToken=page_info['nextPageToken']))

        while page_info.status_code != 200:
            time.sleep(20)
            page_info = requests.get(
                YOUTUBE_IN_LINK.format(videoId=videoId,
                                       key=key,
                                       pageToken=temp['nextPageToken']))
        page_info = page_info.json()

        for i in range(len(page_info['items'])):
            comments.append(page_info['items'][i]['snippet']['topLevelComment']
                            ['snippet']['textOriginal'])
            co += 1
            if co == count:
                PB.progress(co, count, cond=True)
                print()
                return comments
        PB.progress(co, count)
    PB.progress(count, count, cond=True)
    print()

    return comments
Example #13
def channelVideoStatisticsExtract(channel_id, multithreading=False, thread_num=32):
    try:
        title = getChannelName(channel_id)
    except Exception:
        title = channel_id

    if multithreading:
        with open('ChannelStatistics/' + title + '.csv', 'w', encoding='utf-8-sig', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()

        queue = Queue(maxsize=thread_num)
        statisticsQueue = Queue(maxsize=128)

        getterThreads = []
        for i in range(thread_num):
            t = threading.Thread(target=multithreadingStatisticsExtract, args=(queue, statisticsQueue,))
            t.daemon = True
            getterThreads.append(t)
            t.start()

        writerThread = threading.Thread(target=multithreadingSaveStastics, args=(title, statisticsQueue,))
        writerThread.start()

        getVideoIdByChannelId(channel_id, queue, True)
        queue.join()
        statisticsQueue.join()

        for i in range(thread_num):
            queue.put('--thread-end--')

        statisticsQueue.put('--thread-end--')

        for i in range(thread_num):
            getterThreads[i].join()
        writerThread.join()

    else:
        print('Getting channel video ids...')
        video_ids = getVideoIdByChannelId(channel_id)
        print('Getting all statistics')
        PB.progress(0, 100)
        statistics = []
        curr_num = 0
        total_num = len(video_ids)
        for video_id in video_ids:
            statistics.append(statisticsExtract(video_id))
            curr_num += 1
            PB.progress(curr_num, total_num)

        print('Writing statistics...', end='')
        saveStastics(statistics, title)
        print('done')
Example #14
def fetching(fake_product_reviews, original_product_reviews):

    print("Phase 1...\n")
    pb.progress(0.0, 100)

    with open("../dataset/fake_pro.txt", "r") as file:
        for line in file:
            fake_product_reviews.append(line)

    with open("../dataset/original_pro.txt", "r") as file:
        for line in file:
            original_product_reviews.append(line)

    time.sleep(1)
Example #15
def pop_coins(psql, coins):
#    psql = PSQL
    print('Updating coins')
    nxt_coin, cur_coins, psql = _det_current_(psql, 'coins')
    total_coins = len(coins)
    for coin_num, (coin) in enumerate([str(i['name']) for i in coins]):
        progress(coin_num, total_coins, status = coin)

        if coin not in cur_coins:
            script = "insert into binance.%s (coin_id, symbol) VALUES (%i, '%s')" % ('coins', nxt_coin, coin)
            pg_insert(psql.client, script)
            cur_coins.add(coin)
            nxt_coin += 1
    print('\n')
Example #16
def stoch_osc_k(comp_idx, comp_name_conv):
    #    comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV
    #    all_next_id, all_comp_cur = det_cur_ema(PSQL)

    cur_date = pg_query(
        PSQL.client,
        "SELECT max(date) FROM portfolio.sto_osc_14;")[0].values[0]

    total_stocks = len(comp_idx)
    nxt_id, comp_cur = det_cur_ma(PSQL)
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status=comp_name_conv[rh_id])

        if rh_id in comp_cur.keys() and cur_date <= np.datetime64(
                comp_cur[rh_id]):
            continue


#        if rh_id in comp_cur.keys():
#            missing_days = pg_query(PSQL.client, "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s' and date > '%s';" % (rh_id, all_comp_cur[period][rh_id]))
#            if len(missing_days) == 0:
#                continue
#        else:
        comp_data = pg_query(
            PSQL.client,
            "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s';"
            % (rh_id))
        comp_data.rename(columns={0: 'date', 1: 'close_price'}, inplace=True)
        comp_data.sort_values('date', ascending=True, inplace=True)

        per_high = comp_data['close_price'].rolling(window=14).max()
        per_low = comp_data['close_price'].rolling(window=14).min()

        for date, current_close, lowest_low, highest_high in zip(
                comp_data.date.values, comp_data.close_price.values, per_low,
                per_high.values):
            if rh_id in comp_cur.keys() and comp_cur[rh_id] >= date:
                continue

            k = (current_close - lowest_low) / (highest_high -
                                                lowest_low) * 100
            if k != k:
                continue
            script = "INSERT INTO portfolio.sto_osc_14(sto_osc_14_id, rh_id, date, k) VALUES (%i, '%s', '%s', %.2f);" % (
                nxt_id, rh_id,
                datetime.strptime(
                    str(date).split('.')[0], '%Y-%m-%dT%H:%M:%S'), k)
            pg_insert(PSQL.client, script)
            nxt_id += 1
            comp_cur[rh_id] = date
Example #17
def init_user_object_list(user_matrix):
	"""
	Creates a list of user objects from a user data matrix extracted from the database.
	Use when refreshing the list of users to search through.
	"""
	counter = 0
	user_object_list = []
	for user_row in user_matrix:
		temp_user_obj = User.User(user_row[:5], user_row[6:10],user_row[5],user_row[10:15], user_row[15:20],user_row[20:])
		user_object_list.append(temp_user_obj)
		# print progress. REMOVE FOR FINAL IMPLEMENTATION
		counter = counter + 1
		progress_bar.progress(counter, len(user_matrix))
	print "\n"
	return user_object_list	
Example #18
def cci(comp_idx, comp_name_conv):
    #    comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV
    nxt_id, comp_cur = det_cur_cci(PSQL)
    total_stocks = len(comp_idx)
    cur_date = pg_query(
        PSQL.client,
        "SELECT max(date) FROM portfolio.cci_day_20;")[0].values[0]
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status=comp_name_conv[rh_id])

        if rh_id in comp_cur.keys() and cur_date <= np.datetime64(
                comp_cur[rh_id]):
            continue

        comp_ma = pg_query(
            PSQL.client,
            "SELECT date, avg_price from portfolio.ma_day_20 where rh_id = '%s'"
            % (rh_id))
        comp_ma.rename(columns={0: 'date', 1: 'ma'}, inplace=True)
        comp_ma.sort_values('date', ascending=True, inplace=True)
        comp_ma.set_index('date', inplace=True)
        comp_typ_price = pg_query(
            PSQL.client,
            "SELECT date, (high_price + low_price + close_price)/3 from portfolio.day_prices where rh_id = '%s'"
            % (rh_id))
        comp_typ_price.rename(columns={0: 'date', 1: 'typical'}, inplace=True)
        comp_typ_price.sort_values('date', ascending=True, inplace=True)
        comp_typ_price.set_index('date', inplace=True)
        comp_cci = comp_typ_price.join(comp_ma)
        comp_cci['typ_avg'] = comp_cci['typical'].rolling(window=20).mean()
        comp_cci['abs_dev'] = abs(comp_cci['typical'] - comp_cci['typ_avg'])
        comp_cci['mean_dev'] = comp_cci['abs_dev'].rolling(window=20).mean()
        comp_cci['cci'] = (comp_cci['typical'] -
                           comp_cci['ma']) / (comp_cci['mean_dev'] * .015)
        comp_cci.dropna(inplace=True)
        comp_cci.reset_index(inplace=True)
        for date, cci in comp_cci[['date', 'cci']].values:
            if rh_id in comp_cur.keys() and comp_cur[rh_id] >= date:
                continue
            if cci != cci or cci == np.inf or cci == -np.inf:
                continue
            script = "INSERT INTO portfolio.cci_day_20(cci_day_20_id, rh_id, date, cci) VALUES (%i, '%s', '%s', %.3f);" % (
                nxt_id, rh_id,
                datetime.strptime(
                    str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), cci)
            pg_insert(PSQL.client, script)
            nxt_id += 1
            comp_cur[rh_id] = date
Example #19
def dividends(full_update = False):
    print(' ~~ Dividends ~~ ')

    PSQL = db_connection('psql')
    id_sym = pg_query(PSQL.client, 'select rh_id, rh_sym from portfolio.stocks')          
    current, next_id = det_cur_divs(PSQL)
    total_stocks = len(id_sym)
    for stock_num, (idx, sym) in enumerate(id_sym.values):
        progress(stock_num, total_stocks, status = sym)

        if not full_update and idx in current.keys():
            continue
        url = 'https://api.iextrading.com/1.0/stock/%s/dividends/5y' % (sym)        
        req = requests.request('GET', url)
        if req.status_code == 404:
            continue
        data = req.json()
        if len(data) == 0:
            continue
        for div in reversed(data):
            ex_date = datetime.strptime(div['exDate'], '%Y-%m-%d')
            if idx in current.keys() and ex_date <= current[idx]:
                continue
            if div['amount'] == '':
                continue
            amount = float(div['amount'])
            if div['paymentDate'] == '':
                continue
            payment_date = datetime.strptime(div['paymentDate'], '%Y-%m-%d')
            record_date = datetime.strptime(div['recordDate'], '%Y-%m-%d')
            if div['declaredDate'] == '':
                declared_date = 'null'
            else:
                declared_date = "'"+str(datetime.strptime(div['declaredDate'], '%Y-%m-%d'))+"'"
            
            script = "INSERT INTO portfolio.dividends(\
                    div_id, rh_id, ex_date, payment_date, record_date, declared_date, amount) \
                    VALUES (%i, '%s', '%s', '%s', '%s', %s, %.2f);" % (next_id, idx, ex_date, payment_date, record_date, declared_date, amount)
            pg_insert(PSQL.client, script)
            next_id += 1
            current[idx] = ex_date
            ex_date = None
            payment_date = None
            record_date = None
            declared_date = None
            amount = None
Example #20
def pop_markets(psql, prices):
#    psql = PSQL
    print('Updating markets')
    _, cur_coins, psql = _det_current_(psql, 'coins')
    nxt_market, cur_markets, psql = _det_current_(psql, 'markets')
    total_markets = len(prices)
    for market_num, (market) in enumerate([str(i['symbol']) for i in prices]):
        progress(market_num, total_markets, status = market)

        if market not in cur_markets:
            formed_market = validate_pair(market, cur_coins)
            if formed_market:
                script = "insert into binance.%s (market_id, symbol, sell_coin, buy_coin) VALUES (%i, '%s', '%s', '%s')" % ('markets', nxt_market, market, formed_market['sell_coin'], formed_market['buy_coin'])
                pg_insert(psql.client, script)
                cur_markets.add(market)
                nxt_market += 1
    print('\n')
Example #21
def obv_roc(comp_idx, comp_name_conv):
#    comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV
    cur_date = pg_query(PSQL.client, "SELECT max(date) FROM portfolio.adl_day;")[0].values[0]
    nxt_id, comp_cur = det_cur_obv(PSQL)
    total_stocks = len(comp_idx)
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status = comp_name_conv[rh_id])
        if rh_id in comp_cur.keys() and cur_date <= np.datetime64(comp_cur[rh_id]):
            continue
        
        comp_data = pg_query(PSQL.client, "SELECT date, close_price, volume FROM portfolio.day_prices where rh_id = '%s';" % (rh_id))
        comp_data.rename(columns = {0:'date', 1:'close_price', 2:'volume'}, inplace = True)
        comp_data.sort_values('date', ascending = True, inplace = True)     
        
        last_close = comp_data.iloc[0]['close_price']
        obvs = [comp_data.iloc[0]['volume']]
        dates = [comp_data.iloc[0]['date']]
        for date, close, volume in comp_data.iloc[1:].values:
            if close > last_close:
                obvs.append(obvs[-1] + volume)
            elif close < last_close:
                obvs.append(obvs[-1] - volume)
            elif close == last_close:
                obvs.append(obvs[-1])
            dates.append(date)
            last_close = close
        obv = pd.DataFrame([dates, obvs]).T
        obv.rename(columns = {0: 'date', 1: 'obv'}, inplace = True)
        
        obv['ra'] = obv['obv'].rolling(window = 14).mean()

        obv['obv_roc'] = (obv['obv'] - obv['ra'].shift()) / obv['ra'].shift()
        obv.dropna(inplace = True)
        
        for date, obv in obv[['date', 'obv_roc']].values:
            if rh_id in comp_cur.keys() and date <= np.datetime64(comp_cur[rh_id]):
                continue
            if obv != obv:
                continue
            if obv == np.inf or obv == -np.inf:
                continue
            script = "INSERT INTO portfolio.obv_14_roc(obv_14_roc_id, rh_id, date, obv_roc) VALUES (%i, '%s', '%s', %.4f);" % (nxt_id, rh_id, datetime.strptime(str(date).split('.')[0], '%Y-%m-%d %H:%M:%S'), obv)
            pg_insert(PSQL.client, script)
            nxt_id += 1
            comp_cur[rh_id] = date        
Example #22
def day_prices():
    
    print(' ~~ Full Day Prices ~~ ')
    PSQL = db_connection('psql')
    current, next_id = det_cur_day_prices(PSQL, 'day')
    id_sym = pg_query(PSQL.client, 'select rh_id, rh_sym from portfolio.stocks')
    total_stocks = len(id_sym)
    trader = rs()
    login_data = trader.login()
        
    for stock_num, (idx, sym) in enumerate(id_sym.values):
        progress(stock_num, total_stocks, status = sym)
        symbols = helper.inputs_to_set(sym)
        url = urls.historicals()
        payload = { 'symbols' : ','.join(symbols),
                    'interval' : 'day',
                    'span' : '5year',
                    'bounds' : 'regular'}
        data = trader.request_get(url,'results',payload)
        
        if data == [None]:
            continue
        for day in data[0]['historicals']:
            beg_date = datetime.strptime(day['begins_at'].replace('Z', '').replace('T', ' '), '%Y-%m-%d %H:%M:%S')
            if idx in current.keys() and beg_date <= current[idx]:
                continue
    
            open_price = float(day['open_price'])
            close_price = float(day['close_price'])
            high_price = float(day['high_price'])
            low_price = float(day['low_price'])
            volume = int(day['volume'])
            script = "INSERT INTO portfolio.day_prices(day_price_id, rh_id, date, open_price, close_price, high_price, low_price, volume) VALUES ('%s', '%s', '%s', %.2f, %.2f, %.2f, %.2f, %i);" % (next_id, idx, beg_date, open_price, close_price, high_price, low_price, volume)
            pg_insert(PSQL.client, script)
            next_id += 1  
            open_price = None
            close_price = None
            high_price = None
            low_price = None
            volume = None
    trader.logout()
Example #23
def LEASE_NTC_INIT():
    current_page = get_web_page(
        LEASE_URL, urlJumpIp)  # return a dict of dict of list of dict
    total_rows = get_total_rows(current_page)

    page_count = 0
    row_data = []

    while page_count <= total_rows:
        data = get_info(current_page)
        row_data += data
        page_count += pageRow
        current_page = get_web_page(
            LEASE_URL + "&firstRow=" + str(page_count) + "&totalRows=" +
            str(total_rows), urlJumpIp)
        progress(page_count, total_rows, __file__)

    save(row_data)
    print(str(__file__) + " complete")
Example #24
def judgement(inputt, output, arr, brr):

    co = 0
    print("Network testing:")
    for i in range(len(inputt)):
        st = np.resize(inputt[i], (1, len(inputt[i])))
        st = st / 255
        mid = nonlin(np.dot(st, arr))
        end = nonlin(np.dot(mid, brr))

        ind = np.argmax(end)
        if (ind == output[i]):
            co += 1
        if (i % 1000 == 0):
            prb.progress(i, len(inputt))
    prb.progress(len(inputt), len(inputt), cond=True)
    print('\n')

    print("Accuracy : ", ((co / len(inputt)) * 100), '%')
Example #25
def _participation(self, chat_id, message):
    output = ""
    for key, value in sorted(self._flood[chat_id].get_all().items()):
        output += self._get_name(key) + ": "
        if message == self._commands[12]:
            output += str(value) + '\n'
        output += progress(value, self._flood_amount[chat_id]) + '\n'
    if output == "":
        return "Пока всё тихо..."  # "All quiet so far..."
    return output
Example #26
def training(inputt, output):
    np.random.seed(1)

    # Weight Holder. IMPORTANT/TRAINEE/FUTURE
    arr = np.random.uniform(-1.0, 1.0, (len(inputt[0]), 20))  #transpose
    brr = np.random.uniform(-1.0, 1.0, (20, 10))  #same, duh!

    print("Network training:")

    for i in range(len(inputt)):
        a1 = np.resize(inputt[i], (1, len(inputt[i])))
        arr, brr = func(a1, output[i], arr, brr)

        if (i % 1000 == 0):
            prb.progress(i, len(inputt))
    prb.progress(len(inputt), len(inputt), cond=True)
    print()

    return arr, brr
Example #27
def moving_average(comp_idx, comp_name_conv):
    #    comp_idx, comp_name_conv = COMP_IDX, COMP_NAME_CONV

    cur_date = pg_query(
        PSQL.client,
        "SELECT max(date) FROM portfolio.day_prices;")[0].values[0]
    all_next_id, all_comp_cur = det_cur_ma(PSQL)
    total_stocks = len(comp_idx)
    for stock_num, (rh_id) in enumerate(comp_idx):
        progress(stock_num, total_stocks, status=comp_name_conv[rh_id])
        for period in [200, 100, 50, 20, 10]:
            if rh_id in all_comp_cur[period].keys(
            ) and cur_date <= np.datetime64(all_comp_cur[period][rh_id]):
                continue

            comp_data = pg_query(
                PSQL.client,
                "SELECT date, close_price FROM portfolio.day_prices where rh_id = '%s';"
                % (rh_id))
            comp_data.rename(columns={
                0: 'date',
                1: 'close_price'
            },
                             inplace=True)
            comp_data.sort_values('date', ascending=True, inplace=True)
            date_idx = comp_data['date']
            dma = calc_mov_avg(comp_data, period)

            for date, avg_price in zip(date_idx.values, dma.values):
                if rh_id in all_comp_cur[period].keys(
                ) and date <= np.datetime64(all_comp_cur[period][rh_id]):
                    continue
                if avg_price != avg_price:
                    continue
                script = "INSERT INTO portfolio.ma_day_%s(ma_day_%s_id, rh_id, date, period, avg_price) VALUES (%i, '%s', '%s', %i, %.2f);" % (
                    period, period, all_next_id[period], rh_id,
                    datetime.strptime(
                        str(date).split('.')[0],
                        '%Y-%m-%dT%H:%M:%S'), period, avg_price)
                pg_insert(PSQL.client, script)
                all_next_id[period] += 1
                all_comp_cur[period][rh_id] = date
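# calc_mov_avg() is defined elsewhere in this project; a minimal sketch (assumption)
# returning a simple moving average of the close price over the requested period:
def calc_mov_avg_sketch(frame, period):
    return frame['close_price'].rolling(window=period).mean()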
Example #28
def _gen_indices():
    use_kws, kw_weights, use_conc, conc_weights, use_cat, cat_weights = find_weights(
    )

    _kw_index = {i: [] for i in use_kws}
    _conc_index = {i: [] for i in use_conc}
    _cat_index = {i: [] for i in use_cat}

    total_docs = len(data)
    for doc_num, (idx, v) in enumerate(data.items()):

        for doc_kw in v['keywords']:
            _kw_index[doc_kw['text'].replace(' ', '_').lower()] += [{
                'doc_id':
                idx,
                'score':
                doc_kw['relevance'] *
                kw_weights[doc_kw['text'].replace(' ', '_').lower()]
            }]

        for doc_conc in v['concepts']:
            _conc_index[doc_conc['text'].replace(' ', '_').lower()] += [{
                'doc_id':
                idx,
                'score':
                doc_conc['relevance'] *
                conc_weights[doc_conc['text'].replace(' ', '_').lower()]
            }]

        for doc_cat in v['categories']:
            _cat_index[doc_cat['label'].replace(' ', '_').lower()] += [{
                'doc_id':
                idx,
                'score':
                doc_cat['score'] *
                cat_weights[doc_cat['label'].replace(' ', '_').lower()]
            }]

        progress(doc_num + 1, total_docs)

    print('\n')
    return (_kw_index, _conc_index, _cat_index)
Example #29
def pop_prices(psql, time_conv, coins):
#    psql, time_conv, coins = PSQL, TIME_CONV, COINS
    print('Updating prices')
    all_prices = pg_query(psql.client, 'select pr_symbol, max(pr_time_id) from binance.prices group by pr_symbol')
    price_conv = {k:v for k,v in all_prices.values}
    total_prices = len(coins)
    for price_num, (coin) in enumerate([i['name'] for i in coins]):    
        progress(price_num, total_prices, status = coin)
        for time in list(time_conv.keys()):
            if coin in price_conv.keys():
                if price_conv[coin] >= time_conv[time]:
                    break
            try:
                price_data = requests.get('https://min-api.cryptocompare.com/data/pricehistorical?fsym=%s&tsyms=USD&ts=%s'%(coin, time.strftime("%s"))).json()
            except:
                continue
            script = "insert into binance.prices (pr_symbol, pr_time_id, usd) VALUES \
            ('%s', %i, %s);" % (coin, time_conv[time], price_data[coin]['USD'])
            pg_insert(psql.client, script)   
    print('\n')
Example #30
def test_scaler(clf, prev_score, x, y, verbose=False, prog=True):
    #   clf, prev_score, x, y, verbose, prog = log_clf, log_checkpoint_score, X, Y, False, True
    if prog:
        print('Searching for best scaler.')
    scores = {}
    model = deepcopy(clf)
    total_scales = 3
    cur_scale = 0
    for scale, name in zip(
        [StandardScaler(), MinMaxScaler(),
         RobustScaler()], ['Standard', 'MinMax', 'Robust']):
        if not verbose and prog:
            progress(cur_scale, total_scales)
        if model.steps[1][1].__class__ == scale.__class__:
            if verbose:
                print('%s Already Included' % (name))
                print('Score: %.5f' % (prev_score))
            cur_scale += 1
            continue
        scale_score = cross_validate(x,
                                     y,
                                     model.set_params(**{'scale': scale}),
                                     only_scores=True,
                                     verbose=verbose)
        if verbose:
            print('%s Score: %.5f' % (name, scale_score))
        scores[name] = {'scale': scale}
        scores[name]['score'] = scale_score
        cur_scale += 1
    if not verbose and prog:
        progress(cur_scale, total_scales)
    best_scale = max(scores, key=lambda x: scores[x]['score'])
    if scores[best_scale]['score'] > prev_score:
        if prog:
            print('Using %s.' % (best_scale))
        return (model.set_params(**{'scale': scores[best_scale]['scale']}),
                scores[best_scale]['score'])
    else:
        if prog:
            print('Keeping original.')
        return (clf, prev_score)
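# Hypothetical usage (assumes the pipeline exposes a step named 'scale', as the
# set_params calls above imply):
# log_clf, log_checkpoint_score = test_scaler(log_clf, log_checkpoint_score, X, Y)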
Example #31
def channelCommentExtract(channel_id, multithreading=False, thread_num=16):
    if multithreading:
        global queue
        queue = Queue(maxsize=thread_num)
        for i in range(thread_num):
            t = threading.Thread(target=multithreadingCommentExtract)
            t.daemon = True
            t.start()

        getVideoIdByChannelId(channel_id, queue, True)
        queue.join()

    else:
        print('Getting channel video ids...')
        video_ids = getVideoIdByChannelId(channel_id)
        print('Getting all comments')
        PB.progress(0, 100)
        comments = []
        curr_num = 0
        total_num = len(video_ids)
        for video_id in video_ids:
            comments.append(commentExtract(video_id))
            curr_num += 1
            PB.progress(curr_num, total_num)
        PB.progress(curr_num, total_num)
        pass
Example #32
def main():
    """The main function."""
    parser = OptionParser(version='%prog v' + __version__)
    parser.add_option('-c', '--config', default='config.ini',
        help='Location of config file (default: %default)', metavar='FILE')
    parser.add_option('-o', '--output', default='simplenotebak.json.txt',
        help='Output file name (default: %default)', metavar='FILE')
    (options, args) = parser.parse_args() 

    # set script's path and add '/' to end
    script_path = os.path.abspath(os.path.dirname(sys.argv[0])) + '/'

    if args:
        print 'debug: you wanted to run command: ' + args[0]

    config = SafeConfigParser()
    # can pass multiple files to config.read but it merges them, which we don't want
    if not config.read(options.config):
        # could not read file, try the script's path
        if not config.read(script_path + options.config):
            # Still can't find it, error out
            print 'Could not read any config file'
            sys.exit(1)
    email = config.get('simplenote', 'email')
    password = config.get('simplenote', 'password')

    sn = Simplenote(email, password)
    if not sn.login():
        print 'ERROR:', sn.last_error
        sys.exit(1)
    index = sn.full_index()
    #index = sn.index(5)
    if sn.has_error:
        print 'ERROR:', sn.last_error
        sys.exit(1)
    #print '- index -'
    #pp = pprint.PrettyPrinter(indent=4)
    #pp.pprint(index)
    print 'number of notes:', str(len(index))
    notecount = float(len(index))
    #print '- data -'
    notes = []
    i = 0
    #for note_meta in index['data']:
    for note_meta in index:
        note = sn.note(note_meta['key'])
        if sn.has_error:
            print 'ERROR:', sn.last_error
            sys.exit(1)
        notes.append(note)
        #pp.pprint(note)
        i += 1
        pb.progress(50, math.floor(float(i) / notecount * 100.0))
    print 'Number of api calls:', str(sn.api_count)
    # xml format
    #xmlnotes = ''
    #for note in notes:
    #    if 'xml' in locals():
    #        xml.append(dict_to_xml(note))
    #    else:
    #        xml = dict_to_xml(note)
    #xmlnotes = '<?xml version="1.0" encoding="UTF-8"?>' + "\n"
    #xmlnotes += ET.tostring(xml, encoding="UTF-8")
    #print xmlnotes
    # JSON format
    jsonnotes = []
    i = 0
    for note in notes:
        if note['deleted'] == 1:
            continue
        json_note_tmp = {'modifydate': format_date(float(note['modifydate']))}
        json_note_tmp.update({'createdate': format_date(float(note['createdate']))})
        json_note_tmp.update({'tags': note['tags']})
        json_note_tmp.update({'systemtags': note['systemtags']})
        json_note_tmp.update({'content': note['content']})
        json_note_tmp.update({'key': note['key']})
        jsonnotes.append(json_note_tmp)
    #print json.dumps(jsonnotes)
    fh = open(options.output, 'w')
    fh.write(json.dumps(jsonnotes))
    fh.close()
Example #33
def insert_band(raster, layer, outName, bandIndex=None, substitute=False, \
                bandNames=None, outFormat=None, createOptions=None):
    
    
    """
    Insert raster layers into a multiband image at desired positions.
    
    Use:
    
    raster (string): the name of the input raster into which the layer(s) will be
            inserted (full path and file extension).
    
    layer (string / list): EITHER the name of the input raster containing the 
            layer to be inserted (full path and file extension)
            OR
            a list containing all (single-band) rasters to be inserted.
    
    outName (string): the name of the output file (full path and file extension).
    
    bandIndex (integer / list): EITHER a single value indicating the position
            for the layer
            OR
            a list of integers containing all positions.
            Per default, the layer(s) is/are inserted at the end.
    
    substitute (boolean): if set "True", the layers to be inserted will 
            substitute the existing ones at their respective positions. Defaults
            to "False", which means that the new layers will be inserted at the 
            desired position and all other bands will be appended afterwards.
            Substitution will not work if "bandIndex" is not set!
    
    bandNames (list): a list of band names for the inserted layers. Defaults to 
            the names of the input files.
    
    outFormat (string): the desired format of the output file as provided by the 
            GDAL raster formats (see: http://www.gdal.org/formats_list.html). 
            Defaults to the format of the input raster.
    
    createOptions (list): a list of strings, containing advanced raster creation 
            options such as band interleave.
            
            Example:
                createOptions=['interleave=bil']
    """
    
    gdal.AllRegister()
    
     # get basic information and create output raster:
    ds = gdal.Open(raster, GA_ReadOnly)    
    bands = ds.RasterCount

    band = ds.GetRasterBand(1)
    x = ds.RasterXSize
    y = ds.RasterYSize
    proj = ds.GetProjection()
    transform = ds.GetGeoTransform()
    meta = ds.GetMetadata_Dict()
    
    if isinstance(layer, basestring) == True:
        layer = [layer]
    
    if bandIndex != None:
        if isinstance(bandIndex, list) == False:
            newBands = [bandIndex]
        else:
            newBands = bandIndex
    else:
        newBands = [i for i in xrange(bands + 1, bands + (len(layer) + 1))]
    
    if bandNames != None:
        if isinstance(bandNames, list) == False:
            bandNames = [bandNames]
        else:
            bandNames = bandNames

    if outFormat == None:
        outFormat = ds.GetDriver().GetDescription()
    else:
        outFormat = outFormat
    
    driver = gdal.GetDriverByName(outFormat)
    
    if createOptions == None:
        ds_out = driver.Create(outName, x, y, bands + len(newBands), band.DataType)
    else:
        ds_out = driver.Create(outName, x, y, bands + len(newBands), band.DataType, \
                                createOptions)
                               
    ds_out.SetProjection(proj)
    ds_out.SetGeoTransform(transform)
    
    band = None
    
    if bandIndex != None:
        lyr_index = 1
        band_index = 1
        # insert single layers at desired positions
        for b in xrange(1, bands + 1):
            # progress bar:
            try:
                pr.progress(b, xrange(1, bands + 1))
            except:
                pass
    
            if b in newBands:
                insert = gdal.Open(layer[lyr_index - 1], GA_ReadOnly)
                band = insert.GetRasterBand(1)
                dtype = band.DataType
                nodata = band.GetNoDataValue()
                
                data = band.ReadRaster(0, 0, x, y, x, y, dtype)
                band_out = ds_out.GetRasterBand(band_index)
                
                # create new band names (either from original image or from user input):
                if bandNames == None:
                    name = string.join(['Band', str(band_index)], sep='_')
                    desc = band.GetDescription()
                    if len(desc) == 0:
                        desc = layer[lyr_index - 1]
                    meta[name] = desc
                    band_out.SetDescription(band.GetDescription())
                else:
                    name = string.join(['Band', str(band_index)], sep='_')
                    meta[name] = bandNames[lyr_index - 1]
                    band_out.SetDescription(bandNames[lyr_index - 1])
             
                band_out.SetNoDataValue(double(nodata))
                band_out.WriteRaster(0, 0, x, y, data, x, y, dtype)
                
                lyr_index += 1
                
                if substitute == False:
                    band_index += 1
                
                insert = None
                
                # then next band of original image:
                band = ds.GetRasterBand(b)
                dtype = band.DataType
                nodata = band.GetNoDataValue()
               
                data = band.ReadRaster(0, 0, x, y, x, y, dtype)
                band_out = ds_out.GetRasterBand(b)
                
                name = string.join(['Band', str(band_index)], sep='_')
                meta[name] = band.GetDescription()
                band_out.SetDescription(band.GetDescription())
                
                band_out.SetNoDataValue(double(nodata))
                band_out.WriteRaster(0, 0, x, y, data, x, y, dtype)
                
                band_index += 1
                
                band_out = None
                band = None
                
            else:
                band = ds.GetRasterBand(b)
                dtype = band.DataType
                nodata = band.GetNoDataValue()
               
                data = band.ReadRaster(0, 0, x, y, x, y, dtype)
                band_out = ds_out.GetRasterBand(b)
                
                name = string.join(['Band', str(band_index)], sep='_')
                meta[name] = band.GetDescription()
                band_out.SetDescription(band.GetDescription())
    
                band_out.SetNoDataValue(double(nodata))
                band_out.WriteRaster(0, 0, x, y, data, x, y, dtype)
                
                band_index += 1
            
                band_out = None
                band = None
                
    else:
        if substitute == True:
            raise ValueError("Option 'substitute' will only work with 'bandIndex' set!")
            
        lyr_index = 1
        band_index = 1
        # insert single layers at the end
        for b in xrange(1, bands + 1):
            band = ds.GetRasterBand(b)
            dtype = band.DataType
            nodata = band.GetNoDataValue()
           
            data = band.ReadRaster(0, 0, x, y, x, y, dtype)
            band_out = ds_out.GetRasterBand(b)
            
            name = string.join(['Band', str(band_index)], sep='_')
            meta[name] = band.GetDescription()
            band_out.SetDescription(band.GetDescription())
    
            band_out.SetNoDataValue(double(nodata))
            band_out.WriteRaster(0, 0, x, y, data, x, y, dtype)
            
            band_index += 1
        
            band_out = None
            band = None
            
        for i in xrange(0, len(newBands)):
            insert = gdal.Open(layer[i], GA_ReadOnly)
            band = insert.GetRasterBand(1)
            dtype = band.DataType
            nodata = band.GetNoDataValue()
                
            data = band.ReadRaster(0, 0, x, y, x, y, dtype)
            band_out = ds_out.GetRasterBand(band_index)
            
            # create new band names (either from original image or from user input):
            if bandNames == None:
                name = string.join(['Band', str(band_index)], sep='_')
                desc = band.GetDescription()
                if len(desc) == 0:
                    desc = layer[lyr_index - 1]
                meta[name] = desc
                band_out.SetDescription(band.GetDescription())
            else:
                name = string.join(['Band', str(band_index)], sep='_')
                meta[name] = bandNames[lyr_index - 1]
                band_out.SetDescription(bandNames[lyr_index - 1])
             
            # write the inserted band and advance the indices for every new layer
            band_out.SetNoDataValue(double(nodata))
            band_out.WriteRaster(0, 0, x, y, data, x, y, dtype)

            lyr_index += 1
            band_index += 1
        
    ds_out.SetMetadata(meta)
    
    ds_out = None
    ds = None
Example #34
0
def stack_images(images, outfile, bands=None, of='GTiff', co=None, noData=0, bandNames=None):
    
    """
    Create a layerstack from all files within a directory.
    
    Use:
    
    images (list): a list of strings containing the full path and file 
            extension for the single raster files which shall be stacked 
            together. Will be stacked in the order as given by this list.
    
    outfile (string): the name of the output file (full path and file 
            extension).
    
    bands (list): a list containing the desired band numbers of the input 
            images which shall be used for stacking. By this, single bands out 
            of multiband images can be used. Defaults to "None", which means 
            that the first band will be used. If more than one band from the 
            same multiband image shall be used, the filename must be given 
            again in the "images"-list.
    
    of (string): the desired format of the output file as provided by the 
            GDAL raster formats (see: http://www.gdal.org/formats_list.html). 
            Defaults to 'GTiff'.
    
    co (list): a list of strings, containing advanced raster creation 
            options such as band interleave.
            
            Example:
                co=['interleave=bil']
        
    noData (int / float): the no-data value to be set. Defaults to the no-data 
            value of the first input file.
            
    bandNames (list): a list of band names for the output file. Defaults to the
            names of the input files.
    """
        
    gdal.AllRegister()
    # get basic information and create output raster:
    ds = gdal.Open(images[0], GA_ReadOnly)
    band = ds.GetRasterBand(1)
    proj = ds.GetProjection()
    transform = ds.GetGeoTransform()
    x = ds.RasterXSize
    y = ds.RasterYSize
    dtype = band.DataType
    # nodata
    if noData == None:
        nodata = band.GetNoDataValue()
    else:
        nodata = noData
        
    driver = gdal.GetDriverByName(of)
    # check for create options
    if co == None:
        ds_out = driver.Create(outfile, x, y, len(images), dtype)
    else:
        ds_out = driver.Create(outfile, x, y, len(images), dtype, co)
    # set projection
    ds_out.SetProjection(proj)
    ds_out.SetGeoTransform(transform)
    band = None
    ds = None
    # loop through all files and stack them:
    band_index = 1
    
    for name in images:
        # progress bar:
        try:
            pr.progress(name, images)
        except:
            pass
        # open imput image
        ds = gdal.Open(name, GA_ReadOnly)
        # check if specific bands are desired
        if bands == None:
            band = ds.GetRasterBand(1)
        else:
            band = ds.GetRasterBand(bands[band_index-1])
        # set data type
        dtype = band.DataType
        # read data
        data = band.ReadRaster(0, 0, x, y, x, y, dtype)
        band_out = ds_out.GetRasterBand(band_index)
        # create new band names (either from original image or from user input):
        if bandNames == None:
            band_out.SetDescription(band.GetDescription())
        else:
            band_out.SetDescription(bandNames[band_index - 1])
            
        band_out.SetNoDataValue(nodata)
        band_out.WriteRaster(0, 0, x, y, data, x, y, dtype)
        
        band_index += 1
        
        band = None
        ds = None
    
    ds_out = None
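A minimal usage sketch for stack_images(); the file paths, band names and the creation option below are hypothetical assumptions, not taken from the original example:

# stack the first band of three single-band rasters into one GeoTIFF
scenes = ['/data/b03.tif', '/data/b04.tif', '/data/b08.tif']  # assumed paths
stack_images(scenes, '/data/stack.tif',
             of='GTiff',
             co=['INTERLEAVE=BAND'],  # assumed GTiff creation option
             noData=0,
             bandNames=['green', 'red', 'nir'])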
Example #35
0
def ftp_download(url, user='', pw='', localPath=os.getcwd(), pattern=None):
    
    """
    Download files from a ftp-server. Can handle one level of subfolders.
    
    Use:
    
    url (string): the complete url of the ftp-server containing the desired 
            files.
    
    user (string): 'Username' for server login.
    
    pw (string): 'Password' for server login.
    
    localPath (string): the local directory, to which the files shall be 
            downloaded. Defaults to the current working directory.
    
    pattern (list): a list of strings containing the pattern of characters to look 
            for in the file names as a list. May be useful, if there are many 
            files from which only a selection shall be taken.
            Be aware that the list is taken as "either or", not as "and"!
            Examples:    
                pattern=['.txt'] (if all txt-files are desired)
                pattern=['_test_', '_demo'] (if all desired files contain 
                        either "_test_" or "_demo")
    """

    if 'ftp://' in url:
        url = url.split('//')[1]
    # set up connection:
    ftp = ftplib.FTP(url.split('/')[0])
    ftp.login(user, pw)
    # switch directory on server:
    root = url[len(url.split('/')[0]):]
    # if root still contains leading "/", remove it
    if root[0] == "/":
        root = root[1:len(root)]
    ftp.cwd(root)
    # list all files and/or directories:
    listing = []
    ftp.retrlines('NLST', listing.append)

    print 'Downloading data...'
    # loop through the current directory and get the files:
    for l in listing:
        # progress bar        
        try:
            pr.progress(l, listing)
        except:
            pass
        
        # check if l is file or directory
        if os.path.splitext(l)[1] != '':    # file
            # check for desired pattern:
            if pattern == None:
                # download files:
                local_filename = os.path.join(localPath, l)
                lf = open(local_filename, 'wb')
                ftp.retrbinary('RETR ' + l, lf.write)
                lf.close()
            else:
                for p in pattern:
                    # download files:
                    if str(p) in l:
                        local_filename = os.path.join(localPath, l)
                        lf = open(local_filename, 'wb')
                        ftp.retrbinary('RETR ' + l, lf.write)
                        lf.close()
        else:   # directory or file without extension
            # check if l is a file without extension
            try:
                ftp.cwd(l)  # directory
                # get file names:
                files = []
                ftp.retrlines('NLST', files.append)
                # check for desired pattern:
                if pattern == None:
                    for f in files:
                        # download files:
                        local_filename = os.path.join(localPath, f)
                        lf = open(local_filename, 'wb')
                        ftp.retrbinary('RETR ' + f, lf.write)
                        lf.close()
                else:
                    for p in pattern:
                        for f in files:
                            # download files:
                            if str(p) in f:
                                local_filename = os.path.join(localPath, f)
                                lf = open(local_filename, 'wb')
                                ftp.retrbinary('RETR ' + f, lf.write)
                                lf.close()
                # go back up one level
                ftp.cwd('/..')
                ftp.cwd(root)
            except:     # file with no extension
                if pattern == None:
                    # download
                    local_filename = os.path.join(localPath, l)
                    lf = open(local_filename, 'wb')
                    ftp.retrbinary('RETR ' + l, lf.write)
                    lf.close()
                else:
                    for p in pattern:
                        if str(p) in l:
                            # download
                            local_filename = os.path.join(localPath, l)
                            lf = open(local_filename, 'wb')
                            ftp.retrbinary('RETR ' + l, lf.write)
                            lf.close()
            

    ftp.close()
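A minimal usage sketch for ftp_download(); the server URL, credentials, local directory and pattern are hypothetical assumptions:

# fetch all *.txt files from an (assumed) FTP directory into /tmp/downloads
ftp_download('ftp://ftp.example.com/pub/data',
             user='anonymous',
             pw='guest@example.com',
             localPath='/tmp/downloads',
             pattern=['.txt'])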
Example #36
0
def apply_mask(image, maskImage, outName, outPath=None, of='Gtiff', co=None, keepWarp=False):
    """
    Apply a mask to a (multiband) image. The mask will be multiplied with the 
    input image. Both images have to cover the same area, but may differ in 
    resolution.

    Use:

    image (string): the image file (full path and file extension).

    maskImage (string): the image file which shall be used as mask (full path 
            and file extension).

    outName (string): the name of the output file (with file extension).

    outPath (string): the directory to which the output file will be written. 
            Defaults to the parent directory of the input image.

    of (string): the desired format of the output file as provided by the 
            GDAL raster formats (see: http://www.gdal.org/formats_list.html). 
            Defaults to 'GTiff'.

    co (list): a list of strings, containing advanced raster creation 
            options such as band interleave.

            Example:
                co=['interleave=bil']
    keepWarp (boolean): if image and maskImage have different projection, 
            resolution or extent, the mask image will be warped using gdalwarp.
            If keepWarp is True, this warped mask image will not be deleted.
            Defaults to False (will be deleted).
    """

    # check if outfile exists and delete it:
    path = os.path.dirname(image)

    if outPath == None:
        pass
    else:
        path = outPath

    if outName in os.listdir(path):
        print 'Outfile already exists, will be overwritten!'
        os.remove(os.path.join(path, outName))

    gdal.AllRegister()

    driver = gdal.GetDriverByName(of)

    # get information of input raster
    ds = gdal.Open(image, GA_ReadOnly)
    cols = ds.RasterXSize
    rows = ds.RasterYSize
    bands = ds.RasterCount
    img_proj = ds.GetProjection()
    img_geo = ds.GetGeoTransform()

    # get information of mask
    mask_ds = gdal.Open(maskImage, GA_ReadOnly)
    mask_proj = mask_ds.GetProjection()
    mask_geo = mask_ds.GetGeoTransform()

    # check if input image and mask share the same projection, resolution and extent
    if img_proj == mask_proj and img_geo == mask_geo:
        pass
    else:
        mask_ds = None
        # warp mask to match input image
        print 'Mask image does not match input image. Transforming mask now...'
        # get bounding box of input image
        minX = img_geo[0]
        maxY = img_geo[3]
        pixSizeX = img_geo[1]
        pixSizeY = img_geo[5]
        maxX = minX + (cols * pixSizeX)
        minY = maxY + (rows * pixSizeY)
        ## set up gdalwarp command
        tempfile = os.path.join(path, os.path.splitext(os.path.basename(maskImage))[0] + '_warped' + os.path.splitext(maskImage)[1])
        if os.path.exists(tempfile):
            os.remove(tempfile)
        fmt = '-of GTiff'
        bb = '-te %s %s %s %s '%(minX, minY, maxX, maxY)
        res = '-tr %s %s '%(pixSizeX, pixSizeY)
        epsg = int(img_proj.split('''AUTHORITY["EPSG","''')[-1].strip('''"]]'''))
        proj = ''' -t_srs epsg:%s ''' %(epsg)
        warpcmd = string.join(['gdalwarp', fmt, bb, res, proj, maskImage, tempfile], sep=' ')
        # execute
        os.system(warpcmd)
        # open new file
        maskImage = tempfile
        mask_ds = gdal.Open(maskImage, GA_ReadOnly)

    # open mask image:
    maskBand = mask_ds.GetRasterBand(1)
    mask = maskBand.ReadAsArray(0, 0, mask_ds.RasterXSize, mask_ds.RasterYSize)
    mask_ds = None

    # create (empty) output image:
    if co == None:
        ds_out = driver.Create(os.path.join(path, outName), \
                               ds.RasterXSize, ds.RasterYSize, bands, \
                               ds.GetRasterBand(1).DataType)
    else:
        ds_out = driver.Create(os.path.join(path, outName), \
                                ds.RasterXSize, ds.RasterYSize, bands, \
                                ds.GetRasterBand(1).DataType, co)

    ds_out.SetProjection(ds.GetProjection())
    ds_out.SetGeoTransform(ds.GetGeoTransform())
    ds_out.SetMetadata(ds.GetMetadata())

    # apply mask to all bands:
    for b in xrange(1, bands + 1):
        band = ds.GetRasterBand(b)
        data = band.ReadAsArray(0, 0, cols, rows)
        data_out = mask * data
        # write masked band:
        b_out = ds_out.GetRasterBand(b)
        b_out.WriteArray(data_out)

        b_out = None

        try:
            pr.progress(b, xrange(1, bands + 1))
        except:
            pass

    # clean up
    ds_out = None
    ds = None
    # remove the warped mask only if one was actually created
    if keepWarp == False:
        try:
            os.remove(tempfile)
        except NameError:
            pass
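A minimal usage sketch for apply_mask(); the file names and output directory are hypothetical assumptions:

# multiply every band of the (assumed) input scene with a 0/1 cloud mask;
# the mask is warped to the scene grid first if it does not match
apply_mask('/data/scene.tif', '/data/cloudmask.tif', 'scene_masked.tif',
           outPath='/data/masked', of='GTiff', keepWarp=False)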