Exemplo n.º 1
0
def get_subreddit_sentiment_disagreement_res(query_filename, subreddit_name, subreddit_name_2, start_date, end_date):
    """Run the subreddit sentiment-disagreement query and return its rows as dicts.

    The SQL template is read from ``query_filename`` and filled in with
    ``str.format`` using six positional placeholders, supplied in this order:
    ``subreddit_name_2, start_date, end_date, subreddit_name, start_date,
    end_date``.

    Args:
        query_filename: Path of the text file holding the SQL template.
        subreddit_name: Value substituted into the fourth placeholder.
        subreddit_name_2: Value substituted into the first placeholder.
        start_date: Value substituted into the second and fifth placeholders.
        end_date: Value substituted into the third and sixth placeholders.

    Returns:
        A list with one dict per result row, keyed ``p_month``,
        ``p_sentiment``, ``c_sentiment``, ``other_p_sentiment`` and
        ``other_c_sentiment`` (result columns 0 to 4, in that order).
    """
    # Read the template and release the file handle before doing any DB work.
    with open(query_filename, 'r') as q:
        query_template = q.read()

    # NOTE(review): the arguments are interpolated straight into the SQL text,
    # so they must come from trusted code. Switching to bound parameters would
    # require changing the placeholders in the template file as well.
    subreddit_sentiment_disagreement_query = query_template.format(
        subreddit_name_2, start_date, end_date, subreddit_name, start_date, end_date
    ).strip()
    print('Query to retrieve subreddit sentiment disagreement: ', subreddit_sentiment_disagreement_query)

    conn = db_helpers.connect_to_postgres()
    cur = conn.cursor()
    try:
        cur.execute(subreddit_sentiment_disagreement_query)
        # Commit right after the statement, as the code always has; presumably
        # this just ends the transaction opened by execute().
        conn.commit()
        res = cur.fetchall()
    finally:
        # Release the cursor even when execute()/fetchall() raises.
        cur.close()

    print('Results: ', res)
    return [
        {
            'p_month': row[0],
            'p_sentiment': row[1],
            'c_sentiment': row[2],
            'other_p_sentiment': row[3],
            'other_c_sentiment': row[4],
        }
        for row in res
    ]
def get_sentiment_popularity_correlation_res(query_filename, subreddit_name):
    """Run the sentiment/popularity correlation query and return its rows as dicts.

    The SQL template is read from ``query_filename`` and filled in with
    ``str.format``, passing ``subreddit_name`` as the only positional value.

    Args:
        query_filename: Path of the text file holding the SQL template.
        subreddit_name: Value substituted into the template.

    Returns:
        A list with one dict per result row, keyed ``p_month``, ``p_upvotes``,
        ``p_sent`` and ``t_comments`` (result columns 0 to 3, in that order);
        ``t_comments`` is converted with ``int()``.
    """
    # Read the template and release the file handle before doing any DB work.
    with open(query_filename, 'r') as q:
        query_template = q.read()

    # NOTE(review): subreddit_name is interpolated straight into the SQL text,
    # so it must come from trusted code. Switching to bound parameters would
    # require changing the placeholder in the template file as well.
    sentiment_popularity_query = query_template.format(subreddit_name).strip()
    print('Query to retrieve sentiment popularity correlation: ',
          sentiment_popularity_query)

    conn = db_helpers.connect_to_postgres()
    cur = conn.cursor()
    try:
        cur.execute(sentiment_popularity_query)
        # Commit right after the statement, as the code always has; presumably
        # this just ends the transaction opened by execute().
        conn.commit()
        res = cur.fetchall()
    finally:
        # Release the cursor even when execute()/fetchall() raises.
        cur.close()

    print('Results: ', res)
    return [
        {
            'p_month': row[0],
            'p_upvotes': row[1],
            'p_sent': row[2],
            't_comments': int(row[3]),
        }
        for row in res
    ]
def get_sentiment_count_res(company_name, subreddit):
    """Count positive/negative posts mentioning a company in one subreddit.

    Steps:
      1. Look up a ticker symbol for ``company_name`` through the
         ``s.yimg.com`` ``autoc`` endpoint (best effort).
      2. Find, in the Mongo ``post_keywords`` collection, the ``post_id`` of
         every document whose ``keywords`` object has a key equal to the
         company name or to the lower-cased ticker.
      3. For each distinct post, read ``sentiment_score``, ``num_upvotes`` and
         ``subreddit_name`` from the Postgres ``post`` table and tally the
         posts whose ``subreddit_name`` equals ``subreddit``.

    Args:
        company_name: Company name used for the ticker lookup and as a keyword.
        subreddit: Only posts whose ``subreddit_name`` equals this are tallied.

    Returns:
        A dict with ``positive_post`` (posts with sentiment > 0),
        ``negative_post`` (posts with sentiment < 0) and ``average_upvote``
        (mean ``num_upvotes`` over the tallied posts, or 0 when there are none).

    Raises:
        Exception: Any error raised while querying Postgres is logged and
            re-raised with its original type.
    """
    wsb_mongo_db = db_helpers.connect_to_mongo()
    post_keywords_collection = wsb_mongo_db['post_keywords']

    # Best-effort ticker lookup: on any failure fall back to name-only
    # matching. `except Exception` (instead of a bare except) lets
    # KeyboardInterrupt / SystemExit propagate.
    try:
        response = requests.get(
            'https://s.yimg.com/aq/autoc',
            # Passing params lets requests URL-encode names containing
            # spaces, '&', etc.
            params={'query': company_name, 'region': 'US', 'lang': 'en-US'},
            # Without a timeout a stalled server would hang this call forever.
            timeout=10,
        )
        company_ticker = response.json()['ResultSet']['Result'][0]['symbol'].lower()
        print('company ticker: ', company_ticker)
    except Exception:
        print('Company ticker not found')
        company_ticker = ''

    # Match on the ticker only when one was found, so an empty string is never
    # used as a keyword.
    keyword_clauses = [{"data.k": '{}'.format(company_name)}]
    if company_ticker:
        keyword_clauses.append({"data.k": '{}'.format(company_ticker)})

    query = [
        {"$project": {"data": {"$objectToArray": "$keywords"}, "post_id": 1}},
        {"$unwind": "$data"},
        {"$match": {"$or": keyword_clauses}},
        {"$project": {"post_id": 1, "_id": 0}},
    ]

    # After $unwind a post that carries both the name and the ticker as
    # keywords shows up twice; de-duplicate (keeping first-seen order) so it
    # is tallied once.
    post_ids = list(dict.fromkeys(
        post['post_id'] for post in post_keywords_collection.aggregate(query)
    ))

    positive_post = 0
    negative_post = 0
    upvote = 0
    number_post = 0

    conn = db_helpers.connect_to_postgres()
    cur = conn.cursor()
    try:
        for post_id in post_ids:
            # Bound parameter instead of string formatting: post_id comes from
            # stored data and must not be able to alter the SQL.
            cur.execute(
                'SELECT sentiment_score, num_upvotes, subreddit_name'
                ' FROM post WHERE post_id = %s',
                (post_id,),
            )
            for row in cur.fetchall():
                # Stop at the first row from another subreddit (post_id is
                # presumably unique, so at most one row is expected).
                if row[2] != subreddit:
                    break
                sentiment_score = float(row[0])
                upvote += int(row[1])
                # Count only tallied posts so the average's denominator
                # matches the upvotes that were actually summed.
                number_post += 1
                if sentiment_score > 0:
                    positive_post += 1
                elif sentiment_score < 0:
                    negative_post += 1
    except Exception as error:
        print('ERROR with finding the postid: {}'.format(str(error)))
        # Re-raise as-is to keep the original exception type and traceback.
        raise
    finally:
        cur.close()

    average_upvote = upvote / number_post if number_post > 0 else 0
    return {
        'positive_post': positive_post,
        'negative_post': negative_post,
        'average_upvote': average_upvote,
    }
def get_moving_volatility_res(stock_symbol, start_date, end_date,
                              proc_start='1999-11-01 00:00:00',
                              proc_end='2001-12-07 00:00:00'):
    """Return the ``calVolatility2`` records for one symbol within a date range.

    The stored procedure ``calVolatility2`` is called with
    ``[proc_start, proc_end]``; its result is then filtered in Python by
    symbol and by timestamp.

    Args:
        stock_symbol: Symbol to keep; compared with each record's
            ``'stock_symbol'`` entry.
        start_date: Inclusive lower bound, a ``'%Y-%m-%d'`` string.
        end_date: Inclusive upper bound, a ``'%Y-%m-%d'`` string. It is parsed
            to midnight, so records later on that day are excluded.
        proc_start: First argument passed to ``calVolatility2``.
        proc_end: Second argument passed to ``calVolatility2``.

    Returns:
        A list of the first-column values of the matching result rows. Each is
        indexed by ``'stock_symbol'`` and ``'ts'`` (a
        ``'%Y-%m-%d %H:%M:%S'`` string).

    Raises:
        ValueError: If ``start_date``/``end_date`` (or a record's ``'ts'``)
            does not match the expected format.
    """
    # Validate the caller's dates before touching the database, so bad input
    # fails fast without running (and committing) the procedure.
    range_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    range_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')

    conn = db_helpers.connect_to_postgres()
    cur = conn.cursor()
    try:
        cur.callproc('calVolatility2', [proc_start, proc_end])
        conn.commit()
        res = cur.fetchall()
    finally:
        # Release the cursor whether or not the call succeeded.
        cur.close()

    final_res = []
    for row in res:
        # Only the first column of each row is used.
        record = row[0]
        curr_stock_symbol = record['stock_symbol']
        timestamp = datetime.datetime.strptime(record['ts'], '%Y-%m-%d %H:%M:%S')
        if curr_stock_symbol == stock_symbol and range_start <= timestamp <= range_end:
            final_res.append(record)

    return final_res
def get_tick_data_from_db(stock_symbol, start_date, end_date):
    """Fetch closing prices for one symbol from ``StockTickData``.

    Args:
        stock_symbol: Value matched against the ``stock_symbol`` column.
        start_date: Lower bound; rows whose ``ts`` date is on or after it are
            kept.
        end_date: Upper bound; ``ts`` is compared with this value cast to a
            date.

    Returns:
        ``{stock_symbol: [{'date': ts, 'close_price': close_price}, ...]}``
        with the entries ordered by ``ts`` ascending.
    """
    # Bound parameters instead of str.format: the values can no longer change
    # the structure of the SQL statement.
    # NOTE(review): the lower bound compares ts::DATE while the upper bound
    # compares the raw ts with a date, so rows later on end_date itself appear
    # to be excluded. This is kept unchanged; confirm it is intended.
    get_stock_tick_data_query = (
        'SELECT ts, close_price FROM StockTickData WHERE stock_symbol = %s'
        ' AND ts::DATE >= %s::DATE AND ts <= %s::DATE'
        ' ORDER BY ts ASC')
    query_params = (stock_symbol, start_date, end_date)

    print('Query to retrieve tick data: ', get_stock_tick_data_query, query_params)

    conn = db_helpers.connect_to_postgres()
    cur = conn.cursor()
    try:
        cur.execute(get_stock_tick_data_query, query_params)
        conn.commit()
        prices = [
            {'date': ts, 'close_price': close_price}
            for ts, close_price in cur
        ]
    finally:
        # Release the cursor even when the query or iteration fails.
        cur.close()

    return {stock_symbol: prices}