示例#1
0
def save_headlines(headlines):
    """Clean every headline and pass the resulting rows to ``add_headlines``.

    ``headlines`` is walked as a nested mapping:
    stock -> source -> date -> iterable of raw headline strings.

    For each stock the (word, replacement) rows of the ``dictionary`` table
    are loaded, longest word first, and handed to ``clean_headline`` together
    with each raw headline. One row per headline is produced:
    ``(stock, date, source, cleaned_headline, raw_headline, -999)``.
    ``add_headlines`` is called once per stock with that stock's rows.
    """
    for stock, by_source in headlines.items():

        # Replacement dictionary for this stock, longest words first.
        with db() as (conn, cur):
            cur.execute(
                "SELECT word, replacement FROM dictionary WHERE stock=? ORDER BY LENGTH(word) DESC",
                [stock])
            replacements = cur.fetchall()

        rows = []
        for source, by_date in by_source.items():
            for day, raw_headlines in by_date.items():
                rows.extend(
                    (stock, day, source, clean_headline(raw, replacements),
                     raw, -999)
                    for raw in raw_headlines)

        add_headlines(rows)
def get_embedding_matrix(tokenizer,
                         pretrained_file='glove.840B.300d.txt',
                         purge=False):
    """Build an embedding matrix for ``tokenizer`` from a GloVe text file.

    The matrix has shape ``(vocab_size + 1, emb_size)`` (both module-level
    values) and starts as zeros. Row ``i`` is filled with the GloVe vector of
    the word whose ``tokenizer.word_index`` value is ``i``; words without a
    vector keep a zero row.

    Args:
        tokenizer: object exposing ``word_index`` (word -> row index).
        pretrained_file: file name inside ``../data``. A falsy value skips
            loading and returns the zero matrix.
        purge: when true, every tokenizer word that has no vector and is not
            listed in the ``dictionary`` table under stock ``"none"`` causes
            all headlines whose ``content`` contains that word to be deleted.

    Returns:
        ``(embedding_matrix, glove_db)`` where ``glove_db`` maps normalised
        words to their vectors, or ``(embedding_matrix, None)`` when no
        pretrained file is given.
    """
    print("Loading WordVecs...")

    embedding_matrix = np.zeros((vocab_size + 1, emb_size))

    if not pretrained_file:
        return embedding_matrix, None

    ## Load Glove File (Super Slow) ##

    glove_db = dict()

    with open(os.path.join('..', 'data', pretrained_file),
              'r',
              encoding="utf-8") as glove:

        for line in glove:

            # Split from the right so that only the trailing emb_size numbers
            # are separated: some GloVe tokens contain spaces themselves, and
            # a plain split(' ') would leak token pieces into the vector and
            # make the float conversion fail.
            values = line.rstrip('\n').rsplit(' ', emb_size)
            word = values[0].replace('-', '').replace('_', '').lower()

            # Parse the vector only for words that are kept.
            if word.isalpha():
                glove_db[word] = np.asarray(values[1:], dtype='float32')

    print('Loaded WordVectors...' + str(len(glove_db)))

    ## Set Embeddings ##

    for word, i in tokenizer.word_index.items():

        embedding_vector = glove_db.get(word)

        if embedding_vector is not None:

            embedding_matrix[i] = embedding_vector

        elif purge:

            with db() as (conn, cur):

                # Words present in the dictionary under stock "none" are kept.
                cur.execute(
                    "SELECT 1 FROM dictionary WHERE word=? AND stock=?",
                    [word, "none"])

                if not cur.fetchall():

                    print("Purge..." + word)

                    # Substring match: removes every headline containing word.
                    cur.execute("DELETE FROM headlines WHERE content LIKE ?",
                                ["%" + word + "%"])
                    conn.commit()

    return embedding_matrix, glove_db
示例#3
0
from flask import Flask, request, render_template
from Database import db, get_json, idf_db
from wtforms import Form, StringField
from collections import defaultdict
import math
import time

# Flask application object; the route decorators in this module attach to it.
app = Flask(__name__)

# Build the three objects provided by the Database module and report how long
# that took.
t1 = time.time()
# NOTE(review): this rebinds the imported name `db` to the value returned by
# calling it, so the original `db` callable is no longer reachable here.
db = db()
j = get_json()
i = idf_db()
t2 = time.time()
print("Read time: " + str(t2 - t1))


class SearchBar(Form):
    """Form with a single text field, ``word``, labelled "Search: "."""
    word = StringField(label="Search: ")


@app.route('/', methods=['GET', 'POST'])
def index():
    """Render ``index.html`` with a ``SearchBar`` built from the request form."""
    return render_template("index.html", word=SearchBar(request.form))


@app.route('/Search')
def search():
    words = request.full_path[request.full_path.find('=') + 1:].split("+")
    t3 = time.time()
示例#4
0
def make_headline_to_effect_data():
    """
    Headline -> Effect

    Creates essentially the X, Y data for the embedding model to use
    when analyzing/encoding headlines.

    For every stock in ``stocks`` and every tick date on or after the stock's
    first headline date, the headlines between ``add_time(event_date, -14)``
    and the tick date are collected. Dates with fewer than ``sample_size``
    headlines, fewer than ``tick_window`` preceding ticks, or a missing
    before/after price are skipped. Each remaining date yields
    ``len(headlines) // sample_size`` random samples of ``sample_size``
    headlines, drawn without replacement and weighted towards recent ones.

    Stocks with no stored headlines are skipped.

    Returns:
        all_headlines: list of samples; each sample is a list of
            ``(date, source, cleaned_content, days_before_event)`` tuples.
        tick_hists: array with one normalised tick history per sample.
        effects: array with one ``[relative price change]`` per sample.
        test_indexes: array of positions in ``all_headlines`` whose event
            date is after ``test_cutoff``.
    """
    all_headlines, all_tick_hist, all_effects, test_indexes = [], [], [], []

    with db() as (conn, cur):

        for stock in stocks:

            ## Headline For Every Date ##

            cur.execute(
                "SELECT DISTINCT date FROM headlines WHERE stock=? ORDER BY date ASC LIMIT 1",
                [stock])
            first_headline = cur.fetchall()

            if not first_headline:
                # No headlines stored for this stock: nothing to build, and
                # indexing the empty result would raise IndexError.
                continue

            start_date = first_headline[0][0]

            cur.execute(
                "SELECT DISTINCT date FROM ticks WHERE stock=? AND date >= ? ORDER BY date ASC",
                [stock, start_date])
            dates = [row[0] for row in cur.fetchall()]

            for date in tqdm_notebook(dates, desc=stock):

                ## Collect Headlines ##

                event_date = datetime.strptime(date, '%Y-%m-%d')

                cur.execute(
                    "SELECT date, source, rawcontent FROM headlines WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC",
                    [stock, add_time(event_date, -14), date])
                # Fourth field: age of the headline in days relative to the event.
                headlines = [
                    (h_date, source, clean(content),
                     (event_date - datetime.strptime(h_date, '%Y-%m-%d')).days)
                    for (h_date, source, content) in cur.fetchall() if content
                ]

                if len(headlines) < sample_size:
                    continue

                ## Find corresponding tick data ##

                cur.execute(
                    """SELECT open, high, low, adjclose, volume FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC""",
                    [
                        stock,
                        add_time(event_date, -30 - tick_window),
                        add_time(event_date, 0)
                    ])

                # Newest first; keep only the tick_window most recent rows.
                before_headline_ticks = cur.fetchall()[:tick_window]

                if len(before_headline_ticks) != tick_window:
                    continue

                cur.execute(
                    """SELECT AVG(adjclose) FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date""",
                    [stock,
                     add_time(event_date, 1),
                     add_time(event_date, 4)])

                after_headline_ticks = cur.fetchall()

                if len(after_headline_ticks) == 0:
                    continue

                previous_tick = before_headline_ticks[0][3]  # latest adjclose
                result_tick = after_headline_ticks[0][0]  # AVG(adjclose) after

                if not previous_tick or not result_tick:
                    continue

                # Normalise each tick column to zero mean / unit std.
                tick_hist = np.array(before_headline_ticks)
                tick_hist -= np.mean(tick_hist, axis=0)
                tick_hist /= np.std(tick_hist, axis=0)

                ## Create training example ##

                # Sampling weight 1 / (age_in_days + 1), normalised to sum to 1.
                probs = [1 / (headline[3] + 1) for headline in headlines]
                probs /= np.sum(probs)

                num_samples = len(headlines) // sample_size

                effect = [(result_tick - previous_tick) / previous_tick]

                for _ in range(num_samples):

                    indexes = np.random.choice(np.arange(len(headlines)),
                                               sample_size,
                                               replace=False,
                                               p=probs)

                    sample = [headlines[idx] for idx in indexes]

                    if event_date > test_cutoff:  # Mark as Test Example
                        test_indexes.append(len(all_headlines))

                    all_headlines.append(sample)
                    all_tick_hist.append(tick_hist)
                    all_effects.append(effect)

    return all_headlines, np.array(all_tick_hist), np.array(
        all_effects), np.array(test_indexes)
示例#5
0
def predict(stock,
            model=None,
            toke=None,
            current_date=None,
            predict_date=None):
    """Predict price effects for ``stock`` from recent headlines and ticks.

    When either ``model`` or ``toke`` is missing, both are loaded from
    ``../models`` (``toke2-tick.pkl`` and the ``.h5`` file for ``model_type``).
    ``current_date`` defaults to today and ``predict_date`` to the day after
    ``current_date``.

    Headlines between ``add_time(current_date, -14)`` and ``current_date``
    are sampled in groups of ``sample_size`` (weighted towards recent ones),
    paired with the normalised recent tick history, encoded and fed to the
    model.

    Returns:
        ``(predictions, prices)``: the first output column of the model and
        ``predictions * 0.023 * latest_adjclose + latest_adjclose``.
    """
    import keras.metrics
    keras.metrics.correct_sign_acc = correct_sign_acc

    if not model or not toke:

        toke_path = os.path.join('..', 'models', 'toke2-tick.pkl')
        with open(toke_path, 'rb') as toke_file:
            toke = pickle.load(toke_file)

        model_path = os.path.join(
            '..', 'models', 'media-headlines-ticks-' + model_type + '.h5')
        model = load_model(model_path)

    vocab_size = len(toke.word_counts)

    current_date = current_date or datetime.today()
    predict_date = predict_date or current_date + timedelta(days=1)

    sampled_headlines, sampled_ticks = [], []

    with db() as (conn, cur):

        event_date = current_date
        window_end = datetime.strftime(event_date, '%Y-%m-%d')

        ## Collect headlines ##

        cur.execute(
            "SELECT date, source, rawcontent FROM headlines WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC",
            [stock, add_time(event_date, -14), window_end])

        headlines = []
        for (day, source, content) in cur.fetchall():
            if not content:
                continue
            cleaned = clean(content)
            age_days = (event_date - datetime.strptime(day, '%Y-%m-%d')).days
            headlines.append((day, source, cleaned, age_days))

        ## Find corresponding tick data ##

        cur.execute(
            """SELECT open, high, low, adjclose, volume FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC""",
            [
                stock,
                add_time(event_date, -30 - tick_window),
                add_time(event_date, 0)
            ])

        recent_ticks = cur.fetchall()[:tick_window]
        actual_current = recent_ticks[0][3]  # adjclose of the newest row

        # Column-wise zero mean / unit std.
        tick_hist = np.array(recent_ticks)
        tick_hist -= np.mean(tick_hist, axis=0)
        tick_hist /= np.std(tick_hist, axis=0)

        ## Sample headline groups ##

        # Weight 1 / (age_in_days + 1), normalised to a distribution.
        weights = [1 / (item[3] + 1) for item in headlines]
        weights /= np.sum(weights)

        num_samples = len(headlines) // sample_size

        for _ in range(num_samples):

            picks = np.random.choice(np.arange(len(headlines)),
                                     sample_size,
                                     replace=False,
                                     p=weights)

            sampled_headlines.append([headlines[k] for k in picks])
            sampled_ticks.append(tick_hist)

        ## Process ##

        encoded_headlines, toke = encode_sentences(sampled_headlines,
                                                   tokenizer=toke,
                                                   max_length=max_length)

        tick_hists = np.array(sampled_ticks)

        predictions = model.predict([encoded_headlines, tick_hists])[:, 0]

        prices = predictions * 0.023 * actual_current + actual_current

        return predictions, prices
示例#6
0
    ## Options ##

    stock = 'INTC'
    current_date = '2018-03-08'
    predict_date = '2018-03-09'

    ## Run ##

    predictions, prices = predict(
        stock,
        current_date=datetime.strptime(current_date, '%Y-%m-%d'),
        predict_date=datetime.strptime(predict_date, '%Y-%m-%d'))

    ## Find Actual Value ##

    with db() as (conn, cur):

        cur.execute(
            """SELECT adjclose FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC LIMIT 1""",
            [
                stock,
                add_time(datetime.strptime(predict_date, '%Y-%m-%d'), 0),
                add_time(datetime.strptime(predict_date, '%Y-%m-%d'), 6)
            ])

        after_headline_ticks = cur.fetchall()
        try:
            actual_result = after_headline_ticks[0][0]
        except:
            actual_result = -1
def make_headline_to_effect_data():
    """
    Headline -> Effect

    Creates essentially the X, Y data for the embedding model to use
    when analyzing/encoding headlines.

    For every stock in ``stocks``, each stored headline whose content has
    between 5 and 40 spaces is paired with the ``tick_window`` most recent
    ticks up to its date and with the average adjusted close of the
    following days. Headlines lacking enough ticks or a before/after price
    are skipped.

    The effect depends on ``model_type``:
      * ``'regression'``: relative price change divided by 0.023, multiplied
        by 4 when the sentiment label (-1 / 1) has the same sign and divided
        by 4 when it does not.
      * otherwise: ``[1., 0.]`` for a rise and ``[0., 1.]`` for anything
        else, replaced by ``[.5, .5]`` when a -1 / 1 sentiment label
        disagrees with the price direction.

    Returns:
        ``(meta, headlines, tick_hists, effects)`` where ``meta`` holds
        ``(source, weekday)`` per example and the last two are arrays.
    """
    meta, headlines, tick_hists, effects = [], [], [], []

    with db() as (conn, cur):

        for stock in stocks:

            print("Fetching Stock..." + stock)

            ## Go through all the headlines ##

            cur.execute(
                "SELECT date, source, content, sentimentlabel FROM headlines WHERE stock=?",
                [stock])
            headline_query = cur.fetchall()

            for (date, source, content, label) in headline_query:

                if not content or not (5 <= content.count(' ') <= 40):
                    continue

                event_date = datetime.strptime(
                    date, '%Y-%m-%d')  # The date of headline

                ## Find corresponding tick data ##

                cur.execute(
                    """SELECT open, high, low, adjclose, volume FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC""",
                    [
                        stock,
                        add_time(event_date, -30 - tick_window),
                        add_time(event_date, 0)
                    ])

                before_headline_ticks = cur.fetchall()[:tick_window]

                if len(before_headline_ticks) != tick_window:
                    continue

                cur.execute(
                    """SELECT AVG(adjclose) FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date""",
                    [stock,
                     add_time(event_date, 1),
                     add_time(event_date, 4)])

                after_headline_ticks = cur.fetchall()

                # Check for rows before indexing into the result.
                if not after_headline_ticks:
                    continue

                ## Create training example ##

                previous_tick = before_headline_ticks[0][3]
                result_tick = after_headline_ticks[0][0]

                if not previous_tick or not result_tick:
                    continue

                # Column-wise zero mean / unit std.
                tick_hist = np.array(before_headline_ticks)
                tick_hist -= np.mean(tick_hist, axis=0)
                tick_hist /= np.std(tick_hist, axis=0)

                if model_type == 'regression':

                    # Percent Diff (+Normalization Constant)
                    effect = [(result_tick - previous_tick) / previous_tick /
                              0.023]

                    # Use labels to adjust effect
                    if label in [-1, 1]:
                        if label == np.sign(effect[0]):
                            effect = [effect[0] * 4]
                        else:
                            effect = [effect[0] / 4]

                else:

                    went_up = result_tick > previous_tick
                    effect = [1., 0.] if went_up else [0., 1.]

                    # Compare the label with the price direction (+1 / -1).
                    # Comparing against effect[0] (which is only 0. or 1.)
                    # would make a -1 label disagree even with a falling
                    # price.
                    direction = 1 if went_up else -1
                    if label in [-1, 1] and label != direction:
                        effect = [.5, .5]

                meta.append((source, event_date.weekday()))
                headlines.append(content)
                tick_hists.append(tick_hist)
                effects.append(effect)

    return meta, headlines, np.array(tick_hists), np.array(effects)
def predict(stock,
            model=None,
            toke=None,
            current_date=None,
            predict_date=None,
            look_back=None):
    """Predict one price effect per recent headline of ``stock``.

    When either ``model`` or ``toke`` is missing, both are loaded from
    ``../models`` (``toke-tick.pkl`` and the ``.h5`` file for ``model_type``).
    Defaults: ``current_date`` is today, ``predict_date`` the following day,
    ``look_back`` 3.

    Headlines dated between ``add_time(current_date, -look_back)`` and
    ``current_date`` are encoded together with their ``[source, weekday]``
    metadata, and every headline is paired with the same normalised tick
    history.

    Returns:
        ``(predictions, prices)``: the first output column of the model and
        ``predictions * 0.023 * latest_adjclose + latest_adjclose``.
    """
    import keras.metrics
    keras.metrics.correct_sign_acc = correct_sign_acc

    if not model or not toke:

        toke_path = os.path.join('..', 'models', 'toke-tick.pkl')
        with open(toke_path, 'rb') as toke_file:
            toke = pickle.load(toke_file)

        model_path = os.path.join(
            '..', 'models', 'media-headlines-ticks-' + model_type + '.h5')
        model = load_model(model_path)

    vocab_size = len(toke.word_counts)

    current_date = current_date or datetime.today()
    predict_date = predict_date or current_date + timedelta(days=1)
    look_back = look_back or 3

    pretick_date = add_time(current_date, -look_back)

    with db() as (conn, cur):

        ## Select Actual Stock Values ##

        cur.execute(
            """SELECT open, high, low, adjclose, volume FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC""",
            [
                stock,
                add_time(current_date, -30 - tick_window),
                add_time(current_date, 0)
            ])

        recent_ticks = cur.fetchall()[:tick_window]
        actual_current = recent_ticks[0][3]  # adjclose of the newest row

        # The result of this query is fetched but not used further below.
        cur.execute(
            """SELECT adjclose FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC LIMIT 1""",
            [stock,
             add_time(predict_date, 1),
             add_time(predict_date, 5)])

        after_headline_ticks = cur.fetchall()

        # Column-wise zero mean / unit std.
        tick_hist = np.array(recent_ticks)
        tick_hist -= np.mean(tick_hist, axis=0)
        tick_hist /= np.std(tick_hist, axis=0)

        ## Find Headlines ##

        cur.execute(
            "SELECT date, source, content FROM headlines WHERE date BETWEEN ? AND ? AND stock=?",
            [pretick_date, current_date, stock])
        headlines = cur.fetchall()

        ## Process ##

        meta = [[source, datetime.strptime(day, '%Y-%m-%d').weekday()]
                for (day, source, _) in headlines]
        test_sents = [content for (_, _, content) in headlines]

        encoded_meta, test_encoded, _ = encode_sentences(meta,
                                                         test_sents,
                                                         tokenizer=toke,
                                                         max_length=max_length,
                                                         vocab_size=vocab_size)

        # Same tick history repeated once per headline.
        tick_hists = np.array([tick_hist] * len(headlines))

        predictions = model.predict([test_encoded, tick_hists,
                                     encoded_meta])[:, 0]

        prices = predictions * 0.023 * actual_current + actual_current

        return predictions, prices
示例#9
0
    def dataIsUpdate(self):
        """Propagate worker updates to the GUI, or exit when the GUI is gone.

        Every entry of ``self.threads`` except the last is checked for a
        truthy ``isUpdated`` flag, and each set flag is cleared. If
        ``self.GuiIsActive()`` is false the process is terminated by raising
        ``SystemExit``. Otherwise, when at least one flag was set,
        ``self.threads[-1].app.update()`` is called.

        Raises:
            SystemExit: when the GUI is no longer active.
        """
        # Local import keeps this block self-contained. sys.exit() is used
        # instead of quit(): quit() is an interactive helper installed by the
        # site module and is not guaranteed to exist in every runtime.
        import sys

        isUpdate = False
        for worker in self.threads[:-1]:
            if worker.isUpdated:
                isUpdate = True
                worker.isUpdated = False  # consume the flag

        if not self.GuiIsActive():
            sys.exit()

        if isUpdate:
            self.threads[-1].app.update()


# Open the weather database and declare the columns of the 'weather' table
# (column name -> type string) before creating the schema.
database = db('sqlite:///weather.sqlite3')
database.addTable('weather',
                  date='string',
                  mint='float',
                  maxt='float',
                  location='string',
                  humidity='float',
                  feels_like="float")
database.createBase()

# Locations handed to ThreadsHandler.
locations = ["Moscow", "Volgograd", "New York"]

# NOTE(review): the third argument (30) is presumably a refresh interval —
# confirm against ThreadsHandler.
try:
    ThreadsHandler(database, locations, 30).run()
except SystemExit:
    # A SystemExit raised while running ends the script via quit().
    quit()
示例#10
0
from Database import Database_connection as db
# Connection wrapper; the code below uses its `kursor` and `koneksi` attributes.
database = db()
# Single hard-coded INSERT of one article row into `berita_detail`.
qy = """
INSERT INTO `berita_detail` (`judul`, `waktu`, `tag`, `isi`, `sumber`) VALUES
('Mentan harap tatanan normal baru pulihkan permintaan produk pertanian', '2020-06-07', '[normal baru,new normal,petani]', ' Dengan kebijakan normal baru utamanya di sektor pariwisata diharapkan dapat memulihkan permintaan produk pertanian Jakarta (ANTARA) - Menteri Pertanian Syahrul Yasin Limpo berharap tatanan normal baru dapat mendongkrak kesejahteraan petani dan memulihkan permintaan produk pertanian dengan dimulainya aktivitas hotel, restoran, katering (Horeka) dan perkantoran. Dampak yang ditimbulkan akibat pandemi ini masih dirasakan masyarakat, termasuk para petani. Faktor yang mempengaruhi petani yakni harga produk pertanian mengalami tekanan diakibatkan oleh panen raya musim tanam pertama. \"Kondisi ini menyebabkan deflasi kelompok bahan makanan dimana jumlah bahan pangan di lapangan banyak namun permintaan berkurang berakibat langsung dengan pendapatan petani,\" kata Syahrul dalam keterangan di Jakarta, Minggu. Selain itu, petani juga dihadapkan pada gangguan distribusi akibat Pembatasan Sosial Berskala Besar (PSBB), penurunan daya beli masyarakat, melemahnya sektor ekonomi yang terkait dengan sektor pertanian seperti Horeka dan perkantoran. Menurut Mentan, selama pandemi deflasi kelompok bahan makanan masih berimplikasi positif terhadap stabilitas sosial dan politik. Untuk mengurangi dampak ke pendapatan yang diterima petani, pemerintah memberikan bantuan sosial yang dapat mengkompensasi penurunan daya beli petani yang diakibatkan oleh penurunan harga produk pertanian. \"Dengan kebijakan normal baru utamanya di sektor pariwisata diharapkan dapat memulihkan permintaan produk pertanian sehingga dapat memperbaiki harga di tingkat petani,\" kata Syahrul. Kementerian Pertanian (Kementan) mencatat bahwa panen raya musim pertama sukses mengamankan stok pangan sehingga tidak terjadi gejolak kenaikan harga dan tersendatnya distribusi 11 bahan pokok khususnya dalam menghadapi Ramadhan dan Hari Raya Idul Fitri. 
Eksport komoditas pertanian juga masih tumbuh sebesar 12,6 persen. Namun demikian, Nilai Tukar Petani (NTP) diakui memang turun akibat pandemi. Syahrul menilai kondisi ini hanya sesaat. Menurut Mentan, kunci meningkatkan NTP adalah menyeimbangkan penawaran dan permintaan. Kebijakan pemerintah untuk membuka sektor pariwisata dan aktivitas perkantoran harus dipersiapkan dengan baik karena dengan keberhasilan kebijakan ini dapat berkontribusi terhadap perbaikan harga di tingkat petani. Menghadapi fenomena yang terjadi di kalangan petani, Mentan Syahrul mengatakan bahwa pihaknya sedang melakukan berbagai upaya salah satunya melakukan pengendalian dari sisi harga pertanian melalui koordinasi Bulog dan Kementerian Perdagangan. Pewarta: Mentari Dwi Gayati Editor: Ahmad Wijaya COPYRIGHT © ANTARA 2020 (adsbygoogle = window.adsbygoogle || []).push({}); ', 'antara')
"""
# Run the insert, commit, and print the id of the new row; on any error roll
# the transaction back and print the exception instead of re-raising it.
try:
    database.kursor.execute(qy)
    database.koneksi.commit()
    gen_id = database.kursor.lastrowid
    print(gen_id)
except Exception as ex:
    database.koneksi.rollback()
    print(ex)


def make_doc_embeddings(query_range=(None, '1776-07-04', '3000-01-01'),
                        use_extra_dates=True,
                        vec_model=None):
    """
    Create document embeddings from headlines

    One document is built per (stock, date): the cleaned headlines of the
    window from ``add_time(date, -doc_query_days)`` to ``date``, joined with
    `` **NEXT** `` and tagged ``"<stock> <date>"``.

    Args:
        query_range: ``(stock, start, end)``; a falsy stock means all stocks
            in ``stocks``, and start/end bound the headline dates queried.
        use_extra_dates: also build documents for the day before and the day
            after every headline date.
        vec_model: existing Doc2Vec model. When falsy a new model is trained
            on the documents and vectors are read from it; otherwise vectors
            are inferred with the given model.

    Returns:
        ``(vec_model, vectors, (docs, labels))`` where ``vectors`` maps
        stock -> date -> vector.
    """
    training = not vec_model

    if training:
        print('Creating doc embeddings...')

    docs, labels = [], []

    class LabeledLineSentence:
        """Iterable yielding one TaggedDocument per (doc, label) pair."""

        def __init__(self, docs, labels):
            self.docs = docs
            self.labels = labels

        def __iter__(self):
            for text, tag in zip(self.docs, self.labels):
                yield TaggedDocument(text.split(), [tag])

    with db() as (conn, cur):

        q_stock, q_start, q_end = query_range

        for stock in stocks:

            if q_stock and q_stock != stock:
                continue

            ## Headline For Every Date ##

            cur.execute(
                "SELECT DISTINCT date FROM headlines WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC",
                [stock, q_start, q_end])
            dates = [row[0] for row in cur.fetchall()]

            if use_extra_dates:
                # Headline days alone are not enough, so the neighbouring
                # days are queried as well.
                neighbours = []
                for day in dates:
                    parsed = datetime.strptime(day, '%Y-%m-%d')
                    neighbours += [add_time(parsed, -1), add_time(parsed, +1)]
                dates += neighbours

            # Progress bar only while building training data.
            date_iter = (tqdm_notebook(dates, desc=stock)
                         if training else iter(dates))

            for date in date_iter:

                ## Collect Headlines ##

                event_date = datetime.strptime(date, '%Y-%m-%d')

                cur.execute(
                    "SELECT date, source, rawcontent FROM headlines WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC",
                    [stock, add_time(event_date, -doc_query_days), date])

                headlines = [
                    (h_date, source, clean(content),
                     (event_date - datetime.strptime(h_date, '%Y-%m-%d')).days)
                    for (h_date, source, content) in cur.fetchall() if content
                ]

                if not headlines:
                    continue

                ## Create training example ##

                docs.append(" **NEXT** ".join(item[2] for item in headlines))
                labels.append(stock + " " + date)

    vectors = {stock: {} for stock in stocks}

    doc_iter = LabeledLineSentence(docs, labels)

    if training:

        vec_model = Doc2Vec(documents=doc_iter, **doc2vec_options)

        for label in labels:
            stock, date = label.split(" ")
            vectors[stock][date] = vec_model.docvecs[label]

    else:

        for tag_doc in doc_iter:
            stock, date = tag_doc.tags[0].split(" ")
            vectors[stock][date] = vec_model.infer_vector(
                tag_doc.words,
                alpha=doc2vec_options['alpha'],
                min_alpha=doc2vec_options['min_alpha'],
                steps=1000)

    return vec_model, vectors, (docs, labels)
def make_tick_data(query_range=(None, '1776-07-04', '3000-01-01'), train=True):
    """
    Process historic tick data (high/low/close/etc..) into training examples

    For every selected stock and every tick date from the first headline date
    in the range up to the range end, the ``tick_window`` most recent ticks
    (in chronological order) are normalised with the mean and standard
    deviation of up to 52 most recent ticks.

    Args:
        query_range: ``(stock, start, end)``; a falsy stock means all stocks
            in ``stocks``.
        train: when true, a label is also produced from the first adjusted
            close found between ``add_time(date, 1)`` and
            ``add_time(date, 4)``; dates without such a value are skipped.

    Returns:
        ``(tick_vecs, effect_vecs)``, both mapping stock -> date -> value.
        ``effect_vecs`` entries are ``[1., 0.]`` when the later price is
        higher than the latest one and ``[0., 1.]`` otherwise; it stays empty
        when ``train`` is false. Stocks without headlines in the range keep
        empty dictionaries.
    """
    if train:
        print('Creating tick data...')

    tick_vecs = {stock: {} for stock in stocks}
    effect_vecs = {stock: {} for stock in stocks}

    with db() as (conn, cur):

        q_stock, q_start, q_end = query_range

        for stock in stocks:

            if q_stock and q_stock != stock:
                continue

            cur.execute(
                "SELECT DISTINCT date FROM headlines WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC LIMIT 1",
                [stock, q_start, q_end])
            first_headline = cur.fetchall()

            if not first_headline:
                # No headline in the requested range: leave this stock empty
                # instead of failing on an empty result.
                continue

            start_date = first_headline[0][0]

            cur.execute(
                "SELECT DISTINCT date FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC",
                [stock, start_date, q_end])
            dates = [row[0] for row in cur.fetchall()]

            for date in dates:

                event_date = datetime.strptime(
                    date, '%Y-%m-%d')  # The date of headline

                ## Find corresponding tick data ##

                cur.execute(
                    """SELECT open, high, low, adjclose, volume FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC LIMIT 52""",
                    [
                        stock,
                        add_time(event_date, -80),
                        add_time(event_date, 0)
                    ])

                before_headline_ticks = cur.fetchall()  # newest first

                if len(before_headline_ticks) < tick_window:
                    continue

                if train:

                    cur.execute(
                        """SELECT adjclose FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC LIMIT 1""",
                        [
                            stock,
                            add_time(event_date, 1),
                            add_time(event_date, 4)
                        ])

                    after_headline_ticks = cur.fetchall()

                    if not after_headline_ticks:
                        continue

                    result_tick = after_headline_ticks[0][0]

                    if result_tick is None:
                        # A NULL price cannot be compared with previous_tick.
                        continue

                ## Create ##

                window_ticks = np.array(
                    list(reversed(before_headline_ticks[:tick_window]))
                )  # Flip so in chron. order
                norm_ticks = np.array(
                    before_headline_ticks)  # Up to 52 ticks used to normalize

                previous_tick = before_headline_ticks[0][3]  # latest adjclose

                if previous_tick:

                    window_ticks -= np.mean(norm_ticks, axis=0)
                    window_ticks /= np.std(norm_ticks, axis=0)

                    tick_vecs[stock][date] = window_ticks

                    if train:

                        if result_tick > previous_tick:
                            effect = [1., 0.]
                        else:
                            effect = [0., 1.]

                        effect_vecs[stock][date] = effect

    return tick_vecs, effect_vecs
def make_headline_to_effect_data():
    """
    Headline -> Effect

    Builds the X, Y data for the embedding model. Every stored headline of
    every stock in ``stocks`` is cleaned, kept only when the cleaned text has
    between 5 and 35 spaces, and paired with:

      * the ``tick_window`` most recent ticks up to its date, in
        chronological order and normalised with the statistics of up to 52
        most recent ticks;
      * the relative change from the latest adjusted close to the first
        adjusted close found between ``add_time(date, 1)`` and
        ``add_time(date, 4)``, divided by 0.023.

    Returns:
        ``(meta, headlines, tick_hists, effects, test_indices)`` where
        ``meta`` holds ``(source, weekday)`` per example and
        ``test_indices`` lists the positions whose date is after
        ``test_cutoff``.
    """
    meta, headlines, tick_hists, effects, test_indices = [], [], [], [], []

    with db() as (conn, cur):

        for stock in stocks:

            print("Fetching Stock..." + stock)

            ## Go through all the headlines ##

            cur.execute(
                "SELECT date, source, rawcontent FROM headlines WHERE stock=?",
                [stock])

            for (date, source, raw) in cur.fetchall():

                if not raw:
                    continue

                content = clean(raw)

                n_spaces = content.count(' ')
                if n_spaces < 5 or n_spaces > 35:
                    continue

                event_date = datetime.strptime(date, '%Y-%m-%d')

                ## Find corresponding tick data ##

                cur.execute(
                    """SELECT open, high, low, adjclose, volume FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date DESC LIMIT 52""",
                    [
                        stock,
                        add_time(event_date, -80),
                        add_time(event_date, 0)
                    ])

                ticks_before = cur.fetchall()  # newest first

                if len(ticks_before) < tick_window:
                    continue

                cur.execute(
                    """SELECT adjclose FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date ASC LIMIT 1""",
                    [stock,
                     add_time(event_date, 1),
                     add_time(event_date, 4)])

                ticks_after = cur.fetchall()

                ## Create training example ##

                if not ticks_after:
                    continue

                # Oldest-to-newest view of the most recent tick_window rows.
                window_ticks = np.array(ticks_before[:tick_window][::-1])
                # All fetched rows provide the normalisation statistics.
                norm_ticks = np.array(ticks_before)

                previous_tick = ticks_before[0][3]
                result_tick = ticks_after[0][0]

                if not (previous_tick and result_tick):
                    continue

                window_ticks -= np.mean(norm_ticks, axis=0)
                window_ticks /= np.std(norm_ticks, axis=0)

                # Percent Diff (/ Normalization Constant)
                change = (result_tick - previous_tick) / previous_tick / 0.023

                if event_date > test_cutoff:  # Mark as Test Example
                    test_indices.append(len(headlines))

                meta.append((source, event_date.weekday()))
                headlines.append(content)
                tick_hists.append(window_ticks)
                effects.append([change])

    return meta, headlines, np.array(tick_hists), np.array(effects), np.array(
        test_indices)
示例#14
0
    currently not working
    """
    item_ID = request.form.to_dict()["hidden"]
    return render_template("add_another.html",
                           item=database.get_selected(item_ID)[:3])


@app.route('/add_another', methods=["POST"])
def add_another():
    """
    currently not working

    Reads the posted form, takes the item id from its "hidden" field, passes
    every field whose name does not contain "hidden" to
    ``database.add_esc`` together with that id, and returns ``hello()``.
    """
    form_data = request.form.to_dict()
    item_ID = form_data["hidden"]
    info = {
        key: value
        for key, value in form_data.items() if "hidden" not in str(key)
    }
    database.add_esc(info, item_ID)
    return hello()


# Script entry point: build the module-level `database` from the two attribute
# tuples, then start the Flask server.
if __name__ == '__main__':
    item_attributes = ("name", "make", "model", "ID", "room", "teacher",
                       "condition", "manual", "movable", "description",
                       "hidden")
    log_values = ('name', 'to', 'from', 'tout', 'tin')
    database = db(log_values, item_attributes)
    # NOTE(review): debug=True together with host='0.0.0.0' makes the debug
    # server reachable from other machines — confirm this is only used for
    # local development.
    app.run(host='0.0.0.0', port=8000, debug=True)
    # Reached only after app.run() returns.
    print("", file=sys.stdout)
示例#15
0
def _shifted_date(day, days):
    """Return `day` moved by `days` days, formatted as 'YYYY-MM-DD'."""
    return (day + timedelta(days=days)).strftime('%Y-%m-%d')


def _average_adjclose(cur, stock, start, end):
    """Return AVG(adjclose) of `stock` for dates in [start, end], or None.

    `start` and `end` are 'YYYY-MM-DD' strings (BETWEEN is inclusive on both
    ends). None is returned when the query yields no row or when the average
    itself is NULL.
    """
    cur.execute(
        """SELECT AVG(adjclose) FROM ticks WHERE stock=? AND date BETWEEN ? AND ? ORDER BY date""",
        [stock, start, end])
    rows = cur.fetchall()

    # An ungrouped aggregate yields a single row, so row 0 is the only value.
    return rows[0][0] if rows else None


def make_headline_to_effect_data():
    """
    Headline -> Effect

    Creates essentially the X, Y data for the embedding model to use
    when analyzing/encoding headlines.

    For every stock in `stocks`, each stored headline with at least 16
    characters of content is paired with an 'effect' derived from two
    averaged adjusted-close prices: the average over the headline date and
    the three days before it, and the average over the six days after it.
    Headlines for which either average is unavailable are skipped.

    The effect depends on the module-level `model_type`:

    * 'regression': a one-element list holding the relative price change
      divided by the normalization constant 0.0044. Headlines whose
      baseline average is exactly zero are skipped, because the relative
      change is undefined there.
    * anything else: a one-hot pair, [1., 0.] when the later average is
      higher than the earlier one and [0., 1.] otherwise.

    Returns:
        A tuple `(meta, headlines, effects)` where `meta` is a list of
        `(source, weekday)` tuples (weekday as given by
        `datetime.weekday()`), `headlines` is the list of headline texts,
        and `effects` is a numpy array built from the per-headline effect
        lists. The three are index-aligned.
    """
    meta, headlines, effects = [], [], []

    with db() as (conn, cur):

        for stock in stocks:

            print("Fetching Stock..." + stock)

            ## Go through all the headlines ##

            cur.execute(
                "SELECT date, source, content FROM headlines WHERE stock=? AND LENGTH(content) >= 16",
                [stock])

            # fetchall() materializes the rows first, so the cursor can be
            # reused for the tick queries inside the loop.
            for (date, source, content) in cur.fetchall():

                event_date = datetime.strptime(
                    date, '%Y-%m-%d')  # The date of headline

                ## Find corresponding tick data ##

                previous_tick = _average_adjclose(
                    cur, stock,
                    _shifted_date(event_date, -3),
                    _shifted_date(event_date, 0))

                result_tick = _average_adjclose(
                    cur, stock,
                    _shifted_date(event_date, 1),
                    _shifted_date(event_date, 6))

                if previous_tick is None or result_tick is None:
                    continue  # No tick data on one side of the headline

                ## Create training example ##

                if model_type == 'regression':

                    if previous_tick == 0:
                        continue  # Relative change undefined for a zero baseline

                    # Percent Diff (/ Normalization Constant)
                    effect = [(result_tick - previous_tick) /
                              previous_tick / 0.0044]

                elif result_tick > previous_tick:

                    effect = [1., 0.]

                else:

                    effect = [0., 1.]

                meta.append((source, event_date.weekday()))
                headlines.append(content)
                effects.append(effect)

    return meta, headlines, np.array(effects)
示例#16
0
# Remaining command-line switches, registered on `clp` (the parser object
# created earlier in the script), followed by the actual parse.
# NOTE(review): the help strings use backslash line-continuation inside the
# literal, so the next line's leading spaces become part of the text.
# Presumably `clp` is an argparse parser, which re-wraps help text when it is
# displayed -- confirm before reformatting these literals.

# -a/--all-feeds: boolean flag, off unless given.
clp.add_argument('-a', '--all-feeds', action='store_true', help='Show all \
        feeds in output even if they don\'t have any new rss items. Default \
        is not to show them')
# -f/--feed-id: takes a value identifying a single feed.
clp.add_argument('-f', '--feed-id', help='Only use or check this feed id')
# -l/--list: boolean flag.
clp.add_argument('-l', '--list', action='store_true', help='List all Feeds')
# -c/--comments: boolean flag.
clp.add_argument('-c', '--comments', action='store_true', help='Show link to \
        feed comments (if available)')
# --html: boolean flag, long form only.
clp.add_argument('--html', action='store_true', help='Output rss list in \
        simple html')
# Parse the command line; the code below reads the options from `clargs`.
clargs = clp.parse_args()

# A feed URL on the command line means "store this feed and quit". The title
# is passed through as given and may be empty: the previous condition,
# `(title and url) or url`, already reduced to a test on the URL alone.
if clargs.url:
    ts = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    try:
        with db(dbc, db_feed_table) as db_add:
            db_add.add_feed(clargs.title, clargs.url, ts)
    except MySQLdb.OperationalError:
        # Use the public exception name exported by the MySQLdb package
        # rather than reaching into its private `_exceptions` module.
        print("No mysql server connection found. Exiting.")
    # Adding a feed is a one-shot action: stop here whether or not it worked.
    # NOTE(review): the exit status is 0 even after the connection error;
    # consider a non-zero status if callers need to detect the failure.
    sys.exit()

# If an output filename was given, point sys.stdout at that file so that
# everything printed from here on is written to it. The handle is not closed
# here because it replaces stdout.
if clargs.output:
    try:
        sys.stdout = open(clargs.output, "w")
    except FileNotFoundError:
        print("No such file or directory'" + clargs.output + "'. Exiting")
        # sys.exit() instead of the bare exit(): the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist.
        sys.exit()

# Create the html header for font size etc if --html used