Пример #1
0
    def click_run_button(self):
        """Predict sentiment for the current editor text and show the results.

        Reads the editor contents, runs them through ``predict`` with the
        trained model, advances ``self.click_iter`` by the number of
        predictions, appends one row (sentence, prediction, sentiment score)
        per result to ``self.model``, and attaches that model to
        ``self.listView``.
        """
        sentence_now = self.editor.toPlainText()
        print(sentence_now)
        result = predict(sentence_now,
                         self.trainedModel,
                         write_to_csv=False,
                         path=None)
        self.click_iter += len(result['prediction'])
        # Iterate the three result columns in lockstep instead of indexing by
        # position (the original also bound an unused `text` local).
        for sent_text, pred_value, score in zip(result['sentence'],
                                                result['prediction'],
                                                result['sentiment_score']):
            sentence = QStandardItem()
            sentence.setText(sent_text)
            sentence.setColumnCount(1)

            prediction = QStandardItem()
            prediction.setText(pred_value)
            prediction.setColumnCount(2)

            sentiment_score = QStandardItem()
            # .item() unwraps a scalar container (presumably numpy/torch) —
            # TODO confirm the dtype of the sentiment_score column.
            sentiment_score.setText(str(score.item()))
            sentiment_score.setColumnCount(3)

            self.model.appendRow([sentence, prediction, sentiment_score])

        self.listView.setModel(self.model)
Пример #2
0
def test_predict():
    """Smoke test: pull crypto-subreddit text and run finBERT sentiment on it.

    Writes two CSV artifacts: 'finbert_results.csv' (per-sentence
    predictions for the first 10 scraped texts) and 'sentiment_summary.csv'
    (the full scraped dataframe).
    """
    from transformers import AutoModelForSequenceClassification
    # finBERT lives in a vendored checkout; make it importable first.
    sys.path.append(r'./deps/finBERT')

    from finbert.finbert import predict

    model_path = 'deps/finBERT/models/classifier_model/finbert-sentiment'
    model = AutoModelForSequenceClassification.from_pretrained(
        model_path, num_labels=3, cache_dir=None)

    sentimenter = CryptoSentimenter()
    sentimenter.scan('/r/cryptomarkets')
    # Other candidate subreddits, currently disabled:
    #sentimenter.scan('/r/cryptocurrency')
    #sentimenter.scan('/r/cryptocurrencies')
    #sentimenter.scan('/r/cryptomoonshots')
    #sentimenter.scan('/r/satoshistreetbets')

    frame = sentimenter.get_dataframe()

    # Only the first few texts — keeps the test quick.
    sample_text = '\n'.join(list(frame['text'])[:10])

    predictions = predict(sample_text, model)
    predictions.to_csv('finbert_results.csv')
    #print('results {}'.format(predictions))

    print('Writing sentiment_summary.csv')
    with open('sentiment_summary.csv', 'w+') as out_file:
        frame.to_csv(out_file)
Пример #3
0
def apply(input, output):
    """Classify *input*'s text with finBERT and archive the prediction.

    Parameters
    ----------
    input : dict
        Must contain a 'text' key with the text to score.
    output : dict
        Runtime objects: 'classification_model' (passed to
        ``finbert.predict``) and 'pach_client' (used to persist the
        formatted prediction into the "raw_data" repo).

    Returns
    -------
    dict
        The original text, the mean finBERT logits as a list, and the
        label names.

    Raises
    ------
    TypeError
        If *input* is not a dict. (Was a bare ``Exception``; ``TypeError``
        is a subclass, so existing ``except Exception`` callers still work.)
    """
    # Guard clause instead of wrapping the whole body in an if/else pyramid.
    if not isinstance(input, dict):
        raise TypeError('input must be a json object.')

    text = str(input['text'])
    result = finbert.predict(text, output['classification_model'])
    # Per-sentence TextBlob polarity stored alongside the finBERT output.
    blob = TextBlob(text)
    result['textblob_prediction'] = [
        sentence.sentiment.polarity for sentence in blob.sentences
    ]
    resp = {
        "text": text,
        "ndarray": result.logit.mean().tolist(),
        "names": ["Positive", "Negative", "Neutral"]
    }

    # Content-addressed filename: identical texts map to the same object.
    # (hexdigest() already returns str; the original wrapped it in str().)
    filename = hashlib.md5(text.encode()).hexdigest() + '.json'

    with output['pach_client'].commit("raw_data", "master") as commit:
        output['pach_client'].put_file_bytes(
            commit, filename,
            json.dumps(format_prediction(resp)).encode('utf-8'))

    return resp
Пример #4
0

        
Пример #5
0
def get_sentiment(stock_ticker):
    """
    This function extracts the last 100 headlines of a given stock from finviz.com and then analyses their sentiment using the finBERT sentiment analyser.

    Parameters
    ----------
    stock_ticker: Stock ticker string (ex: NRZ)

    Returns
    -------
    finbert_sentiment: The sentiment score of the headlines listed in finviz.com for a given stock ticker

    """
    # finviz uses '-' where some data sources use '.' (e.g. BRK.B -> BRK-B).
    stock_ticker = re.sub(r"\.", "-", stock_ticker)

    finviz_url = 'https://finviz.com/quote.ashx?t='
    url = finviz_url + stock_ticker

    # finviz rejects requests without a user-agent header.
    req = Request(url=url, headers={'user-agent': 'my-app'})
    response = urlopen(req)

    html = BeautifulSoup(response, features='html.parser')
    news_table = html.find(id='news-table')
    news_tables = {stock_ticker: news_table}

    parsed_data = []
    # Rows that carry only a time reuse the most recent date; initialise so a
    # leading time-only row can't raise NameError (original bug).
    date = None

    for stock_ticker, news_table in news_tables.items():
        for row in news_table.findAll('tr'):
            title = row.a.text
            date_data = row.td.text.split(' ')

            if len(date_data) == 1:
                time = date_data[0]
            else:
                date = date_data[0]
                time = date_data[1]

            parsed_data.append([stock_ticker, date, time, title])

    df = pd.DataFrame(parsed_data, columns=['stock_ticker', 'date', 'time', 'title'])

    # Load the classifier ONCE and reuse it; the original loaded it twice and
    # the lambda reloaded the checkpoint on every invocation.
    model = AutoModelForSequenceClassification.from_pretrained("pytorch_model")

    headlines = '. '.join(df['title'].tolist())
    finbert_sentiment = predict(headlines, model)['sentiment_score'].mean()
    return finbert_sentiment
Пример #6
0
def predict_batch(N, data_path="CC_data/", save_path="output/"):
    """Run finBERT over the first *N* pickled news items and save a CSV.

    Parameters
    ----------
    N : int
        Number of rows from the pickled dataframe to score.
    data_path : str
        Directory containing ``BERTnews_all.p``.
    save_path : str
        Directory where ``BERTnews_preds.csv`` is written.

    Relies on module-level ``args.model_path`` for the classifier weights.
    """
    model = BertForSequenceClassification.from_pretrained(args.model_path, num_labels=3, cache_dir=None)
    sentence_pred_df = []

    start_main = time.time()

    # Context manager closes the pickle file (original leaked the handle).
    # NOTE(review): unpickling is only safe on trusted data.
    with open(data_path + "BERTnews_all.p", "rb") as f:
        data = pickle.load(f)
    data = data.reset_index(drop=True)

    for i in range(N):
        pred = predict(data.loc[i]['text'], data.loc[i]['index'], model, write_to_csv=False)
        sentence_pred_df.extend(pred)

    sentence_pred_df = pd.DataFrame.from_dict(sentence_pred_df)
    sentence_pred_df.to_csv(save_path + "BERTnews_preds.csv")

    end_main = time.time()
    print("TIME for batch_id: {}".format(round(end_main - start_main, 2)))
Пример #7
0
def item_sentiment_score(item_txt):
    """Return a summarization-weighted finBERT sentiment score for a filing item.

    Sentences are scored by finBERT, then weighted by their ``summa``
    summarize() importance. On degenerate inputs the function returns a
    descriptive string instead of a number (callers appear to rely on these
    sentinel strings):
    - "missing items"                          — passed straight through;
    - "join return empty except title row"     — <2 sentences survive the merge;
    - "summa won't rank"                       — summarize() raised on non-empty text;
    - "full_text is empty"                     — nothing to score at all.
    """
    if item_txt == "missing items":
        return item_txt

    item_txt = item_txt.replace('\n', ' ')
    # NOTE(review): loading the checkpoint on every call is expensive;
    # consider hoisting if this runs in a loop.
    model = BertForSequenceClassification.from_pretrained(
        'models/classifier_model/finbert-sentiment',
        num_labels=3,
        cache_dir=None)
    sentiment_df = predict(item_txt, model, write_to_csv=False)

    full_item_txt = " ".join(sentiment_df['sentence'])
    try:
        # ratio=1.0 keeps every sentence but still yields importance scores.
        w_sentiment = pd.DataFrame(summarize(full_item_txt,
                                             ratio=1.0,
                                             scores=True),
                                   columns=['sentence', 'w'])
        sentiment_df = sentiment_df.merge(w_sentiment,
                                          how='inner',
                                          on='sentence')
        if len(sentiment_df) > 1:
            # Normalise weights, then return the weighted mean sentiment.
            sentiment_df['w'] = sentiment_df['w'] / np.sum(
                sentiment_df['w'])
            return np.sum(sentiment_df['w'] *
                          sentiment_df['sentiment_score'])
        else:
            ## nothing to join on
            return "join return empty except title row"
    except Exception as e:
        # Deliberate best-effort: summa can raise on short/degenerate text.
        # (Removed an unreachable `pass` that followed these returns.)
        print(e)
        if len(full_item_txt) > 0:
            return "summa won't rank"
        else:
            return "full_text is empty"
Пример #8
0
from pathlib import Path
import datetime
import os

# CLI entry point: read a text file, run finBERT, write per-sentence
# predictions to <output_dir>/<stem>_predictions.csv.
parser = argparse.ArgumentParser(description='Sentiment analyzer')

# Flag is parsed but unused in this snippet; kept for CLI compatibility.
parser.add_argument('-a', action="store_true", default=False)

parser.add_argument('--text_path', type=str, help='Path to the text file.')
parser.add_argument('--output_dir',
                    type=str,
                    help='Where to write the results')
parser.add_argument('--model_path', type=str, help='Path to classifier model')

args = parser.parse_args()

# makedirs(exist_ok=True) creates missing parents and is race-free, unlike
# the original exists()+mkdir pair.
os.makedirs(args.output_dir, exist_ok=True)

with open(args.text_path, 'r') as f:
    text = f.read()

model = BertForSequenceClassification.from_pretrained(args.model_path,
                                                      num_labels=3,
                                                      cache_dir=None)
# Output file is named after the input file's stem.
output = Path(args.text_path).stem + '_predictions.csv'
predict(text,
        model,
        write_to_csv=True,
        path=os.path.join(args.output_dir, output))
Пример #9
0
def score():
    """Score the POSTed JSON's 'text' field; return per-record results as JSON."""
    payload = request.get_json()
    return predict(payload['text'], model).to_json(orient='records')
Пример #10
0
def score():
    """Score the POSTed JSON's 'text' field, log the dataframe, return JSON."""
    body = request.get_json()
    predictions = predict(body['text'], model)
    print(predictions)
    return predictions.to_json(orient='records')
  "Remove numbers from the text"
  return text

# Score the 'item1a' and 'item7' sections of each filing row and write one
# JSON file per (ticker, year). Files that already exist are skipped, so the
# script is resumable.
os.makedirs(outputpath, exist_ok=True)  # race-free vs. exists()+mkdir

model = BertForSequenceClassification.from_pretrained(model_path, num_labels=3, cache_dir=None)
sections = ['item1a', 'item7']
for file in files:
    df = pd.read_csv(inputpath + file)
    for i in range(len(df)):
        row = df.iloc[i]
        # Years are stored as two-digit offsets from 2000.
        filename = row['Ticker'] + '_' + str(int(row["Year"]) + 2000) + ".json"
        filepath = os.path.join(outputpath, filename)
        if os.path.exists(filepath):
            continue  # already processed
        out = {}
        for section in sections:
            text = row[section]
            # isinstance, not type()==: NaN (float) cells fail this check.
            if isinstance(text, str):
                text = preprocessText(text)
                out[section] = predict(text, model)
            else:
                break  # a section is missing: skip this filing entirely
        if len(out.keys()) == 2:
            # Context manager closes the file even if json.dumps raises.
            # NOTE(review): predict() returns a DataFrame elsewhere in this
            # file; json.dumps would fail on that — confirm its return type here.
            with open(filepath, "w+") as f:
                f.write(json.dumps(out))
Пример #12
0
# For every model directory, load the classifier and score every sentence CSV,
# writing '<model>_<csv>' files into a per-model output directory.
print('Getting list of model directories...')
models_list = get_models(args)
print(models_list)
# Reuse models_list — the original called get_models(args) a second time here.
make_output_directory(args.output_dir, models_list)

for model_path in models_list:
    model_dir = os.path.join(args.model_dir, model_path)
    print('Loading {} model....'.format(model_path))
    model = AutoModelForSequenceClassification.from_pretrained(model_dir, num_labels=3, cache_dir=None)
    print('{} model loaded. Processing .csv files...'.format(model_path))
    output_dir = os.path.join(args.output_dir, model_path)
    for df_filename in sentences_df_list:
        print('Processing {} using {} model...'.format(df_filename, model_path))
        df = pd.read_csv(os.path.join(args.input_dir, df_filename))
        text_inputs = df['sentence'].tolist()
        final_df = predict(text_inputs, model, write_to_csv=False)
        processed_csv_filename = '{}_{}'.format(model_path, df_filename)
        print('Finished processing {}. Saving as .csv file named {}...'.format(df_filename, processed_csv_filename))
        final_df.to_csv(os.path.join(output_dir, processed_csv_filename), index=False)
        
# if not os.path.exists(args.output_dir):
#     os.mkdir(args.output_dir)

# if args.text_path.endswith('.txt'):
#     with open(args.text_path,'r') as f:
#         text = f.read()
#         text_inputs = text.strip().split('\n\n')
# elif args.text_path.endswith('.csv'):
#     df = pd.read_csv(args.text_path)
#     text_inputs = df['sentence'].tolist()
Пример #13
0
def predict_news(x):
    """Run finBERT prediction for a packed pair ``x``.

    Forwards ``x[1]`` and ``x[0]`` (presumably text and an identifier —
    verify against the ``predict`` signature used here) plus the
    module-level ``model``, with CSV output disabled.
    """
    return predict(x[1], x[0], model, write_to_csv=False)