Example #1
    def on_click(self):
        filename = self.openFileNameDialog()
        if filename:
            self.label.setText('Processing File: {}'.format(
                filename.split('/')[-1]))
            self.button.setEnabled(False)
            # Load DataFrame
            df = pd.read_csv(filename)
            # Scale DataFrame
            dfs_scaled = pd.DataFrame(self.scaler.transform(df.values),
                                      index=df.index,
                                      columns=df.columns)
            # init hidden and cell state
            hidden = init_hidden(1, 1024, self.device)
            #Predict 30 laps
            target = self.scaler.inverse_transform(dfs_scaled.values)

            input = torch.tensor(dfs_scaled.iloc[0:10].values,
                                 dtype=torch.float)
            input = torch.reshape(input, (1, input.shape[0], input.shape[1]))
            input = input.to(self.device)  # .to() returns a new tensor; keep the result

            out1, hidden = predict(input, self.predictor, hidden)

            input = torch.reshape(out1, (1, out1.shape[0], out1.shape[1]))
            out2, hidden = predict(input, self.predictor, hidden)

            input = torch.reshape(out2, (1, out2.shape[0], out2.shape[1]))
            out3, hidden = predict(input, self.predictor, hidden)

            out = torch.cat((out1, out2, out3), dim=0)
            out_np = out.cpu().numpy()
            out_np = self.scaler.inverse_transform(out_np)
            out_np = np.concatenate((target[0:10], out_np), axis=0)

            # Plotting
            out_np_time = [
                datetime.datetime(1970, 1, 1) +
                datetime.timedelta(seconds=time) for time in out_np[:, -1]
            ]
            plt.plot(out_np_time, label='prediction')
            target_time = [
                datetime.datetime(1970, 1, 1) +
                datetime.timedelta(seconds=time) for time in target[:, -1]
            ]
            plt.plot(target_time, label='original')
            plt.title('Estimated Lap Times')
            plt.ylabel("Time")
            plt.xlabel("Lap")
            plt.gca().yaxis.set_major_formatter(DateFormatter('%M:%S'))
            plt.tight_layout()
            plt.legend()
            plt.show()

            self.button.setEnabled(True)
def show_webcam(mirror=False):
    cam = cv2.VideoCapture(0)
    while True:
        time.sleep(1)
        ret_val, img = cam.read()
        if mirror: 
            img = cv2.flip(img, 1)
        cv2.imshow('my webcam', img)
        print('img.shape', img.shape)
        predict(img)
        if cv2.waitKey(1) == 27: 
            break  # esc to quit
    cam.release()
    cv2.destroyAllWindows()
Example #3
def post_process(Arguments):
    '''
    This is simply a distributor for all functions involved in
    postprocessing POSE results from previous calculations, which
    is required to make sense of most POSE output.
    '''

    if Arguments.PostProcess == "Validate":
        validate(Arguments)

    if Arguments.PostProcess == "Predict":
        predict(Arguments)

    return
def PredictTestData():
    token_vocab, label_vocab = load_predict_info()
    times = str(time.strftime("%d%m%Y%H%M%S"))
    filename = "PredictOutput_testing_" + times + ".xlsx"
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet()
    worksheet.write(0, 0, 'News Headlines')
    worksheet.write(0, 1, 'Polarity')
    worksheet.write(0, 2, 'Probability')

    rows = 1
    col = 0
    conn = psycopg2.connect(host="localhost",
                            database="StockMarket",
                            user="******",
                            password="******")
    cur = conn.cursor()
    postgreSQL_select_Query = """ select * from public."testdataset" """
    cur.execute(postgreSQL_select_Query)
    news = cur.fetchall()
    Predict = []
    for row in news:
        article = row[1]
        print(article)
        polarity, probability = predict(token_vocab, label_vocab, article)
        Predict.append(polarity)
        worksheet.write(rows, col, article)
        worksheet.write(rows, col + 1, polarity)
        worksheet.write(rows, col + 2, probability)
        rows += 1

    workbook.close()
    os.startfile(filename)
    return Predict
Example #6
def upload():
    if request.method == 'POST':
        f = request.files['file']
        basepath = os.path.dirname(__file__)  # directory of the current file
        upload_path = os.path.join(basepath, 'static\\uploads',
                                   f.filename)  # note: the folder must exist beforehand, otherwise the path will be reported as missing
        f.save(upload_path)
        data = Image.open(upload_path)
        # the file name itself does not matter
        pred = predict(4, 'models/vgg.ckpt', data)
        pred = np.squeeze(pred)
        pred = pred.tolist()
        fenlei = ''
        index = pred.index(max(pred))
        # pick the class with the highest score
        if index == 0:
            fenlei = '草体'

        elif index == 2:
            fenlei = '篆体'

        elif index == 3:
            fenlei = '隶书'

        else:
            fenlei = ''
        return render_template('upload.html',
                               result=fenlei,
                               imgpath=upload_path)
    return render_template('upload.html')
    def predict(self):
        headline = self.Entry1.get()
        if headline == "":
            messagebox.showinfo("Error", "Please insert news headline")

        else:
            conn = psycopg2.connect(host="localhost",
                                    database="StockMarket",
                                    user="******",
                                    password="******")
            cur = conn.cursor()

            token_vocab, label_vocab = load_predict_info()
            polarity, probability = predict(token_vocab, label_vocab, headline)
            if polarity == 'POS':
                polarity = 'POSITIVE'

            if polarity == 'NEU':
                polarity = 'NEUTRAL'

            if polarity == 'NEG':
                polarity = 'NEGATIVE'

            self.Text1.delete('1.0', 'end')
            self.Text2.delete('1.0', 'end')
            self.Text1.insert('end', polarity)
            self.Text2.insert('end', probability)

            probability = str(probability)
            sql = """INSERT INTO public."testing_records"("test_news","test_polarity","test_probability") VALUES ( %s, %s, %s);"""
            data = (headline, polarity, probability)
            cur.execute(sql, data)
            conn.commit()
    def predictFile(self):
        file = self.Entry2.get()

        try:
            if file.lower().endswith(('.txt', '.csv')):
                conn = psycopg2.connect(host="localhost",
                                        database="StockMarket",
                                        user="******",
                                        password="******")
                cur = conn.cursor()
                times = str(time.strftime("%d%m%Y%H%M%S"))
                filename = "PredictOutput" + times + ".xlsx"
                workbook = xlsxwriter.Workbook(filename)
                worksheet = workbook.add_worksheet()
                worksheet.write(0, 0, 'News Headlines')
                worksheet.write(0, 1, 'Polarity')
                worksheet.write(0, 2, 'Probability')

                row = 1
                col = 0

                token_vocab, label_vocab = load_predict_info()
                with open(file, "r", encoding='utf-8') as f:
                    for headlines in f:
                        headlines = headlines.rstrip('\n\r')
                        polarity, probability = predict(
                            token_vocab, label_vocab, headlines)

                        if polarity == 'POS':
                            polarity = 'POSITIVE'

                        if polarity == 'NEU':
                            polarity = 'NEUTRAL'

                        if polarity == 'NEG':
                            polarity = 'NEGATIVE'

                        probability = str(probability)
                        sql = """INSERT INTO public."testing_records"("test_news","test_polarity","test_probability") VALUES ( %s, %s, %s);"""
                        data = (headlines, polarity, probability)
                        cur.execute(sql, data)

                        print(headlines, polarity, probability)
                        worksheet.write(row, col, headlines)
                        worksheet.write(row, col + 1, polarity)
                        worksheet.write(row, col + 2, probability)
                        row += 1

                conn.commit()
                workbook.close()
                os.startfile(filename)

            else:
                messagebox.showinfo("Error",
                                    "Please insert the correct format file")
        except:
            messagebox.showinfo(
                "Error", "The file inserted had some error, please try again.")
    def predict(self):
        # convert the image to PIL format first
        image = self.image
        self.label_4.setText('Predicting')
        pred = predict(image)
        self.label_4.setText('Predicted')
        self.label_2.setText(str(pred.numpy().item()))
Example #10
    def predict(self):
        # convert the image to PIL format first
        image = self.image
        self.label_4.setText('Predicting')
        pred = predict(image)
        self.label_4.setText('Predicted')
        self.label_2.setText(CLASSES[pred])
Example #11
def onclick(args):
    if args == 1:
        preprocess()
    elif args == 2:
        train()
    elif args == 3:
        test()
    elif args == 4:
        show_layer1_filters()
        show_layer2_filters()
        show_number_of_learned_parameters()
    elif args == 5:
        pick_sample_test_data()
        pick_sample_train_data()
    else:
        path = tk.filedialog.askopenfile(title="Select Image File", filetypes=(("jpeg files", "*.jpg"), ("all files", "*.*")))
        print(path)
        try:
            predict(path.name)
        except:
            print("Error While Reading File")
Example #12
def solve_expression():
    if request.method == "POST":
        try:
            file = request.files["image"]
            img_byte = file.read()
            img = cv2.imdecode(np.frombuffer(img_byte, np.uint8), -1)
            mask = predict(img)
            mask_path = "results/mask" + file.filename
            cv2.imwrite(mask_path, mask*255)
            return {
                "filename": mask_path
            }
        except:
            handle_404()
def kernel_variation():
    accuracy_sum = 0
    model_count = 3

    accuracy = {}

    for trained_model in range(model_count):
        net = Net(kernel_size=7, conv_output_size=2)
        model = 'test' + str(trained_model)
        train(net, model_name=model)
        accuracy[trained_model] = predict(net, model_name=model)
        accuracy_sum += accuracy[trained_model]

    accuracy['average_accuracy'] = accuracy_sum / model_count
    print(accuracy)
Example #14
def PredictTrainData():
    token_vocab, label_vocab = load_predict_info()
    conn = psycopg2.connect(host="localhost",
                            database="StockMarket",
                            user="******",
                            password="******")
    cur = conn.cursor()
    postgreSQL_select_Query = """ select * from public."traindataset" """
    cur.execute(postgreSQL_select_Query)
    news = cur.fetchall()
    Predict = []
    for row in news:
        article = row[1]
        polarity, probability = predict(token_vocab, label_vocab, article)
        Predict.append(polarity)

    return Predict
Example #15
def handle_location(message):
    if (message.text.strip() == '/place') :
        bot.send_message(message.chat.id, 'okay, here is an example:<br>"<b>\n/place McDonalds in the centre of London</b>"', parse_mode='HTML')
    else: 
        with open('data.txt', 'r') as file:
            location_data = file.read()
        print(location_data)
        if len(location_data) > 0:
            result_message = '<b>Sorry, Dude.</b>\nFirst you should\nshare your location with me.\n📍 /set_location'
        else:
            result_message = '🔮loading ... '
            bot.send_message(message.chat.id, result_message, parse_mode='HTML')
            keyword = message.text[7:]
            language = 'en'
            url = 'https://maps.googleapis.com/maps/api/place/textsearch/json?&query={0}&language={1}&key={2}'.format(
                 keyword, language, config.gogole_map_api_key)
            req = requests.get(url)
            # print(req.json()['results'])
            if req.json()['status'] == 'ZERO_RESULTS' :
                bot.send_message(message.chat.id, '<b>Not found </b> 🤬' ,parse_mode='HTML')
            else:
                for result in req.json()['results'][::6]:
                    vin = '<b>'+ result['name'] +'</b>\n📍'+ result['formatted_address']
                    url = 'https://maps.googleapis.com/maps/api/place/details/json?placeid={0}&key={1}'.format(result['place_id'], config.gogole_map_api_key)
                    detail_req = requests.get(url)
                    # print(detail_req.json()['status'])
                    if detail_req.json()['status'] == 'OK' :
                        # print(detail_req.json()['result']['reviews'])
                        if ( 'reviews' in detail_req.json()['result']):
                            reviews = []
                            for review in detail_req.json()['result']['reviews']:
                                if (review['language'] == 'en'):
                                    reviews.append(review['text'])
                            predicteds = predict(reviews)
                            if (len(predicteds) > 0):
                                vin += '\nThere are <b>' + str(len(predicteds)) + '</b> review(s).'
                                bot.send_message(message.chat.id, vin, parse_mode='HTML')
                                print('===========')
                                print(predicteds)
                                print('===========')
                    else :
                        vin += '\nSorry, this venue has no reviews.'
                        bot.send_message(message.chat.id, vin, parse_mode='HTML')
Example #16
def kairec(upload_path):
    data = Image.open(upload_path)
    # the file name itself does not matter
    pred = predict(5, 'kaimodels/vgg.ckpt', data)
    pred = np.squeeze(pred)
    pred = pred.tolist()
    fenlei = ''
    index = pred.index(max(pred))
    # pick the class with the highest score
    if index == 0:
        fenlei = "楷体(" + "欧体" + ")"
    elif index == 1:
        fenlei = "楷体(" + "赵体" + ")"
    elif index == 2:
        fenlei = "楷体(" + "柳体" + ")"
    elif index == 3:
        fenlei = "楷体(" + "魏碑" + ")"
    elif index == 4:
        fenlei = "楷体(" + "颜体" + ")"
    return fenlei
Example #17
def uploadTestTemData():
    check_state = checkResponseFile(request.files)
    if check_state != 1:
        return check_state
    file = request.files['file']

    fileData = file.read()
    fileName = getFileMD5(fileData)

    originSavePath = os.path.join(TEM_SAVE_PATH, fileName)
    resizeOutputPath = os.path.join(TEM_LOW_SAVE_PATH, fileName)

    if not os.path.exists(originSavePath):
        writeFile(originSavePath, fileData)
        if resizeMovie(originSavePath, resizeOutputPath) != 0:
            removeFile(originSavePath)
            removeFile(resizeOutputPath)
            return jsonify({'code': -5, 'msg': 'Resize video failed'})

    result = predict([resizeOutputPath], utilDict)

    return jsonify({'code': 0, 'result': result, 'msg': 'Predict done'})
def fc_variation():
    print('Fully Connected Layer Size Variation')

    # Combinations of fully connected layer sizes
    fc1_size = [120, 10, 200]
    fc2_size = [84, 10, 120]

    # result dictionary
    accuracy = {}

    # Iterate through all the fully connected layer sizes
    for fc1 in fc1_size:
        for fc2 in fc2_size:
            # create an instance of the neural network
            net = Net(fc1_size=fc1, fc2_size=fc2)
            model = 'fc_variation' + str(fc1) + '_' + str(fc2)

            # train the model
            train(net, model_name=(model))

            # measure model accuracy with testing dataset
            accuracy[model] = predict(net, model_name=model)

    print(accuracy)
Example #19
    def setCategoryEnglish(self, article):
        import sys
        sys.path.insert(0, "../EnglishTextCategorization/")
        from Predict import predict
        predictedCategory = predict(article)
        return predictedCategory
# -*- coding: utf-8 -*-
"""
Spyder Editor

This is a temporary script file.
"""
from model import create_model
from DataGeneration import dataGeneration
from Checkpoint import createCheckpoint
from Performance import plot, calculate
from training import train
from Predict import predict

loadPreviouslySavedWeights = 1

network = create_model()

dataSet = dataGeneration()

cp_callback = createCheckpoint()

train(network, dataSet, cp_callback, loadPreviouslySavedWeights)

calculate(network, dataSet)

predict(network, dataSet)
Example #21
from python_to_mysql import get_categories, add_category, get_prints, set_print_categories
from python_to_mysql import mock_classifier  # classifier returning random results for quick tests
from Predict import predict

print("took", round(time.time() - t1), "s to load dependencies")

### Classify the articles

print("\nLoading articles from databaes...")
t1 = time.time()
x_ids, x_titles, x_booktitles, x_keywords = get_prints()
print("took", round(time.time() - t1), "s to load", len(x_ids), "prints")

print("\nPredicting articles categories...", flush=True)
t1 = time.time()
prediction = predict(x_titles, x_keywords, x_booktitles)
print("took", round(time.time() - t1), "s to predict", len(prediction),
      "categories")

print("\nSaving predicted categories in databaes...", flush=True)
t1 = time.time()
set_print_categories(x_ids, prediction)
print("took",
      round(time.time() - t1),
      "s to save",
      len(prediction),
      "predictions",
      flush=True)

print("\nClassification done!\n")
Example #22
    def scrape(self, response):
        single_url = 'https://www.malaysiastock.biz/Corporate-Infomation.aspx?securityCode='
        #--- retrieve the tokens and labels
        token_vocab, label_vocab = load_predict_info()
        article = NewsItem()
        i = 0
        alldata = []
        imgsrc = 0
        StockCode = response.meta.get('code')

        completeurl = single_url + StockCode
        conn = psycopg2.connect(host="localhost",
                                database="StockMarket",
                                user="******",
                                password="******")
        cur = conn.cursor()

        #x = HtmlXPathSelector(response)
        r = requests.get(completeurl)
        #--- crawl the news
        all_links = response.css('.tablelist td span a')
        # iterate over links
        print(completeurl, '\n')
        for sel in all_links:
            try:
                html_as_string = r.text
                soup = BeautifulSoup(html_as_string, 'html.parser')
                article['headline'] = (soup.select(".line td span a"))[i].text
                article['link'] = response.css('.tablelist td span a').xpath(
                    '@href').extract()[i]
                article['code'] = StockCode
                # --- filter the news headlines that are not in english language
                if re.findall('[\u4e00-\u9fff]+', article['headline']):
                    if "News" in article['link']:
                        imgsrc = imgsrc + 1

                # --- filter the news headlines to prevent duplication problem
                else:
                    postgreSQL_select_Query = """ select * from public."company_news" """
                    cur.execute(postgreSQL_select_Query)
                    news = cur.fetchall()
                    Newsexist = False
                    for row in news:
                        if article['headline'] == row[1] and article[
                                'code'] == row[5]:
                            Newsexist = True
                            if "News" in article['link']:
                                imgsrc = imgsrc + 1

                    # --- if the news headline does not exist in the db, then predict and store
                    if Newsexist == False:
                        singledata = []

                        if "Blog" in article['link']:
                            print(article['link'])
                            b = requests.get(article['link'])
                            html_as_string = b.text
                            p = BeautifulSoup(html_as_string, 'html.parser')
                            # article['source'] = response.css('.line td a').xpath('text()').extract()
                            article['source'] = p.find(
                                'label', {
                                    'id': 'MainContent2_lbAuthorProfile'
                                }).find('a').text
                            data = p.find('label', {
                                'id': 'MainContent2_lbAuthorProfile'
                            }).text
                            unusedata, date = data.split('e:')
                            date = date.strip()
                            date = str(date)
                            for item in date.splitlines():
                                d = parser.parse(item)
                                article['time'] = d.strftime("%Y-%m-%d")

                            polarity, probability = predict(
                                token_vocab, label_vocab, article['headline'])
                            probability = str(probability)
                            singledata.append(article['headline'])
                            singledata.append(article['link'])
                            singledata.append(polarity)
                            singledata.append(probability)
                            singledata.append(article['code'])
                            singledata.append(article['source'])
                            singledata.append(article['time'])
                            alldata.append(singledata)
                            print(i, article['headline'])
                            print('link:', article['link'])
                            print('published time:', article['time'])
                            print('polarity:', polarity)
                            print('source:', article['source'])

                        else:
                            print(article['link'])
                            article['source'] = response.css(
                                '.line td a img').xpath(
                                    '@src').extract()[imgsrc]
                            if 'thestar' in article['source']:
                                s = requests.get(article['link'])
                                html_as_string = s.text
                                s = BeautifulSoup(html_as_string,
                                                  'html.parser')
                                date = s.find('p', {'class': 'date'})

                                if date is None:
                                    article['time'] = None

                                else:
                                    date = s.find('p', {'class': 'date'}).text
                                    date = date.replace(' ', '')
                                    date = date.strip()
                                    date = str(date)

                                    for item in date.splitlines():
                                        d = parser.parse(item)
                                        article['time'] = d.strftime(
                                            "%Y-%m-%d")

                            else:
                                s = requests.get(article['link'])
                                html_as_string = s.text
                                s = BeautifulSoup(html_as_string,
                                                  'html.parser')
                                date = s.find('span',
                                              {'class': 'post-created'})
                                if date is None:
                                    article['time'] = None

                                else:
                                    date = s.find('span', {
                                        'class': 'post-created'
                                    }).text
                                    date = str(date.lower())

                                    if date == '':
                                        article['time'] = None

                                    else:
                                        if 'am' in date:
                                            date = date.replace('am', '')

                                        if 'pm' in date:
                                            date = date.replace('pm', '')

                                        for item in date.splitlines():
                                            d = parser.parse(item)
                                            article['time'] = d.strftime(
                                                "%Y-%m-%d")

                            polarity, probability = predict(
                                token_vocab, label_vocab, article['headline'])
                            probability = str(probability)
                            singledata.append(article['headline'])
                            singledata.append(article['link'])
                            singledata.append(polarity)
                            singledata.append(probability)
                            singledata.append(article['code'])
                            singledata.append(article['source'])
                            singledata.append(article['time'])
                            alldata.append(singledata)
                            print(i, article['headline'])
                            print('link:', article['link'])
                            print('published time:', article['time'])
                            print('polarity:', polarity)
                            print('source:', article['source'])
                            imgsrc = imgsrc + 1

                i = i + 1
            except:
                print("The news headline had some error or does not exist.")

        for news_num in range(len(alldata), 0, -1):
            news_num = news_num - 1
            if alldata[news_num][6] is None:
                print("The news is not exit or got errors.")

            else:
                sql = """INSERT INTO public."company_news"("news_title", "news_link", "news_time", "news_accuracy", "news_code", "news_source", "news_polarity" ) VALUES ( %s, %s, %s, %s, %s, %s, %s);"""
                data = (alldata[news_num][0], alldata[news_num][1],
                        alldata[news_num][6], alldata[news_num][3],
                        alldata[news_num][4], alldata[news_num][5],
                        alldata[news_num][2])
                cur.execute(sql, data)
                conn.commit()

        cur.close()
Example #23
         label=hp_inst["label"],
         time_span=hp_inst["time_span"],
         max_depth=hp_inst["max_depth"],
         max_features=hp_inst["max_features"],
         min_samples_leaf=hp_inst["min_samples_leaf"],
         min_samples_split=hp_inst["min_samples_split"],
         n_estimators=hp_inst["n_estimators"],
         cv=hp_inst["cv"],
         precision=hp_inst["precision"],
         edge_forecasting=hp_inst["edge_forecasting"],
         normalize_data=hp_inst["normalize_data"],
         criterion=hp_inst["criterion"],
     )
     predict(
         label=hp_inst["label"],
         precision=hp_inst["precision"],
         target_date=target_date,
         normalize_data=hp_inst["normalize_data"],
     )
     MLp, MLa, TWNa, ECa, MLw, TWNw, ECw = post_mortem(
         target_date=str(
             get_datetime(target_date) + datetime.timedelta(1)))
     MLp_agg.append(MLp)
     MLa_agg.append(MLa)
     TWNa_agg.append(TWNa)
     ECa_agg.append(ECa)
     MLw_agg.append(MLw)
     TWNw_agg.append(TWNw)
     ECw_agg.append(ECw)
 except Exception as e:
     loggr.exception(
         "Something went wrong for this date. See next line for details. Skipping date..."
Example #24
from __future__ import print_function
from createDataSet import initDataSet
from Predict import predict
import six.moves.cPickle as pickle
import theano
import theano.tensor as T
from SaveCnn import CnnModel
from SaveCnn import HiddenLayer
from SaveCnn import LeNetConvPoolLayer
from SaveCnn import LogisticRegression
from SaveCnn import load_data
import copy
import BinAndCrop

if __name__ == '__main__':
    start = BinAndCrop.BinAndCropClass()
    start.bin()
    print('222')
    initDataSet()
    result = predict('file-126.pkl.gz')
    print(result)
Example #25
                                b,
                                X,
                                Y,
                                num_iterations=101,
                                learning_rate=0.005,
                                print_cost=True)
print("w = " + str(params["w"]))
print("b = " + str(params["b"]))
print("dw = " + str(grads["dw"]))
print("db = " + str(grads["db"]))
print("cost = " + str(costs))

# predict
from Predict import predict

print("prediction = " + str(predict(w, b, X)))
print()

# model
from Model import model

d = model(X_train=train_set_x,
          Y_train=train_set_y,
          X_test=test_set_x,
          Y_test=test_set_y,
          num_iteration=2000,
          learn_rate=0.005,
          print_cost=True)
print()

# Example of a picture that was wrongly classified.
    def parse(self, response):
        token_vocab, label_vocab = load_predict_info()
        article = NewsItem()
        i = 0
        imgsrc = 0
        blogsrc = 0
        alldata = []
        all_links = response.css('td .newsHeadline a')
        conn = psycopg2.connect(host="localhost",
                                database="StockMarket",
                                user="******",
                                password="******")
        cur = conn.cursor()

        r = requests.get(
            'https://www.malaysiastock.biz/Blog/Blog-Headlines.aspx')
        # --- crawl the latest news
        for sel in all_links:
            try:
                html_as_string = r.text
                soup = BeautifulSoup(html_as_string, 'html.parser')
                article['headline'] = soup.findAll(
                    'span', {'class': 'newsHeadline'})[i].text
                article['link'] = response.css('td .newsHeadline a').xpath(
                    '@href').extract()[i]
                # --- filter the news headlines that are not in english language
                if re.findall('[\u4e00-\u9fff]+', article['headline']):
                    #print("The news headline is not in english language")
                    if "Blog" in article['link']:
                        blogsrc = blogsrc + 1

                    else:
                        imgsrc = imgsrc + 1

                # --- filter the news headlines to prevent duplication problem
                else:
                    postgreSQL_select_Query = """ select * from public."latest_news" """
                    cur.execute(postgreSQL_select_Query)
                    news = cur.fetchall()
                    Newsexist = False
                    for row in news:
                        if article['headline'] == row[1] and article[
                                'link'] == row[2]:
                            #print("The news headline had existed in the database.")
                            Newsexist = True
                            if "Blog" in article['link']:
                                blogsrc = blogsrc + 1

                            else:
                                imgsrc = imgsrc + 1

                    # --- if the news headline does not exist in the db, then predict and store
                    if Newsexist == False:
                        singledata = []
                        if "Blog" in article['link']:
                            print(article['link'])
                            article['source'] = response.css(
                                'td .newsMedia a').xpath(
                                    'text()').extract()[blogsrc]
                            b = requests.get(article['link'])
                            html_as_string = b.text
                            p = BeautifulSoup(html_as_string, 'html.parser')
                            data = p.find('label', {
                                'id': 'MainContent2_lbAuthorProfile'
                            }).text
                            unusedata, date = data.split('e:')
                            date = date.strip()
                            date = str(date)
                            for item in date.splitlines():
                                d = parser.parse(item)
                                article['time'] = d.strftime("%Y-%m-%d %H:%M")
                            polarity, probability = predict(
                                token_vocab, label_vocab, article['headline'])
                            probability = str(probability)
                            singledata.append(article['headline'])
                            singledata.append(article['link'])
                            singledata.append(polarity)
                            singledata.append(probability)
                            singledata.append(article['source'])
                            singledata.append(article['time'])
                            alldata.append(singledata)
                            print('\n')
                            print(i, article['headline'])
                            print('link:', article['link'])
                            print('published time:', article['time'])
                            print('polarity:', polarity)
                            print('source:', article['source'])
                            print('\n')
                            blogsrc = blogsrc + 1

                        else:
                            print(article['link'])
                            article['source'] = response.css(
                                'td span a img').xpath(
                                    '@src').extract()[imgsrc]
                            if 'thestar' in article['source']:
                                s = requests.get(article['link'])
                                html_as_string = s.text
                                s = BeautifulSoup(html_as_string,
                                                  'html.parser')
                                date = s.find('p', {'class': 'date'})

                                if date is None:
                                    article['time'] = None

                                else:
                                    date = s.find('p', {'class': 'date'}).text
                                    time = s.find('time', {
                                        'class': 'timestamp'
                                    }).text
                                    date = date.replace(' ', '')
                                    date = date.strip()
                                    date = str(date)
                                    time = time.replace(' ', '')
                                    time = time.strip()
                                    time = str(time)
                                    datetime = date + " " + time
                                    if time == '':
                                        othertime = response.css(
                                            '.line td').xpath(
                                                'text()').extract()[i]
                                        datetime = date + " " + othertime
                                    print(datetime)

                                    if 'MYT' in datetime:
                                        datetime = datetime.replace('MYT', '')

                                    for item in datetime.splitlines():
                                        d = parser.parse(item)
                                        article['time'] = d.strftime(
                                            "%Y-%m-%d %H:%M")

                            else:
                                s = requests.get(article['link'])
                                html_as_string = s.text
                                s = BeautifulSoup(html_as_string,
                                                  'html.parser')
                                date = s.find('span',
                                              {'class': 'post-created'})
                                if date is None:
                                    article['time'] = None

                                else:
                                    date = s.find('span', {
                                        'class': 'post-created'
                                    }).text
                                    date = str(date.lower())

                                    if date == '':
                                        article['time'] = None

                                    else:
                                        if 'am' in date:
                                            date = date.replace('am', '')

                                        if 'pm' in date:
                                            date = date.replace('pm', '')

                                        for item in date.splitlines():
                                            d = parser.parse(item)
                                            article['time'] = d.strftime(
                                                "%Y-%m-%d %H:%M")

                            polarity, probability = predict(
                                token_vocab, label_vocab, article['headline'])
                            probability = str(probability)
                            singledata.append(article['headline'])
                            singledata.append(article['link'])
                            singledata.append(polarity)
                            singledata.append(probability)
                            singledata.append(article['source'])
                            singledata.append(article['time'])
                            alldata.append(singledata)
                            print('\n')
                            print(i, article['headline'])
                            print('link:', article['link'])
                            print('published time:', article['time'])
                            print('polarity:', polarity)
                            print('source:', article['source'])
                            print('\n')
                            imgsrc = imgsrc + 1

                i = i + 1
            except:
                print("The news headline had some error or does not exist.")

        for news_num in range(len(alldata), 0, -1):
            news_num = news_num - 1
            if alldata[news_num][5] is None:
                print("The news is not exit or got errors.")

            else:
                sql = """INSERT INTO public."latest_news"("news_title", "news_link", "news_polarity", "news_accuracy", "news_source", "news_time") VALUES ( %s, %s, %s, %s, %s, %s);"""
                data = (alldata[news_num][0], alldata[news_num][1],
                        alldata[news_num][2], alldata[news_num][3],
                        alldata[news_num][4], alldata[news_num][5])
                cur.execute(sql, data)
                conn.commit()

        cur.close()
Example #27
def home():
    print('home', file=sys.stdout)
    error_message = False
    if request.method == 'POST':
        print('home post', file=sys.stdout)
        try:
            # check if the post request has the file part
            if len(request.files) == 0:
                flash('No file part')
                return redirect(request.url)
            # get first file
            file = request.files.values().__next__()
            # if user does not select file, browser also
            # submit an empty part without filename
            if file.filename == '':
                flash('No selected file')
                return redirect(request.url)
            if file:
                print('file uploaded', file=sys.stdout)
                filename = secure_filename(file.filename)
                test_file_path = os.path.join(app.config['UPLOAD_FOLDER'],
                                              filename)
                print(test_file_path, file=sys.stdout)
                file.save(test_file_path)
                # check for number of lines
                with open(test_file_path, "r") as f:
                    lines = len(f.readlines())
                max_len = 50000
                if lines > max_len:
                    raise IndexError('file is too long')
                # read form
                tcr_encoding_model = request.form['model_type']
                dataset = request.form['dataset']
                use_alpha = 'use_alpha' in request.form
                use_vj = 'use_vj' in request.form
                use_mhc = 'use_mhc' in request.form
                use_t_type = 'use_t_type' in request.form
                # version flags
                version = ''
                version += '1'
                if dataset == 'vdjdb':
                    version += 'v'
                elif dataset == 'mcpas':
                    version += 'm'
                if tcr_encoding_model == 'AE':
                    version += 'e'
                elif tcr_encoding_model == 'LSTM':
                    version += 'l'
                if use_alpha:
                    version += 'a'
                if use_vj:
                    version += 'j'
                if use_mhc:
                    version += 'h'
                if use_t_type:
                    version += 't'
                print('version: ' + version, file=sys.stdout)
                df = predict(version, test_file_path)
                df.to_csv(app.config['UPLOAD_FOLDER'] + '/results.csv',
                          index=False)
                os.remove(test_file_path)
                return send_from_directory(
                    directory=app.config['UPLOAD_FOLDER'],
                    filename='results.csv')
        except IndexError:
            error_message = True
            print('long file error', file=sys.stderr)
            os.remove(test_file_path)
            return render_template("too_long_input_file.html",
                                   error_message=error_message)
        except:
            error_message = True
            os.remove(test_file_path)
    return render_template("home.html", error_message=error_message)
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          num_iteration=2000,
          learn_rate=0.5,
          print_cost=False):
    """
       Builds the logistic regression model by calling the function you've implemented previously

       Arguments:
       X_train -- training set represented by a numpy array of shape (num_px * num_px * 3, m_train)
       Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
       X_test -- test set represented by a numpy array of shape (num_px * num_px * 3, m_test)
       Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
       num_iteration -- hyperparameter representing the number of iterations to optimize the parameters
       learn_rate -- hyperparameter representing the learning rate used in the update rule of optimize()
       print_cost -- Set to true to print the cost every 100 iterations

       Returns:
       d -- dictionary containing information about the model.
       """

    # initialize parameters with zeros (≈ 1 line of code)
    from Initialize_with_zeros import initialize_with_zeros
    w, b = initialize_with_zeros(X_train.shape[0])

    from Optimize import optimize
    # Gradient descent
    params, grads, costs = optimize(w,
                                    b,
                                    X_train,
                                    Y_train,
                                    num_iterations=num_iteration,
                                    learning_rate=learn_rate,
                                    print_cost=print_cost)

    # Retrieve parameters w and b from dictionary "parameters"
    w = params["w"]
    b = params["b"]

    # Predict test/train set examples
    from Predict import predict
    Y_prediction_train = predict(w=w, b=b, X=X_train)
    Y_prediction_test = predict(w=w, b=b, X=X_test)

    # Print train/test errors
    import numpy as np
    print("train accuracy: {} %".format(
        100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(
        100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    d = {
        "costs": costs,
        "w": w,
        "b": b,
        "Y_prediction_train": Y_pridiction_train,
        "Y_prediction_test": Y_pridiction_test,
        "learning_rate": learn_rate,
        "num_iterations": num_iteration
    }
    return d
Example #29
        time_span=hp["time_span"],
        rolling_average_window=hp["rolling_average_window"],
        rolling_average_min_periods=hp["rolling_average_min_periods"],
        TWN_EC_split=hp["TWN_EC_split"],
        date_efficient=hp["date_efficient"],
        region_efficient=hp["region_efficient"],
        drop_columns=hp["drop_columns"],
        include_only_columns=hp["include_only_columns"],
        label=hp["label"],
        real_time=hp["real_time"],
    )
    loggr.info("Predictions table is now ready")
except Exception as e:
    loggr.exception("Wrangle.py could not run. Here's why: \n {e}")

try:
    loggr.info("Running predictions for tomorrow ({})".format(
        datetime.datetime.now().date() + datetime.timedelta(1)))
    predict(
        label=hp["label"],
        precision=hp["precision"],
        target_date=hp["start_date"],
        normalize_data=hp["normalize_data"],
    )

    loggr.info("Predictions are now ready")
except Exception as e:
    loggr.exception("Predict.py could not run. Here's why: \n {e}")

loggr.info("All done for today!")
Example #30
def validate():
    # Load the feature and label csv file, join them according to complaint ID
    complaints_with_sentiment = "data/complaints_with_sentiment_metric.csv"
    narrative_preprocessed_file = "data/narrative_preprocessed.csv"
    complaints_features = merge_narrative_processed_and_sentiment_metrics(
        narrative_preprocessed_file, complaints_with_sentiment)
    # Extract the validation data
    #print(complaints_features.head())
    validation_size = VALIDATION_SIZE
    complaints_features_for_validation = complaints_features[:validation_size]
    #print(complaints_features_for_validation.columns)

    company_response_columns = complaints_features_for_validation.loc[:,
                                                                      'company_response_Closed':
                                                                      'company_response_Closed with non-monetary relief']

    narratives = complaints_features_for_validation[
        "Consumer complaint narrative"]
    company_responses = []
    predicted_product_types = []
    predicted_disputes = []
    suggested_responses = []
    escalate_prob = []
    wont_escalate_list = []

    print("Loading models...")
    model_dir = "trained_models"
    clf_product_file = model_dir + "/" + "product_classifier_lgreg.sav"
    clf_escalation_file = model_dir + "/" + "lgreg.all.joblib"
    tf_idf_vectorizer_file = model_dir + "/" + "tfidf_vectorizer_max50000.all.joblib"
    scaler_file = model_dir + "/" + "scaler.joblib"
    clf_product, clf_escalation, tf_idf_vectorizer, scaler = load_models(
        clf_product_file, clf_escalation_file, tf_idf_vectorizer_file,
        scaler_file)
    print("Predicting...")
    i = 0
    for narrative in narratives:
        product_type, escalation_prob_fig, \
            suggest_response, escalation_probas_of_responses = predict(narrative,
                                                                      clf_product,
                                                                      clf_escalation,
                                                                      tf_idf_vectorizer,
                                                                      scaler)
        company_response_index = get_response_index(
            company_response_columns.loc[i, :])
        i += 1
        company_responses.append(get_response_types()[company_response_index])
        predicted_product_types.append(product_type)
        predict_dispute = (
            1 if escalation_probas_of_responses[company_response_index] > 0.5
            else 0)
        predicted_disputes.append(predict_dispute)
        suggested_responses.append(suggest_response)
        escalate_prob.append(escalation_probas_of_responses)
        wont_escalate_list.append(
            wont_escalate(escalation_probas_of_responses))

    predict_result = pd.DataFrame()

    print(complaints_features_for_validation.columns)
    predict_result["Complaint ID"] = complaints_features_for_validation[
        "Complaint ID"]
    predict_result[
        "Consumer complaint narrative"] = complaints_features_for_validation[
            "Consumer complaint narrative"]
    predict_result["Product"] = complaints_features_for_validation["Product"]
    predict_result["Company response to consumer"] = company_responses
    predict_result["dispute_result"] = [
        0 if x == 'No' else 1
        for x in complaints_features_for_validation["Consumer disputed?"]
    ]

    # Assign the predicted result
    predict_result["predicted_product"] = predicted_product_types
    predict_result["predicted_dispute"] = predicted_disputes
    predict_result["suggested_response"] = suggested_responses
    predict_result["escalation_probs"] = escalate_prob
    predict_result["wont_escalate"] = wont_escalate_list

    print(predict_result.head())
    predict_result.to_csv("data/predicted_result_of_validation.csv",
                          index=False)
Example #31
from Predict import predict, get_reviews_from_array

a = input()
result = predict([a])
print(result)