def getModelApi():
    """Build and return the two model-serving callables.

    Constructs one shared ``Preprocessor`` and one shared ``Summarizer``
    (vocab loaded from ``./vocab``, model dir ``./``) up front, then
    returns a ``(preprocessorApi, modelApi)`` pair of closures over them.
    """
    preprocessor = Preprocessing.Preprocessor()
    summarizer = Summarizer.Summarizer('./vocab', './')

    def preprocessorApi(article):
        # Tokenize, re-join with spaces, then re-split on the '*N*'
        # sentence sentinel before handing off to adjust_article.
        tokens = preprocessor.tokenize(article)
        joined = ' '.join(tokens)
        adjusted = preprocessor.adjust_article(joined.split('*N*'))
        # Caller receives UTF-8 bytes, not str.
        return adjusted.encode('utf-8')

    def modelApi(preprocessed_articles):
        # Thin delegation to the shared summarizer instance.
        return summarizer.summarize(preprocessed_articles)

    return preprocessorApi, modelApi
def index():
    """Flask view: predict redirect parameters for an uploaded Excel sheet.

    GET renders the upload form (``index.html``). POST reads the uploaded
    file ``file1``, cleans it with the project ``Preprocessor``, runs the
    pickled decision-tree model on it, and returns the merged input +
    prediction table as a CSV file download. Any failure is caught,
    printed, and reported to the user as a plain-text error string.
    """
    if request.method == 'POST':
        try:
            f = request.files['file1']
            df = pd.read_excel(f)
            # Strip stray spaces so column names match the training schema.
            df.columns = df.columns.str.replace(' ', '')
            # Columns the model was not trained on; dropped below.
            columns = [
                'id', 'MRBTS', "redirFreqCdma",
                "Item-redirGeranArfcnStructL-redirGeranArfcnPrio",
                'redirGeranArfcnStructL', 'redirGeranBandIndicator',
                'Item-redirGeranArfcnStructL-redirGeranArfcnValue'
            ]
            preprocessor = Preprocessing.Preprocessor()
            df = preprocessor.handle_missing_values(df)
            df = preprocessor.remove_columns(df, columns)
            filename1 = 'DTmodel_Pred_REDRTV1.sav'
            # Fix: open the model inside a context manager so the file
            # handle is closed even if unpickling raises (the original
            # pickle.load(open(...)) leaked the handle).
            # NOTE(review): pickle.load is only safe here because the
            # model file ships with the app, not from untrusted users.
            with open(filename1, 'rb') as model_file:
                loaded_model = pickle.load(model_file)
            prediction = loaded_model.predict(df)
            result = pd.DataFrame(prediction)
            result.columns = ['Pred']
            # Map integer class labels back to their categorical names.
            result = preprocessor.int_to_categorical(result)
            final_sheet = pd.merge(df, result,
                                   left_index=True, right_index=True)
            print('final sheet is prepared')
            # Serve the merged frame as a CSV attachment.
            resp = make_response(final_sheet.to_csv())
            resp.headers[
                "Content-Disposition"] = "attachment; filename=outputfile.csv"
            resp.headers["Content-Type"] = "text/csv"
            return resp
        except Exception as e:
            # Broad catch is deliberate: this is the top-level request
            # boundary, and the user gets a generic failure message.
            print('The Exception message is:', e)
            return 'Something is Wrong'
    else:
        return render_template('index.html')
# -*- coding:utf-8 -*-
from model import TextClassifier
import config
import Preprocessing
import pandas as pd
import numpy as np

if __name__ == '__main__':
    p = Preprocessing.Preprocessor()

    # Load the training, validation and test splits from the paths
    # configured in config.py.
    Train_raw_data = pd.read_csv(config.train_data_path)
    Validation_raw_data = pd.read_csv(config.validate_data_path)
    Test_raw_data = pd.read_csv(config.test_data_path)

    # For each split, clean the raw 'content' column and convert it into
    # model-ready sequences. Only the training pass runs with
    # train_flag=True; validation and test reuse its state.
    Train_sequences = p.preprocess_text(
        p.preprocess_content(Train_raw_data['content']), train_flag=True)
    Validation_sequences = p.preprocess_text(
        p.preprocess_content(Validation_raw_data['content']), train_flag=False)
    Test_sequences = p.preprocess_text(
        p.preprocess_content(Test_raw_data['content']), train_flag=False)

    # Pipeline outline (from the original notes):
    # 1. Split content and word tags
    # 2. Remove stop words
    # 3. Encode word tags
    # 4. Build the word dictionary and pad the sequences