Пример #1
0
def synonym_relation(text1, text2):
    """Score how many words of the shorter text appear among WordNet
    synonyms of the longer text.

    Both inputs are raw strings; 'no info' is the dataset's missing-value
    sentinel and yields 0 immediately.  Returns a float in [0, 1]
    (matched words / length of the shorter tokenized text), 0 for empty
    texts, or None if `handle` swallows an exception.
    """
    try:
        if text1 == 'no info' or text2 == 'no info':
            return 0

        text1 = stopwordsremove(text1)
        text2 = stopwordsremove(text2)
        if len(text1) == 0 or len(text2) == 0:
            return 0

        # The two original branches were mirror images: always build the
        # synonym set from the LONGER token list and score the shorter one.
        if len(text1) < len(text2):
            shorter, longer = text1, text2
        else:
            # On a tie the original built synonyms from text1 and scored
            # text2, which this ordering preserves.
            shorter, longer = text2, text1

        syn_set = {
            lemma.name()
            for word in longer
            for syn in wordnet.synsets(word)
            for lemma in syn.lemmas()
        }
        count = sum(1 for word in shorter if word in syn_set)
        return count / len(shorter)
    except Exception as e:
        handle('synonym relation finding process')
Пример #2
0
def missing_values(data):
    """Fill missing values in the job-postings frame and derive columns.

    1. ``location`` -> ``country`` (text before the first comma, or the
       whole value when there is no comma).
    2. ``salary_range`` -> ``minimum_salary``/``maximum_salary`` (split on
       '-'; any value containing letters is reset to '0-0' first).
    3. Remaining columns: object columns get 'no info' and are lowercased,
       numeric columns get 0.

    ``location`` and ``salary_range`` are dropped afterwards.  Mutates and
    returns *data* (returns None if an exception is swallowed by `handle`).
    Assumes a default RangeIndex 0..n-1 — TODO confirm against callers.
    """
    print('Handling Missing Data')
    try:
        # Assign back instead of chained fillna(..., inplace=True): the
        # chained form is unreliable under pandas copy-on-write.
        data['location'] = data['location'].fillna('no info')
        withoutcomma = data[~data['location'].str.contains(",")].index
        withcomma = data[data['location'].str.contains(",")].index

        for i in withcomma:
            data.loc[i, 'country'] = data.loc[i, 'location'].split(',')[0].strip()

        for i in withoutcomma:
            data.loc[i, 'country'] = data.loc[i, 'location'].strip()

        # 2. salary range
        data['salary_range'] = data['salary_range'].fillna('0-0')

        for i in range(data.shape[0]):
            # Renamed from `str`, which shadowed the builtin.
            salary = data.loc[i, 'salary_range']
            # NOTE: the comma inside the class also matches ',' — kept for
            # behavioral compatibility with the original pattern.
            if re.search(r'[a-z,A-Z]', salary):
                data.loc[i, 'salary_range'] = '0-0'

            if data.loc[i, 'salary_range'].find("-") != -1:
                parts = data.loc[i, 'salary_range'].split('-')
                data.loc[i, 'minimum_salary'] = parts[0]
                data.loc[i, 'maximum_salary'] = parts[1]
            else:
                data.loc[i, 'minimum_salary'] = data.loc[i, 'salary_range']
                data.loc[i, 'maximum_salary'] = data.loc[i, 'salary_range']

        # 3. All other categorical columns and remaining numeric columns.
        for col in data.columns:
            if data[col].isna().any():
                if data[col].dtype == 'object':
                    data[col] = data[col].fillna('no info').str.lower()
                else:
                    data[col] = data[col].fillna(0)

        data.drop(['salary_range', 'location'], axis=1, inplace=True)
        return data
    except Exception as e:
        handle('missing data handling process')
Пример #3
0
def categorical_cols_test(data):
    """Apply the binary encoder fitted at training time to *data*.

    Loads the encoder pickled by `categorical_cols_train` and returns the
    transformed frame (None if `handle` swallows an exception).
    """
    print('Categorical Encoding')
    try:
        # Use a context manager so the file handle is closed (the original
        # pickle.load(open(...)) leaked it).  NOTE: pickle.load is only
        # safe on trusted files — this one is produced by our own training.
        with open("model/encoder.p", "rb") as fh:
            encoder = pickle.load(fh)
        newdata = encoder.transform(data)
        return newdata
    except Exception as e:
        handle('categorical columns handling for testing process')
Пример #4
0
def training():
    """Run the full training pipeline on data/train.csv.

    Reads the CSV, then chains missing-value handling, text handling,
    categorical encoding, and model training via DataFrame.pipe.
    """
    try:
        data = read_csv('data/train.csv')

        (data.pipe(missing_values).pipe(texthandling)
             .pipe(categorical_cols_train).pipe(train_and_save_model))

    except Exception as e:
        # Fixed typo in the error label ("piepline" -> "pipeline").
        handle("Training pipeline")
Пример #5
0
def testing():
    """Run the full test-time pipeline on data/test.csv.

    Reads the CSV, then chains missing-value handling, text handling,
    categorical encoding, and prediction via DataFrame.pipe.
    """
    try:
        dataset = read_csv('data/test.csv')

        (dataset
            .pipe(missing_values)
            .pipe(texthandling)
            .pipe(categorical_cols_test)
            .pipe(load_model_predict))

    except Exception as e:
        handle('testing process')
Пример #6
0
def read_csv(path):
    """Read *path* as a pandas DataFrame when it has a .csv extension.

    Returns the DataFrame, or None when the extension is not csv or the
    read fails (the original raised UnboundLocalError at `return data`
    in both of those cases because `data` was never assigned).
    """
    data = None
    try:
        if path.split(".")[-1] == 'csv':
            data = pd.read_csv(path)
        else:
            print("The file is not a CSV file")
    except Exception as e:
        handle('file reading')
    return data
Пример #7
0
def stopwordsremove(text):
    """Tokenize *text*, drop stop words, and return lowercased stems.

    Filtering compares the ORIGINAL token against the module-level
    `stop_words` set; only the kept tokens are lowercased and stemmed
    (Porter).  Returns a list of stems, or None if `handle` swallows
    an exception.
    """
    try:
        stemmer = PorterStemmer()
        # `w not in` replaces the non-idiomatic `not w in` of the original.
        return [
            stemmer.stem(w.lower())
            for w in word_tokenize(text)
            if w not in stop_words
        ]
    except Exception as e:
        handle('stop words removing')
Пример #8
0
def categorical_cols_train(data):
    """Fit a binary encoder on the categorical columns and persist it.

    Encodes employment_type, required_experience, required_education and
    country, saves the fitted encoder to model/encoder.p for use by
    `categorical_cols_test`, and returns the transformed frame.
    """
    try:
        print('Categorical Encoding')
        encoder = ce.BinaryEncoder(cols=[
            'employment_type', 'required_experience', 'required_education',
            'country'
        ])
        newdata = encoder.fit_transform(data)
        # Context manager guarantees the pickle is flushed and the handle
        # closed (the original pickle.dump(..., open(...)) never closed it).
        with open("model/encoder.p", "wb") as fh:
            pickle.dump(encoder, fh)
        return newdata
    except Exception as e:
        handle('categorical column handling')
Пример #9
0
def load_model_predict(data):
    """Load the persisted scaler and model, predict on *data*, and score.

    Drops the 'fraudulent' target column, scales the features with the
    training-time scaler, predicts with the saved model, and forwards the
    predictions to `score_and_save`.
    """
    try:
        X_test = data.drop('fraudulent', axis=1)
        # (The original also extracted y_test here but never used it;
        # a missing 'fraudulent' column still raises via the drop above.)

        # Context managers close the handles the original open() calls leaked.
        # NOTE: pickle.load is only safe on trusted files — these are
        # produced by our own training run.
        with open("model/scaler.p", "rb") as fh:
            scaler = pickle.load(fh)
        X_test = scaler.transform(X_test)

        with open('model/finalized_model.p', 'rb') as fh:
            model = pickle.load(fh)

        y_pred = model.predict(X_test)
        score_and_save(y_pred)
    except Exception as e:
        handle('prediction process')
Пример #10
0
def removeuncessary(text):
    """Clean free text for downstream similarity/word-count features.

    Applies, in order:
    1. remove all punctuation characters,
    2. remove any word containing a digit,
    3. collapse every remaining non-alphabetic run into a single space.

    *text* is coerced with str() first, so non-string values (e.g. NaN)
    are handled.  Returns the cleaned string.
    """
    try:
        text = str(text)
        text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
        # Raw strings: '\w' / '\d' in plain literals are invalid escapes
        # (DeprecationWarning, SyntaxError in future Python versions).
        text = re.sub(r'\w*\d\w*', '', text)
        text = re.sub(r'[^a-zA-Z ]+', ' ', text)

        return text
    except Exception as e:
        handle('removing unnecessary text')
Пример #11
0
def score_and_save(y_pred):
    """Print evaluation metrics for *y_pred* against the test-set labels
    and save the predictions.

    Re-reads data/test.csv for the ground-truth 'fraudulent' column,
    prints the confusion matrix plus F1/precision/recall/accuracy
    (rounded to 4 places), then writes the test set with a new
    'fraud_prediction' column to predictionoutput/testsetprediction.csv.
    """
    try:
        data = read_csv('data/test.csv')
        y_test = data['fraudulent']

        print("\nSCORES")
        print("confusion matrix")
        print(confusion_matrix(y_test, y_pred))
        print(f"F1-Score = {round(f1_score(y_test, y_pred), 4)}")
        print(f"Precision = {round(precision_score(y_test, y_pred), 4)}")
        print(f"Recall = {round(recall_score(y_test, y_pred), 4)}")
        print(f"Accuracy = {round(accuracy_score(y_test, y_pred), 4)}")

        data['fraud_prediction'] = y_pred
        data.to_csv('predictionoutput/testsetprediction.csv')
    except Exception as e:
        handle('scoring and saving process')
Пример #12
0
def train_and_save_model(data):
    """Scale the features, train a random forest, and persist both.

    Drops the 'fraudulent' target, fits a StandardScaler (saved to
    model/scaler.p) and a 100-tree entropy RandomForestClassifier with
    random_state=1 (saved to model/finalized_model.p).
    """
    try:
        print("Model Training")
        X_train = data.drop('fraudulent', axis=1)
        y_train = data['fraudulent']

        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        # Context managers guarantee the pickles are flushed and closed
        # (the original pickle.dump(..., open(...)) never closed the files,
        # risking truncated artifacts on interpreter exit).
        with open("model/scaler.p", "wb") as fh:
            pickle.dump(sc, fh)

        from sklearn.ensemble import RandomForestClassifier
        model = RandomForestClassifier(n_estimators=100,
                                       criterion='entropy',
                                       random_state=1)

        model.fit(X_train, y_train)

        with open('model/finalized_model.p', 'wb') as fh:
            pickle.dump(model, fh)
    except Exception as e:
        handle('Model Creation and training')
Пример #13
0
def texthandling(data):
    """Derive word-count and synonym-similarity features from text columns.

    For every row: cleans the free-text columns with `removeuncessary`,
    computes word counts for company_profile/benefits ('no info' -> 0),
    and adds ten pairwise `synonym_relation` similarity columns.  Finally
    fills NaN similarities with 0 and drops the raw text columns plus
    job_id.  Mutates and returns *data* (None if `handle` swallows an
    exception).  Assumes a default RangeIndex 0..n-1 — TODO confirm.
    """
    print('Text Handling')
    try:
        '''
            This function is for handling text data columns company profile,
            description, requirements, benefits are there is multiple text in
            those columns we need to do something about them.
            '''
        # NOTE(review): this local is never used in this function;
        # presumably it was meant for `stopwordsremove` — verify.
        stop_words = set(stopwords.words('english'))
        for i in range(0, data.shape[0]):

            # Clean every free-text column in place.
            data.loc[i, 'company_profile'] = removeuncessary(
                data.loc[i, 'company_profile'])
            data.loc[i,
                     'description'] = removeuncessary(data.loc[i,
                                                               'description'])
            data.loc[i, 'requirements'] = removeuncessary(
                data.loc[i, 'requirements'])
            data.loc[i, 'benefits'] = removeuncessary(data.loc[i, 'benefits'])
            data.loc[i, 'title'] = removeuncessary(data.loc[i, 'title'])
            data.loc[i, 'department'] = removeuncessary(data.loc[i,
                                                                 'department'])
            data.loc[i, 'industry'] = removeuncessary(data.loc[i, 'industry'])
            data.loc[i, 'function'] = removeuncessary(data.loc[i, 'function'])

            # Word-count features; the 'no info' sentinel counts as 0.
            words = str(data.loc[i, 'company_profile'])
            if (words == 'no info'):
                data.loc[i, 'company_profile_word_count'] = 0
            else:
                data.loc[i, 'company_profile_word_count'] = len(words.split())

            words = str(data.loc[i, 'benefits'])
            if (words == 'no info'):
                data.loc[i, 'benefits_word_count'] = 0
            else:
                data.loc[i, 'benefits_word_count'] = len(words.split())

            # Pairwise synonym-overlap similarity features.
            data.loc[i, 'title_and_job_similarity'] = synonym_relation(
                data.loc[i, 'title'], data.loc[i, 'description'])

            data.loc[i, 'title_and_req_similarity'] = synonym_relation(
                data.loc[i, 'title'], data.loc[i, 'requirements'])

            data.loc[i, 'profile_and_job_similarity'] = synonym_relation(
                data.loc[i, 'company_profile'], data.loc[i, 'description'])

            # NOTE(review): 'profiel' is a typo, but it is used consistently
            # below and is part of the trained model's schema — do not rename.
            data.loc[i, 'profiel_and_req_similarity'] = synonym_relation(
                data.loc[i, 'company_profile'], data.loc[i, 'requirements'])

            data.loc[i,
                     'title_and_department_syn_similarity'] = synonym_relation(
                         data.loc[i, 'title'], data.loc[i, 'department'])

            data.loc[i,
                     'title_and_industry_syn_similarity'] = synonym_relation(
                         data.loc[i, 'title'], data.loc[i, 'industry'])

            data.loc[i,
                     'title_and_function_syn_similarity'] = synonym_relation(
                         data.loc[i, 'title'], data.loc[i, 'function'])

            data.loc[
                i,
                'industry_and_department_syn_similarity'] = synonym_relation(
                    data.loc[i, 'industry'], data.loc[i, 'department'])

            data.loc[
                i,
                'function_and_department_syn_similarity'] = synonym_relation(
                    data.loc[i, 'function'], data.loc[i, 'department'])
            data.loc[
                i, 'industry_and_function_syn_similarity'] = synonym_relation(
                    data.loc[i, 'industry'], data.loc[i, 'function'])

        # synonym_relation may return None (its handler path); treat as 0.
        for i in [
                'title_and_job_similarity', 'title_and_req_similarity',
                'profile_and_job_similarity', 'profiel_and_req_similarity',
                'title_and_department_syn_similarity',
                'title_and_industry_syn_similarity',
                'title_and_function_syn_similarity',
                'function_and_department_syn_similarity',
                'industry_and_department_syn_similarity',
                'industry_and_function_syn_similarity'
        ]:

            data[i].fillna(0, inplace=True)

        # Raw text columns are no longer needed once features are derived.
        data.drop([
            'company_profile', 'benefits', 'description', 'requirements',
            'title', 'department', 'industry', 'function', 'job_id'
        ],
                  axis=1,
                  inplace=True)
        return data
    except Exception as e:
        handle('Text handling process')
Пример #14
0
import argparse
from Allcodefiles.training import training
from Allcodefiles.testing import testing
from Allcodefiles.Exceptionhandling import handle

if __name__ == '__main__':
    # CLI entry point: choose between the training and testing pipelines.
    parser = argparse.ArgumentParser(description='Fake Job Prediction')
    parser.add_argument('-r', '--return_object', choices=['train', 'test'],
                        default='train', type=str,
                        help='Select what task to be done')

    args = parser.parse_args()
    var_args = vars(args)

    # The try/except was previously dedented to module level, so merely
    # importing this module raised NameError on `var_args` (silently
    # swallowed by handle).  It belongs inside the __main__ guard.
    try:
        if var_args['return_object'] == 'train':
            training()
        else:
            testing()

    except Exception as e:
        handle('Main file')