#'loan_advances',
                 #'long_term_incentive',
                 'poi_messages_pctg',
                 #'restricted_stock',
                 #'salary',
                 #'shared_receipt_with_poi',
                 'total_payments',
                 'total_stock_value'
                 ]

### Load the dictionary containing the dataset
# NOTE(review): unpickling can execute arbitrary code; only load a dataset
# file that comes from a trusted source.
# The file is opened in binary mode (what pickle expects, and required on
# Python 3) inside a context manager so the handle is closed right after
# loading instead of being left open for the rest of the script.
with open("final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = pickle.load(dataset_file)

### Task 2: Remove outliers
### Keys passed to remove_outliers() for removal from the dataset
outliers = ['TOTAL', 'THE TRAVEL AGENCY IN THE PARK', 'LOCKHART EUGENE E']
remove_outliers(data_dict, outliers)

### Task 3: Create new feature(s)
### Fields to be considered as cash received
cash_fields = ['salary', 'bonus', 'exercised_stock_options', 'loan_advances']
### Add cash received fields
calc_cash_received(data_dict, cash_fields)
### Add POI message percentage
calc_poi_messages_pctg(data_dict)
### Store to my_dataset for easy export below
### (my_dataset is the same dict object as data_dict, not a copy)
my_dataset = data_dict

### Extract features and labels from dataset for local testing
data = featureFormat(my_dataset, features_list, sort_keys=True)
labels, features = targetFeatureSplit(data)

# Create Min/Max Scaler
Exemplo n.º 2
0
    unsorted_pairs = zip(features_list[1:], scores)
    # Sort list
    sorted_pairs = list(reversed(sorted(unsorted_pairs, key=lambda x: x[1])))
    # Create dict
    if k == 'all':
        k_best_features = dict(sorted_pairs)
    else:
        k_best_features = dict(sorted_pairs[:k])
    return k_best_features

if __name__ == '__main__':
    # Load the dictionary containing the dataset
    data_dict = pickle.load(open("../project/final_project_dataset.pkl", "r"))
    outliers = ['TOTAL', 'THE TRAVEL AGENCY IN THE PARK', 'LOCKHART EUGENE E']
    # Remove outliers
    remove_outliers(data_dict, outliers)
    # Fields to be considered as cash received
    cash_fields = ['salary','bonus','exercised_stock_options','loan_advances']
    # Add cash received fields
    calc_cash_received(data_dict, cash_fields)
    # Add POI message %
    calc_poi_messages_pctg(data_dict)
    #pprint(data_dict)
    # Create features list
    features_list = ['poi',
                     'bonus',
                     'cash_received',
                     'cash_received_pctg',
                     'deferral_payments',
                     'deferred_income',
                     'director_fees',