def cleanBill2(n_components=20):
    """Build a PCA-compressed feature frame from the bill of materials.

    Loads ``bill_of_materials.csv`` through ``helper.load_file``, left-joins
    the frame returned by ``cleanComponents()`` onto each of the
    ``component_id_1`` .. ``component_id_8`` columns, replaces missing values
    with 0 and reduces the result with PCA.

    Args:
        n_components: Number of principal components to keep. Defaults to
            20, the value that used to be hard-coded.

    Returns:
        pandas.DataFrame whose index holds the loaded bill's original index
        values (named ``tube_assembly_id``) and whose columns are ``pca_0``,
        ``pca_1``, ... one per retained component.
    """
    from helper import load_file
    from pandas import DataFrame, merge
    from sklearn.decomposition import PCA
    from CleanComponents import cleanComponents

    bill = load_file("bill_of_materials.csv")
    # merge() on columns returns a fresh integer index, so keep the original
    # index values in a column and restore them after the joins.
    bill['tube_assembly_id'] = bill.index

    components = cleanComponents()
    components['component_id'] = components.index

    # The loop bounds assume the bill has component_id_1 .. component_id_8.
    for i in range(1, 9):
        suffix = "_" + str(i)
        key = "component_id" + suffix
        # add_suffix returns a renamed copy (including "component_id" ->
        # key), so `components` itself is never relabelled between passes.
        slot_components = components.add_suffix(suffix)
        bill = merge(bill, slot_components, how='left', on=key)
        # axis must be passed by keyword: pandas >= 2.0 rejects drop(label, 1).
        bill = bill.drop(key, axis=1)

    bill.index = bill['tube_assembly_id']
    bill = bill.drop("tube_assembly_id", axis=1)
    bill = bill.fillna(0)

    # NOTE(review): fit_transform needs every remaining column to be numeric
    # -- confirm against what load_file / cleanComponents return.
    projected = PCA(n_components=n_components).fit_transform(bill)
    result = DataFrame(projected, index=bill.index)
    result.columns = ["pca_" + str(i) for i in result.columns.values]
    return result
def cleanBill():
    """Aggregate quantity-weighted component features per tube assembly.

    Takes the frame returned by ``processBill()``, left-joins the frame
    returned by ``cleanComponents()`` on ``component_id``, fills missing
    values with 0, multiplies every joined feature column by the row's
    ``quantity``, and sums all columns per ``tube_assembly_id``. The summed
    frame is then passed through ``enrichCompNumber``.

    NOTE(review): assumes ``processBill()`` returns a frame indexed by tube
    assembly id with ``component_id``, ``quantity`` and ``component_number``
    columns -- these are the names this function reads; confirm against
    ``processBill``.

    Returns:
        Whatever ``enrichCompNumber`` returns for the per-assembly sums (a
        frame indexed by ``tube_assembly_id`` goes in).
    """
    from pandas import merge
    from CleanComponents import cleanComponents

    components = cleanComponents()
    components['component_id'] = components.index

    bill = processBill()
    bill['tube_assembly_id'] = bill.index
    bill = merge(bill, components, how='left', on="component_id")
    # axis must be passed by keyword: pandas >= 2.0 rejects drop(label, 1).
    bill = bill.drop('component_id', axis=1)
    bill = bill.fillna(0)

    # These three columns must not be scaled by quantity.
    passthrough = ['quantity', 'component_number', 'tube_assembly_id']
    feature_cols = [col for col in bill.columns if col not in passthrough]

    weighted = bill[feature_cols].multiply(bill['quantity'], axis="index")
    for col in passthrough:
        weighted[col] = bill[col]

    # The frame keeps the merge's integer index here on purpose: if the index
    # were also named "tube_assembly_id", groupby would raise an ambiguity
    # error (the name would be both an index level and a column label).
    # Grouping by the column still yields a result indexed by
    # tube_assembly_id.
    summed = weighted.groupby('tube_assembly_id').sum()
    return enrichCompNumber(summed)