def cleanBill2(n_components=20):
    """Build a PCA-compressed feature table from the wide bill of materials.

    Loads ``bill_of_materials.csv`` through ``helper.load_file``, left-joins
    the frame returned by ``cleanComponents()`` once for each of the eight
    ``component_id_<i>`` columns, replaces missing values with 0 and projects
    the result with scikit-learn's PCA.

    Args:
        n_components: Number of principal components to keep. Defaults to 20,
            the value that used to be hard-coded.

    Returns:
        A DataFrame whose index holds the ``tube_assembly_id`` values (taken
        from the index of the loaded frame) and whose columns are named
        ``pca_0``, ``pca_1``, ... - one per principal component.
    """
    from helper import load_file
    from pandas import merge
    from CleanComponents import cleanComponents
    from sklearn.decomposition import PCA
    from pandas import DataFrame

    billOfComponents = load_file("bill_of_materials.csv")
    # A column-on-column merge ignores the left index, so park the ids in a
    # regular column until all joins are done.
    billOfComponents['tube_assembly_id'] = billOfComponents.index

    components = cleanComponents()
    components['component_id'] = components.index

    for slot in range(1, 9):
        suffix = "_" + str(slot)
        key = "component_id" + suffix
        # add_suffix returns a renamed copy, so `components` keeps its
        # original column names instead of being relabelled on every pass.
        slot_components = components.add_suffix(suffix)
        billOfComponents = merge(billOfComponents, slot_components, how='left', on=key)
        # `axis` must be a keyword: pandas 2.0 removed the positional form.
        billOfComponents = billOfComponents.drop(key, axis=1)

    billOfComponents.index = billOfComponents['tube_assembly_id']
    billOfComponents = billOfComponents.drop("tube_assembly_id", axis=1)
    billOfComponents = billOfComponents.fillna(0)

    projected = PCA(n_components=n_components).fit_transform(billOfComponents)
    cols = ["pca_" + str(i) for i in range(projected.shape[1])]

    return DataFrame(projected, index=billOfComponents.index, columns=cols)
def cleanBill():
    """Aggregate quantity-weighted component features per tube assembly.

    Left-joins the frame returned by ``processBill()`` with the frame returned
    by ``cleanComponents()`` on ``component_id``, fills missing values with 0,
    multiplies every joined column except ``quantity``, ``component_number``
    and ``tube_assembly_id`` by the row's ``quantity``, sums the rows of each
    ``tube_assembly_id`` and passes the result to ``enrichCompNumber``.

    ``processBill`` and ``enrichCompNumber`` are expected to be defined
    elsewhere in this module.

    Returns:
        Whatever ``enrichCompNumber`` returns for the aggregated frame, which
        is indexed by ``tube_assembly_id``.
    """
    from pandas import merge

    from CleanComponents import cleanComponents

    components = cleanComponents()
    billOfComponents = processBill()

    # A column-on-column merge ignores the indexes, so expose both ids as columns.
    billOfComponents['tube_assembly_id'] = billOfComponents.index
    components['component_id'] = components.index

    billOfComponents = merge(billOfComponents, components, how='left', on="component_id")
    # `axis` must be a keyword: pandas 2.0 removed the positional form.
    billOfComponents = billOfComponents.drop('component_id', axis=1)
    billOfComponents = billOfComponents.fillna(0)

    # The frame deliberately keeps the default index produced by merge().
    # Naming the index 'tube_assembly_id' while a column of the same name
    # exists makes groupby('tube_assembly_id') ambiguous, which current
    # pandas rejects with a ValueError.
    unscaled = ['quantity', 'component_number', 'tube_assembly_id']
    quantities = billOfComponents['quantity']

    weighted = billOfComponents.drop(unscaled, axis=1).multiply(quantities, axis="index")
    # Re-attach the untouched columns in their original trailing order.
    for column in unscaled:
        weighted[column] = billOfComponents[column]

    # groupby(...).sum() is the direct spelling of aggregate(np.sum) and does
    # not trigger the callable-aggregation FutureWarning of recent pandas.
    billOfComponents = weighted.groupby('tube_assembly_id').sum()
    billOfComponents = enrichCompNumber(billOfComponents)

    return billOfComponents