def train_autoEncoder(trainFile, testFile, recordParserCallback, n_visible, 
                      n_hidden, learning_rates, callback, corruption_levels, 
                      activation, training_epochs, legend, normalize, 
                      generate_attr_vec_callback, total, layer_no, params=None, 
                      modulo=1000, useMomentum=False, momentumRate=0.90):
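    """Stream-train a stacked denoising autoencoder (SdA), reading records
    from trainFile and evaluating against testFile. Layers are pretrained
    greedily with per-layer learning rates and corruption levels; pass
    params={'W': ..., 'b': ...} to warm-start from previously learned
    weights and skip pretraining."""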

    if len(corruption_levels) != len(n_hidden):
        raise ValueError("a corruption level must be provided for each hidden layer")

    if len(learning_rates) != len(n_hidden):
        raise ValueError("a learning rate must be provided for each hidden layer")
        
    legend.append("SAE %d layers" % len(n_hidden))    
    
    sample = sparse.csc_matrix(name='s', dtype='float32') 
    label = sparse.csc_matrix(name='l', dtype='float32')      
    x = sparse.csc_matrix(name='x', dtype='float32')  
    y = sparse.csc_matrix(name='y', dtype='float32')  

    corruption_level = T.scalar('corruption')  # % of corruption to use
    learning_rate = T.scalar('lr')  # learning rate to use
    momentum_rate = T.scalar('momentum')  # momentum rate to use

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    W = None
    b = None
    if params is not None:    
        W = params['W']
        if len(params['b']) == len(params['W']):
            b = params['b']
                    
    sda = SdA(numpy_rng=rng, theano_rng=theano_rng, n_ins=n_visible,
              hidden_layers_sizes=n_hidden, input=x, label=y,
              activation=activation, W=W, b=b, useMomentum=useMomentum)
            
    pretraining_fns = sda.pretraining_functions(sample, label, learning_rate, 
                                                corruption_level, momentum_rate)
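    # (one compiled training function per hidden layer, pretrained greedily in order)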
        
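    # pretrain only when no pretrained parameters were supplied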
    if not params:
        train(recordParserCallback, generate_attr_vec_callback, pretraining_fns, 
              learning_rates, corruption_levels, momentumRate, training_epochs, 
              batch_size=1, modulo=modulo)
        
    print "about to test..."    
    test(testFile, recordParserCallback, generate_attr_vec_callback, sda)

    

    
    # look at individual errors; note: errorVectors and labels are never
    # defined in this streaming variant and would need to be computed (e.g.
    # via sda.get_reconstruction_errors) before this call can run
    callback(sda, errorVectors, labels, legend)
# in-memory variant (named distinctly so it does not shadow the streaming
# train_autoEncoder above)
def train_autoEncoder_in_memory(trainData, testData, trainDataLabel, 
                                testDataLabel, n_visible, n_hidden, 
                                learning_rates, callback, corruption_levels, 
                                activation, training_epochs, legend, normalize, 
                                generate_attr_vec_callback, total, layer_no, 
                                params=None):
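    """Trains the SdA on materialized trainData/testData (optionally min-max
    normalized), reports reconstruction errors, and returns the hidden-layer
    representations of both sets."""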

    if len(corruption_levels) != len(n_hidden):
        raise ValueError("a corruption level must be provided for each hidden layer")

    if len(learning_rates) != len(n_hidden):
        raise ValueError("a learning rate must be provided for each hidden layer")
        
    legend.append("SAE %d layers" % len(n_hidden))    
    
    sample = sparse.csc_matrix(name='s', dtype='float32') 
    label = sparse.csc_matrix(name='l', dtype='float32')      
    x = sparse.csc_matrix(name='x', dtype='float32')  
    y = sparse.csc_matrix(name='y', dtype='float32')  

    corruption_level = T.scalar('corruption')  # % of corruption to use
    learning_rate = T.scalar('lr')  # learning rate to use

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    W = None
    b = None
    if params is not None:    
        W = params['W']
        if len(params['b']) == len(params['W']):
            b = params['b']
                    
    sda = SdA(numpy_rng=rng, theano_rng=theano_rng, n_ins=n_visible,
              hidden_layers_sizes=n_hidden, input=x, label=y,
              activation=activation, W=W, b=b)
        
    
    pretraining_fns = sda.pretraining_functions(sample, label, learning_rate, 
                                                corruption_level)
    
    if normalize:
        def normalize_data(data):
            # min-max scale all values into [0, 1]
            inputArray = numpy.array(data)
            
            minValue = inputArray.flatten().min()
            maxValue = inputArray.flatten().max()
            # note: assumes maxValue > minValue; constant input would divide by zero
            inputArray = (inputArray - float(minValue))/(float(maxValue - minValue))
            
            return inputArray
            
        trainData = normalize_data(trainData)
        testData = normalize_data(testData)
        
    for idx, fn in enumerate(pretraining_fns):
        print "training layer #%d" % idx
        for i in range(0, training_epochs):
            # the loop variable is named record to avoid shadowing the
            # symbolic variable sample defined above
            for record in trainData:
                sampleMatrix = generate_feature([record], total, generate_attr_vec_callback)
                error = fn(sampleMatrix, sampleMatrix, 
                           corruption_levels[idx], learning_rates[idx])
            # reports the cost of the last record seen in this epoch
            print "error train cost: " + str(error)

    sampleMatrix = generate_feature(trainData, total, generate_attr_vec_callback)
    errorVector = sda.get_reconstruction_errors(sampleMatrix.tocsc())   

    testMatrix = generate_feature(testData, total, generate_attr_vec_callback)
    errorVectorTest = sda.get_reconstruction_errors(testMatrix.tocsc())    
    
    # plot the raw test vectors, i.e. before feature learning
    plot_transformed_vectors(testMatrix.toarray(), testDataLabel, title="before feature learning")
    

    def find_avg_error(errorMatrix):
        error = errorMatrix
        sqrdErrorMatrix = numpy.dot(error, numpy.transpose(error))
        return numpy.diag(sqrdErrorMatrix)
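    # note (assumption, not in the original): numpy.diag(numpy.dot(e, e.T))
    # builds a full n x n matrix just to read its diagonal; the same per-row
    # squared norms could be computed as (errorMatrix ** 2).sum(axis=1) in
    # O(n * d) time and memory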

    print "error train: " + str(math.sqrt(sum(find_avg_error(errorVector))))
    print "error test: " + str(math.sqrt(sum(find_avg_error(errorVectorTest))))
    
    # look at individual errors:
    callback(sda, trainData, trainDataLabel, testData, 
             testDataLabel, generate_attr_vec_callback, legend, total)

    # project both sets into the learned hidden representation; eval() forces
    # the symbolic Theano expressions into concrete arrays
    transformSample = sda.get_hidden_values(sampleMatrix.tocsc())
    transformTest = sda.get_hidden_values(testMatrix.tocsc())
    
    return transformSample.eval(), transformTest.eval()
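
# example usage (hypothetical data and callbacks, for illustration only):
#   hidden_train, hidden_test = train_autoEncoder_in_memory(
#       trainData, testData, trainLabels, testLabels, n_visible=total,
#       n_hidden=[500, 250], learning_rates=[0.1, 0.1], callback=plot_cb,
#       corruption_levels=[0.2, 0.3], activation=T.nnet.sigmoid,
#       training_epochs=15, legend=[], normalize=True,
#       generate_attr_vec_callback=attr_cb, total=total, layer_no=2)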
def plot_learning_curve(trainingFile, testFile, recordParserCallback, 
          generate_attr_vec_callback, corruption_levels, 
          learning_rates, momentumRate, training_epochs, n_visible, n_hidden,
          activation, useMomentum=False, batch_size=1000, modulo=1000,
          params=None):
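    """Plots train and test cost as a function of the amount of training data
    consumed, training on progressively larger mini-batch counts. params
    optionally carries pretrained weights as {'W': ..., 'b': ...} to
    warm-start the SdA."""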
              
    sample = sparse.csc_matrix(name='s', dtype='float32') 
    label = sparse.csc_matrix(name='l', dtype='float32')      
    x = sparse.csc_matrix(name='x', dtype='float32')  
    y = sparse.csc_matrix(name='y', dtype='float32')  

    corruption_level = T.scalar('corruption')  # % of corruption to use
    learning_rate = T.scalar('lr')  # learning rate to use
    momentum_rate = T.scalar('momentum')  # momentum rate to use

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    W = None
    b = None
    if params is not None:    
        W = params['W']
        if len(params['b']) == len(params['W']):
            b = params['b']
                    
    sda = SdA(numpy_rng=rng, theano_rng=theano_rng, n_ins=n_visible,
              hidden_layers_sizes=n_hidden, input=x, label=y,
              activation=activation, W=W, b=b, useMomentum=useMomentum)
            
    pretraining_fns = sda.pretraining_functions(sample, label, learning_rate, 
                                                corruption_level, momentum_rate)
        
    
    pl.clf()    
    def plotCurve(x, y, label):
        # plot one labeled cost curve on the shared axes
        pl.plot(x, y, label=label)
        pl.legend(loc="lower right")
        
    x = []
    y = []
    y_cost = []
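    # grow the training set by batch_size mini-batch records per iteration and
    # record the train/test cost at each point to trace the learning curve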
    for iteration in range(1, 5):            
        cost = train(recordParserCallback, generate_attr_vec_callback, pretraining_fns, 
              learning_rates, corruption_levels, momentumRate, training_epochs, 
              batch_size=iteration*batch_size, modulo=modulo, 
              stopping_fn=stop_after_mini_batch)
        x.append(iteration)
        y.append(cost)
        
        print "about to test..."    
        testCost = test(testFile, recordParserCallback, generate_attr_vec_callback, sda)
        y_cost.append(testCost)
        
     
    plotCurve(x, y, "train")
    plotCurve(x, y_cost, "test")
         
    pl.show()