示例#1
0
class weak_learner(object):
    """Thin train/predict wrapper around a VW model created with ``csoaa=10``.

    Instances are streamed to the wrapped model one at a time, both for
    training and for prediction.
    """

    def __init__(self, moniker, cache_name, passes):
        """Create the underlying VW model.

        moniker    -- label used in progress messages and passed to VW.
        cache_name -- forwarded to VW as ``name``.
        passes     -- forwarded to VW as ``passes``.
        """
        self.moniker = moniker
        self.model = VW(moniker=moniker, name=cache_name, passes=passes, csoaa=10)

    def train(self, instance_stream):
        """Stream every instance of ``instance_stream`` into a training run.

        Prints a progress line every 1000 instances and returns ``self`` so
        the call can be chained, like the other ``train`` methods in this
        file.
        """
        seen = 0
        with self.model.training():
            for instance in instance_stream:
                self.model.push_instance(instance)
                seen += 1
                if seen % 1000 == 0:
                    # Single-argument print() is valid on Python 2 and 3.
                    print('setreamed %d instances already' % seen)
            print('streaming finished')
        print('%s: trained on %d data points' % (self.moniker, seen))
        return self

    def predict(self, instance_stream):
        """Push every instance to the model and return its predictions.

        Returns the list built from ``self.model.read_predictions_()``.

        Raises Exception when the number of predictions read back differs
        from the number of instances pushed.
        """
        seen = 0
        with self.model.predicting():
            for instance in instance_stream:
                self.model.push_instance(instance)
                seen += 1
        print('%s: predicted for %d data points' % (self.moniker, seen))
        predictions = list(self.model.read_predictions_())
        if seen != len(predictions):
            raise Exception("Number of labels and predictions do not match!  (%d vs %d)" %
                            (seen, len(predictions)))
        return predictions
示例#2
0
class SimpleModel(object):
    """Minimal train/predict wrapper around a VW model.

    The model is created with ``passes=10``, ``learning_rate=15`` and
    ``power_t=1.0``.
    """

    def __init__(self, moniker):
        """Store ``moniker`` and build the VW model with the fixed options."""
        self.moniker = moniker
        self.model = VW(moniker=moniker,
                        **{'passes': 10,
                           'learning_rate': 15,
                           'power_t': 1.0, })

    def train(self, instance_stream):
        """
        Trains the model on the given data stream.

        Prints a progress line every 10000 instances and returns ``self``.
        """
        # Single-argument print() behaves the same on Python 2 and 3.
        print('%s: training' % (self.moniker))
        with self.model.training():
            seen = 0
            for instance in instance_stream:
                self.model.push_instance(instance)
                seen += 1
                if seen % 10000 == 0:
                    print('streamed %d instances...' % seen)
            print('done streaming.')
        print('%s: trained on %d data points' % (self.moniker, seen))
        return self

    def predict_library(self, instance_stream):
        """Lazily yield ``(instance, result)`` pairs.

        ``result`` is whatever ``self.model.push_instance`` returns while the
        ``predicting_library()`` context is active. The closing summary line
        is only printed once the generator has been fully consumed.
        """
        print('%s: predicting' % self.moniker)
        with self.model.predicting_library():
            seen = 0
            for instance in instance_stream:
                yield instance, self.model.push_instance(instance)
                seen += 1

        print('%s: predicted for %d data points' % (self.moniker, seen))

    def predict(self, instance_stream):
        """Push all instances, then pair each one with its prediction.

        Returns an iterator of ``(instance, prediction)`` tuples.

        Raises Exception when the number of predictions read back differs
        from the number of instances pushed.
        """
        print('%s: predicting' % self.moniker)
        instances = []
        with self.model.predicting():
            seen = 0
            for instance in instance_stream:
                self.model.push_instance(instance)
                instances.append(instance)
                seen += 1

        print('%s: predicted for %d data points' % (self.moniker, seen))
        predictions = list(self.model.read_predictions_())
        if seen != len(predictions):
            raise Exception("Number of labels and predictions do not match!  (%d vs %d)" %
                            (seen, len(predictions)))
        # itertools.izip only exists on Python 2; the builtin zip is already
        # lazy on Python 3, so fall back to it there.
        lazy_zip = getattr(itertools, 'izip', zip)
        return lazy_zip(instances, predictions)
示例#3
0
class SimpleModel(object):
    """Small convenience wrapper that trains and queries a VW model."""

    def __init__(self, moniker):
        """Remember ``moniker`` and create the VW model with fixed options."""
        self.moniker = moniker
        self.model = VW(moniker=moniker, passes=10, learning_rate=15, power_t=1.0)

    def train(self, instance_stream):
        """
        Feed every instance of the stream to a training run; returns self.
        """
        print('%s: training' % (self.moniker))
        count = 0
        with self.model.training():
            for count, instance in enumerate(instance_stream, 1):
                self.model.push_instance(instance)
                # Progress report every 10000 instances.
                if not count % 10000:
                    print('streamed %d instances...' % count)
            print('done streaming.')
        print('%s: trained on %d data points' % (self.moniker, count))
        return self

    def predict_library(self, instance_stream):
        """Generator of ``(instance, push_instance(instance))`` pairs."""
        print('%s: predicting' % self.moniker)
        total = 0
        with self.model.predicting_library():
            for instance in instance_stream:
                outcome = self.model.push_instance(instance)
                yield instance, outcome
                total += 1

        print('%s: predicted for %d data points' % (self.moniker, total))

    def predict(self, instance_stream):
        """Push all instances, read predictions back, and zip them together.

        Raises Exception if the two counts disagree.
        """
        print('%s: predicting' % self.moniker)
        pushed = []
        with self.model.predicting():
            for instance in instance_stream:
                self.model.push_instance(instance)
                pushed.append(instance)
        n_pushed = len(pushed)

        print('%s: predicted for %d data points' % (self.moniker, n_pushed))
        predictions = list(self.model.read_predictions_())
        n_predicted = len(predictions)
        if n_predicted != n_pushed:
            message = "Number of labels and predictions do not match!  (%d vs %d)"
            raise Exception(message % (n_pushed, n_predicted))
        return zip(pushed, predictions)
示例#4
0
 def __init__(self, moniker):
     """Create the logger and the VW model used by this wrapper."""
     self.moniker = moniker
     self.log = VPLogger()
     # Fixed training options, forwarded to VW as keyword arguments.
     vw_options = dict(passes=10, learning_rate=15, power_t=1.0)
     self.model = VW(vw="/usr/local/bin/vw",
                     moniker=moniker,
                     logger=self.log,
                     **vw_options)
示例#5
0
class SimpleModel(object):
    """Train/predict wrapper around a VW model that reports via a VPLogger.

    The model is created with ``passes=10``, ``learning_rate=15`` and
    ``power_t=1.0`` and runs the binary at ``/usr/local/bin/vw``.
    """

    def __init__(self, moniker):
        """Store ``moniker``, create the logger and build the VW model."""
        self.moniker = moniker
        self.log = VPLogger()
        self.model = VW(vw="/usr/local/bin/vw",
                        moniker=moniker,
                        logger=self.log,
                        **{'passes': 10,
                           'learning_rate': 15,
                           'power_t': 1.0, })

    def train(self, instance_stream):
        """
        Trains the model on the given data stream.

        Logs a debug line every 10000 instances and returns ``self``.
        """
        self.log.info('%s: training' % (self.moniker))
        with self.model.training():
            seen = 0
            for instance in instance_stream:
                self.model.push_instance(instance)
                seen += 1
                if seen % 10000 == 0:
                    self.log.debug('streamed %d instances...' % seen)
            self.log.debug('done streaming.')
        self.log.info('%s: trained on %d data points' % (self.moniker, seen))
        return self

    def predict(self, instance_stream):
        """Push all instances, then pair each one with its prediction.

        Returns an iterator of ``(instance, prediction)`` tuples.

        Raises Exception when the number of predictions read back differs
        from the number of instances pushed.
        """
        self.log.info('%s: predicting' % self.moniker)
        instances = []
        with self.model.predicting():
            seen = 0
            for instance in instance_stream:
                self.model.push_instance(instance)
                instances.append(instance)
                seen += 1

        self.log.info('%s: predicted for %d data points' % (self.moniker, seen))
        predictions = list(self.model.read_predictions_())
        if seen != len(predictions):
            raise Exception("Number of labels and predictions do not match!  (%d vs %d)" %
                            (seen, len(predictions)))
        # itertools.izip only exists on Python 2; the builtin zip is already
        # lazy on Python 3, so fall back to it there.
        lazy_zip = getattr(itertools, 'izip', zip)
        return lazy_zip(instances, predictions)
示例#6
0
 def __init__(self, moniker, cache_name, passes):
     """Store ``moniker`` and build the VW model (``csoaa=10``)."""
     self.moniker = moniker
     # cache_name is handed to VW under the keyword ``name``.
     vw_options = {
         'moniker': moniker,
         'name': cache_name,
         'passes': passes,
         'csoaa': 10,
     }
     self.model = VW(**vw_options)
示例#7
0
 def __init__(self, moniker):
     """Store ``moniker`` and build the VW model with fixed options."""
     self.moniker = moniker
     self.model = VW(
         moniker=moniker,
         passes=10,
         learning_rate=15,
         power_t=1.0,
     )
示例#8
0
from vowpal_porpoise import VW

# Build the model wrapper
vw = VW(
    moniker='test',      # a name for the model
    passes=10,           # vw arg: passes
    loss='quadratic',    # vw arg: loss
    learning_rate=10,    # vw arg: learning_rate
    l1=0.01,             # vw arg: l1
)

# While the training() block is active, a vw process is open to
# communication
with vw.training():
    for instance in ('1 |big red square',
                     '0 |small blue circle'):
        vw.push_instance(instance)

    # here stdin will close
# here the vw process will have finished

# While the predicting() block is active, instances can be streamed so
# that their labels can be acquired
with vw.predicting():
    for instance in ('1 |large burnt sienna rhombus',
                     '0 |little teal oval'):
        vw.push_instance(instance)

# Collect the predictions once the block has exited:
predictions = list(vw.read_predictions_())
示例#9
0
 def __init__(self, moniker):
     """Keep the moniker and create the wrapped VW model."""
     self.moniker = moniker
     # Fixed training options, forwarded to VW as keyword arguments.
     vw_options = dict(passes=10, learning_rate=15, power_t=1.0)
     self.model = VW(moniker=moniker, **vw_options)
示例#10
0
 def __init__(self, moniker, path, passes, rounds=5):
     """Set up boosting state and the wrapped VW model.

     ``path`` is accepted but not used by this constructor.
     """
     self.moniker = moniker
     # Number of boosting rounds, with one alpha weight slot per round.
     self.T = rounds
     self.alpha = np.zeros(rounds)
     self.wlearner = []
     self.model = VW(
         moniker=moniker,
         name='cache_d',
         passes=passes,
         csoaa=10,
     )
示例#11
0
class adaboostMM(object):
    """Multi-class boosting driver built on a VW ``csoaa`` model.

    Each round builds a per-example cost matrix, trains on it through the
    wrapped VW model, reads the predictions back and records a weight
    ``alpha[t]`` for that round.
    """

    def __init__(self, moniker, path, passes, rounds=5):
        """Initialise boosting state and the wrapped VW model.

        moniker -- label used in progress messages and passed to VW.
        path    -- accepted for interface compatibility; not used here.
        passes  -- forwarded to VW as ``passes``.
        rounds  -- number of boosting rounds (stored as ``self.T``).
        """
        self.T = rounds
        self.moniker = moniker
        self.wlearner = []
        self.alpha = np.zeros(rounds)
        self.model = VW(moniker=moniker, name='cache_d', passes=passes, csoaa=10)

    @staticmethod
    def _emit(*parts):
        """Print ``parts`` separated by single spaces.

        Produces the same text as a multi-argument Python 2 ``print``
        statement while staying valid syntax on Python 3.
        """
        print(' '.join(str(part) for part in parts))

    def fit(self, MNIST_DATA, Y):
        """Run ``self.T`` boosting rounds over the training data.

        MNIST_DATA -- list of VW-formatted example strings.
        Y          -- integer class labels, one per example; they are used as
                      zero-based column indices, while predictions are
                      compared against ``Y + 1``.

        Appends one entry per round to ``self.wlearner`` and fills
        ``self.alpha``.
        """
        labels = np.asarray(Y)
        k = np.unique(labels)
        print(k)

        m = np.size(MNIST_DATA)
        # In our case, k is 10 for the MNIST data set.
        n_classes = len(k)
        rows = np.arange(m)
        class_ids = np.arange(1, n_classes + 1)
        f = np.zeros((m, n_classes))

        for t in range(self.T):
            # Choose cost matrix C: exp(f[i, l] - f[i, y_i]) where l != y_i ...
            C = np.exp(f - f[rows, labels][:, np.newaxis])
            # ... and minus the row sum of those costs where l == y_i.
            C[rows, labels] = 0
            self._emit('the first line after making zero is ', C[0, :])
            d_sum = np.sum(C, axis=1)
            C[rows, labels] = -d_sum
            self._emit('d_sum is ', d_sum)
            self._emit('the first line is ', C[0, :])

            # csoaa_data is a list of strings in the format vw takes.
            csoaa_data = self.transform(C, MNIST_DATA)

            # Debug dump of a fixed window of examples; clamped so data sets
            # with fewer than 1050 rows do not raise IndexError.
            for i in range(min(1000, m), min(1050, m)):
                print(i)
                print(labels[i])
                self._emit('csoaa format is ', csoaa_data[i])

            self._emit('current t is ', t)

            # Call vowpal wabbit to train a weak classifier.
            # NOTE(review): train() returns self, so every entry of
            # self.wlearner is this same object sharing one self.model --
            # confirm whether independent learners were intended.
            self.wlearner.append(self.train(csoaa_data))
            temp_htx = self.wlearner[t].predict(csoaa_data)
            # htx holds the integer prediction for every example.
            htx = np.array([int(p) for p in temp_htx], dtype=int)

            # Calculate delta from the predictions and the cost matrix.
            delta = -np.sum(C[rows, htx - 1]) / (np.sum(d_sum))

            # Calculate alpha.
            # NOTE(review): delta == 1 or -1 makes this infinite.
            self.alpha[t] = 0.5 * np.log(1.0 * (1 + delta) / (1 - delta))

            # Update f: add alpha[t] to the column of the predicted class.
            f += self.alpha[t] * (htx[:, np.newaxis] == class_ids)

            self._emit(f[0, :], htx[0])
            self._emit('current round data',
                       float(np.sum(htx == (labels + 1))) / m)

    def transform(self, COST_MATRIX, vw_mnist):
        """Render each example as a VW cost-sensitive (csoaa) line.

        COST_MATRIX -- ndarray with one row per example and one column per
                       class.
        vw_mnist    -- list of strings; the segment following the first
                       ``'| '`` is reused as the feature section.

        Returns a list of strings shaped like
        ``"1:<cost> 2:<cost> ... | <features>"``.
        """
        n_samples, n_classes = np.shape(COST_MATRIX)
        result = []
        for i in range(n_samples):
            feature_value = vw_mnist[i].split('| ')[1]
            # float() first: the repr of a NumPy 2 scalar is
            # "np.float64(...)", which is not a number VW can parse.
            costs = ' '.join(
                '%d:%s' % (j + 1, repr(float(COST_MATRIX[i, j])))
                for j in range(n_classes))
            result.append(' '.join([costs, '|', feature_value]))
        return result

    def train(self, instance_stream):
        """Stream every instance into a training run and return ``self``."""
        seen = 0
        with self.model.training():
            for instance in instance_stream:
                self.model.push_instance(instance)
                seen += 1
                if seen % 1000 == 0:
                    print('setreamed %d instances already' % seen)
            print('streaming finished')
        print('%s: trained on %d data points' % (self.moniker, seen))
        return self

    def predict(self, instance_stream):
        """Push every instance to the model and return its predictions.

        Returns the list built from ``self.model.read_predictions_()``.

        Raises Exception when the number of predictions read back differs
        from the number of instances pushed.
        """
        seen = 0
        with self.model.predicting():
            for instance in instance_stream:
                self.model.push_instance(instance)
                seen += 1
        print('%s: predicted for %d data points' % (self.moniker, seen))
        predictions = list(self.model.read_predictions_())
        if seen != len(predictions):
            raise Exception("Number of labels and predictions do not match!  (%d vs %d)" %
                            (seen, len(predictions)))
        return predictions

    def single_predict(self, instance):
        """Push one instance and return the list of predictions read back."""
        with self.model.predicting():
            self.model.push_instance(instance)
        return list(self.model.read_predictions_())

    def read_MnistFile(self, file_path):
        """Read a VW-formatted file in a single pass.

        Returns ``(lines, labels)``: ``lines`` is the list of raw lines and
        ``labels`` is an int ndarray holding ``ord(line[0]) - 48`` for each
        line, i.e. the digit value when the line starts with '0'..'9'.
        """
        with open(file_path, "r") as examples:
            mnist_after = list(examples)
        class_set = np.array([ord(ex[0]) - 48 for ex in mnist_after], dtype=int)
        return (mnist_after, class_set)

    def ada_classifier(self, example):
        """Return the class in 1..10 with the highest ``F_T`` score."""
        # For this case, we have 10 classes <1...10>.
        result = [self.F_T(example, i) for i in range(1, 11)]
        self._emit('before choos the argmax', result)
        return np.argmax(result) + 1

    def F_T(self, example, class_ass):
        """Output the weighted combination of weak classifiers.

        Sums ``alpha[t]`` over the rounds whose prediction for ``example``
        equals ``class_ass``.
        """
        result = 0
        for t in range(self.T):
            naive_result = self.wlearner[t].single_predict(example)
            predicted = int(naive_result[0])
            result += self.alpha[t] * (predicted == class_ass)
            self._emit('result is ', result, predicted)
        return result

    def test_adaboost(self, file_path):
        """Classify every line of ``file_path``; return the predicted classes."""
        y_est = []
        with open(file_path, "r") as examples:
            for example in examples:
                # Classify once and reuse the result for the printout.
                label = self.ada_classifier(example)
                y_est.append(label)
                self._emit('class as ', label)
        return y_est

    def test(self, file_path):
        """Print the first weak learner's predictions for a whole file."""
        with open(file_path, "r") as examples:
            print(self.wlearner[0].predict(examples))
        return self

    def test_naive(self, file_path):
        """Print the first two weak learners' predictions line by line."""
        with open(file_path, "r") as examples:
            for example in examples:
                print(self.wlearner[0].single_predict(example))
                print(self.wlearner[1].single_predict(example))
        return self