def his_match_list(match_day):
    url = url_his_ml % match_day
    data = req(url)
    if data is None:
        print 'request his_match_list error'  # raise error
        return
    item_ptn = re.compile(r'<tr height[^>]+>(.*?)</tr>')
    field_ptn = re.compile(r'<td[^>]*>(.*?)</td>')
    ret = []
    for item in item_ptn.findall(data):
        m = field_ptn.findall(item)
        ret.append({
            "match_id": utils.retrieve_id(m[-1]),
            "league": m[0],
            "home": utils.drop(m[3]),
            "visiting": utils.drop(m[5]),
            "match_time": m[1],
            "is_betting": utils.is_bet(m[-1]),
            "full_score": utils.drop_font(m[4]),
            "half_score": utils.drop_font(m[6]),
            "status": m[2],
        })
    return ret
def load_pretrained_layers(self):
    # Current state of base
    state_dict = self.state_dict()
    param_names = list(state_dict.keys())

    # Pretrained VGG-16
    pretrained_state_dict = torchvision.models.vgg16(pretrained=True).state_dict()
    pretrained_param_names = list(pretrained_state_dict.keys())

    # Copy weights from pretrained model (all parameters except the converted fc layers)
    for i, param in enumerate(param_names[:-4]):
        state_dict[param] = pretrained_state_dict[pretrained_param_names[i]]

    # Convert fc6 and fc7 into conv6 and conv7 by subsampling their weights
    pretrained_fc6_weight = pretrained_state_dict['classifier.0.weight'].view(4096, 512, 7, 7)
    pretrained_fc6_bias = pretrained_state_dict['classifier.0.bias']
    state_dict['conv6.weight'] = drop(pretrained_fc6_weight, factors=[4, 1, 3, 3])  # (1024, 512, 3, 3)
    state_dict['conv6.bias'] = drop(pretrained_fc6_bias, factors=[4])  # (1024,)

    pretrained_fc7_weight = pretrained_state_dict['classifier.3.weight'].view(4096, 4096, 1, 1)
    pretrained_fc7_bias = pretrained_state_dict['classifier.3.bias']
    state_dict['conv7.weight'] = drop(pretrained_fc7_weight, factors=[4, 4, 1, 1])  # (1024, 1024, 1, 1)
    state_dict['conv7.bias'] = drop(pretrained_fc7_bias, factors=[4])  # (1024,)

    # state_dict() returns a copy, so the subsampled weights must be loaded
    # back into the model for the assignments above to take effect
    self.load_state_dict(state_dict)
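
# The drop() used by load_pretrained_layers is not defined in this excerpt.
# Judging by the shape comments, it behaves like the usual "decimation" helper
# for converting VGG's fc6/fc7 into convolutions: keep every factors[d]-th
# slice along each dimension d. A minimal sketch under that assumption
# (the body below is hypothetical, only the call signature comes from above):
import torch

def drop(tensor, factors):
    """Downsample a tensor by keeping every factors[d]-th slice along dimension d.

    A factor of 1 (or None) leaves that dimension untouched.
    """
    assert tensor.dim() == len(factors)
    for d, factor in enumerate(factors):
        if factor is not None and factor != 1:
            index = torch.arange(start=0, end=tensor.size(d), step=factor).long()
            tensor = tensor.index_select(dim=d, index=index)
    return tensor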
def delete():
    if request.method == 'GET':
        id = int(request.args.get('id'))
        print id
        # NOTE: the SQL is built by string formatting; a parameterized query would be safer
        sql = 'delete from taokey where id=%d;' % id
        print sql
        if drop(id, sql):
            return redirect('/userlist/')
def load_from_file(filename, offset=None):
    grants_memo = load_memo_from_database(sdb_db.Grant)
    author_memo = load_memo_from_database(sdb_db.Author)
    institution_memo = load_memo_from_database(sdb_db.Institution)
    present_count = 0
    added_count = 0
    if offset:
        print 'starting at row %d' % offset
    with open(filename) as f, ManagedSession() as session:
        reader = DictReader(f, delimiter=",")
        for csv_fields in (reader if offset is None else drop(offset, reader)):
            grant = grant_from_csv(csv_fields)
            if grant.uuid() in grants_memo:
                present_count += 1
                continue
            update_terms(grant)
            # print grant
            authors = authors_from_csv(csv_fields)
            if authors:
                mem_auths = [memoized_row(author_memo, author) for author in authors]
                # if len(mem_auths) != len(authors):
                #     print 'fewer memoized authors!'
                #     import pprint
                #     print pprint.pprint(csv_fields)
                # if len(set(mem_auths)) != len(mem_auths):
                #     print 'duplicate memoized authors!'
                #     import pprint
                #     print pprint.pprint(csv_fields)
                for author in set(mem_auths):
                    grant.authors.append(author)
            institution = institution_from_csv(csv_fields)
            if institution:
                grant.institution = memoized_row(institution_memo, institution)
            session.add(grant)
            added_count += 1
            grants_memo[grant.uuid()] = grant
            if added_count % 1000 == 0:
                session.commit()
                print '%s more records added' % added_count
        session.commit()
    print '-----------------------'
    print '%s records were added' % added_count
    print '%s records already in the db' % present_count
    print '%s total records parsed' % (added_count + present_count)
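
# Here drop() takes (n, iterable) and lazily skips the first n CSV rows,
# in the style of toolz.itertoolz.drop. A minimal itertools-based stand-in
# (hypothetical; the original helper is not shown in this excerpt):
from itertools import islice

def drop(n, seq):
    """Skip the first n items of an iterable, yielding the rest lazily."""
    return islice(seq, n, None)

# Note the (n, iterable) argument order, unlike the (sequence, n) variant
# exercised by test_drop at the end of this file.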
def __init__(self, rng, is_train, input_data, filter_shape, image_shape,
             ssample=(1, 1), bordermode='valid', p=0.5, alpha=0.0):
    """
    Allocate a convolutional layer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input_data: theano.tensor.dtensor4
    :param input_data: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type ssample: tuple or list of length 2
    :param ssample: the subsampling (stride) factor (#rows, #cols)
    """
    assert image_shape[1] == filter_shape[1]

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" / pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) //
               numpy.prod(ssample))

    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
        borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters
    conv_out = conv2d(
        input=input_data,
        filters=self.W,
        filter_shape=filter_shape,
        input_shape=image_shape,
        subsample=ssample,
        border_mode=bordermode)

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map width & height
    activated_output = T.nnet.relu(conv_out + self.b.dimshuffle('x', 0, 'x', 'x'),
                                   alpha=alpha)

    # apply dropout while training; scale the activations by p otherwise
    dropped_output = drop(activated_output, p)
    self.output = T.switch(T.neq(is_train, 0),
                           dropped_output,
                           p * activated_output)

    # store parameters of this layer
    self.params = [self.W, self.b]

    # keep track of model input
    self.input = input_data
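
# The drop() used by this layer and by the training scripts below is not shown
# here. Because the callers rescale by p at test time (T.switch(..., p * x)),
# it presumably applies standard (non-inverted) dropout with keep probability p
# and no rescaling. A minimal sketch under that assumption; the random-stream
# seed is arbitrary:
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams

srng = MRG_RandomStreams(seed=12345)

def drop(input, p=0.5):
    """Zero out each unit with probability 1 - p (keep probability p)."""
    mask = srng.binomial(n=1, p=p, size=input.shape, dtype=theano.config.floatX)
    return input * mask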
def test_AllCNN_Models_DA_BN(use_bn=False, model='c', learning_rate=0.05, n_epochs=350,
                             batch_size=200, L2_reg=0.001, input_ndo_p=0.8, layer_ndo_p=0.5,
                             save_model=True, save_freq=50, s1=5, s2=5):
    """
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: the number of training examples per batch
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data2(theano_shared=False)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    train_set_x = train_set_x.reshape(len(train_set_x), 3, 32, 32)
    valid_set_x = valid_set_x.reshape(len(valid_set_x), 3, 32, 32)
    test_set_x = test_set_x.reshape(len(test_set_x), 3, 32, 32)

    train_set_x = numpy.asarray(train_set_x, dtype=theano.config.floatX)
    valid_set_x = numpy.asarray(valid_set_x, dtype=theano.config.floatX)
    test_set_x = numpy.asarray(test_set_x, dtype=theano.config.floatX)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.shape[0]
    n_valid_batches = valid_set_x.shape[0]
    n_test_batches = test_set_x.shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size
    print 'n_train_batches: ', n_train_batches
    print 'n_valid_batches: ', n_valid_batches
    print 'n_test_batches: ', n_test_batches

    learning_rate = numpy.asarray(learning_rate, dtype=numpy.float32)
    print 'learning_rate: ', learning_rate

    # allocate symbolic variables for the data
    #index = T.lscalar()  # index to a [mini]batch
    lr = T.fscalar()
    training_enabled = T.iscalar('training_enabled')

    # start-snippet-1
    x = T.tensor4('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    #layer0_input = x.reshape((batch_size, 3, 32, 32))
    # drop the input only while training, don't drop while testing
    #dropout_input = T.switch(T.neq(training_enabled, 0), drop(layer0_input, p=input_ndo_p), input_ndo_p * layer0_input)
    dropout_input = T.switch(T.neq(training_enabled, 0),
                             drop(x, p=input_ndo_p),
                             input_ndo_p * x)

    classifier = None
    Model_Name = None
    if use_bn:
        if model == 'a':
            Model_Name = ModelA_AllCNN_BN
        elif model == 'b':
            Model_Name = ModelB_AllCNN_BN
        elif model == 'c':
            Model_Name = ModelC_AllCNN_BN
        else:
            raise RuntimeError('Invalid model parameter!')
    else:
        if model == 'a':
            Model_Name = ModelA_AllCNN
        elif model == 'b':
            Model_Name = ModelB_AllCNN
        elif model == 'c':
            Model_Name = ModelC_AllCNN
        else:
            raise RuntimeError('Invalid model parameter!')

    classifier = Model_Name(rng, dropout_input, y, batch_size,
                            training_enabled, layer_ndo_p, L2_reg)
    print 'Training Model: ', classifier.__class__.__name__

    test_model = theano.function(
        [x, y],
        classifier.errors,
        givens={
            training_enabled: numpy.cast['int32'](0)
        }
    )

    validate_model = theano.function(
        [x, y],
        classifier.errors,
        givens={
            training_enabled: numpy.cast['int32'](0)
        }
    )

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.9), name='momentum')
    updates = []
    for param in classifier.params:
        param_update = theano.shared(param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - lr * param_update))
        updates.append((param_update,
                        momentum * param_update +
                        (numpy.cast[theano.config.floatX](1.) - momentum) *
                        T.grad(classifier.cost, param)))

    train_model = theano.function(
        [x, y, lr],
        classifier.cost,
        updates=updates,
        givens={
            training_enabled: numpy.cast['int32'](1)
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    # patience = 10000  # look as this many examples regardless
    # patience_increase = 2  # wait this much longer when a new best is found
    # improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    # validation_frequency = min(n_train_batches, patience // 2)
    validation_frequency = n_train_batches // 2

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    if use_bn:
        updateLRAfter = 100
    else:
        updateLRAfter = 200

    while (epoch < n_epochs) and (not done_looping):
        # shuffle data before starting the epoch
        epoch = epoch + 1
        if epoch > updateLRAfter:
            learning_rate *= 0.1
            updateLRAfter += 50

        for minibatch_index in range(n_train_batches):
            #print 'epoch: {0}, minibatch: {1}'.format(epoch, minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            # if iter % 50 == 0:
            #     print('training @ iter = ', iter)

            train_x = augmentImages(
                train_set_x[minibatch_index * batch_size: (minibatch_index + 1) * batch_size],
                shift1=s1, shift2=s2)
            train_y = train_set_y[minibatch_index * batch_size: (minibatch_index + 1) * batch_size]
            cost_ij = train_model(train_x, train_y, learning_rate)

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(valid_set_x[ii * batch_size: (ii + 1) * batch_size],
                                   valid_set_y[ii * batch_size: (ii + 1) * batch_size])
                    for ii in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    # if this_validation_loss < best_validation_loss * improvement_threshold:
                    #     patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(test_set_x[ii * batch_size: (ii + 1) * batch_size],
                                   test_set_y[ii * batch_size: (ii + 1) * batch_size])
                        for ii in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # if patience <= iter:
            #     done_looping = True
            #     break

        if save_model and epoch % save_freq == 0:
            # add model name to the file to differentiate different models
            with gzip.open('parameters_epoch_{0}.pklz'.format(epoch), 'wb') as fp:
                cPickle.dump([param.get_value() for param in classifier.params],
                             fp, protocol=2)

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
def test_ModelC_AllCNN(learning_rate=0.05, n_epochs=350, batch_size=200,
                       L2_reg=0.001, input_ndo_p=0.8, layer_ndo_p=0.5,
                       save_model=True, save_freq=50):
    """
    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: the number of training examples per batch
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data2()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size
    print 'n_train_batches: ', n_train_batches
    print 'n_valid_batches: ', n_valid_batches
    print 'n_test_batches: ', n_test_batches

    learning_rate = numpy.asarray(learning_rate, dtype=numpy.float32)
    print 'learning_rate: ', learning_rate

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    lr = T.fscalar()
    training_enabled = T.iscalar('training_enabled')

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # drop the input only while training, don't drop while testing
    dropout_input = T.switch(T.neq(training_enabled, 0),
                             drop(layer0_input, p=input_ndo_p),
                             input_ndo_p * layer0_input)

    layer0 = myConvLayer(rng, is_train=training_enabled, input_data=dropout_input,
                         filter_shape=(96, 3, 3, 3), image_shape=(batch_size, 3, 32, 32),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer1 = myConvLayer(rng, is_train=training_enabled, input_data=layer0.output,
                         filter_shape=(96, 96, 3, 3), image_shape=(batch_size, 96, 32, 32),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer2 = myConvLayer(rng, is_train=training_enabled, input_data=layer1.output,
                         filter_shape=(96, 96, 3, 3), image_shape=(batch_size, 96, 32, 32),
                         ssample=(2, 2), bordermode='half', p=layer_ndo_p)

    layer3 = myConvLayer(rng, is_train=training_enabled, input_data=layer2.output,
                         filter_shape=(192, 96, 3, 3), image_shape=(batch_size, 96, 16, 16),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer4 = myConvLayer(rng, is_train=training_enabled, input_data=layer3.output,
                         filter_shape=(192, 192, 3, 3), image_shape=(batch_size, 192, 16, 16),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer5 = myConvLayer(rng, is_train=training_enabled, input_data=layer4.output,
                         filter_shape=(192, 192, 3, 3), image_shape=(batch_size, 192, 16, 16),
                         ssample=(2, 2), bordermode='half', p=layer_ndo_p)

    layer6 = myConvLayer(rng, is_train=training_enabled, input_data=layer5.output,
                         filter_shape=(192, 192, 3, 3), image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer7 = myConvLayer(rng, is_train=training_enabled, input_data=layer6.output,
                         filter_shape=(192, 192, 1, 1), image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1), bordermode='half', p=1.0)

    layer8 = myConvLayer(rng, is_train=training_enabled, input_data=layer7.output,
                         filter_shape=(10, 192, 1, 1), image_shape=(batch_size, 192, 8, 8),
                         ssample=(1, 1), bordermode='half', p=1.0)

    # make sure this is what global averaging does
    global_average = layer8.output.mean(axis=(2, 3))

    softmax_layer = SoftmaxWrapper(input_data=global_average, n_in=10, n_out=10)

    L2_sqr = ((layer0.W ** 2).sum() + (layer1.W ** 2).sum() + (layer2.W ** 2).sum() +
              (layer3.W ** 2).sum() + (layer4.W ** 2).sum() + (layer5.W ** 2).sum() +
              (layer6.W ** 2).sum() + (layer7.W ** 2).sum() + (layer8.W ** 2).sum())

    # the cost we minimize during training is the NLL of the model
    cost = (softmax_layer.negative_log_likelihood(y) + L2_reg * L2_sqr)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    validate_model = theano.function(
        [index],
        softmax_layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](0)
        })

    # create a list of all model parameters to be fit by gradient descent
    params = (layer8.params + layer7.params + layer6.params + layer5.params +
              layer4.params + layer3.params + layer2.params + layer1.params +
              layer0.params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.9), name='momentum')
    updates = []
    for param in params:
        param_update = theano.shared(param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - lr * param_update))
        updates.append((param_update,
                        momentum * param_update +
                        (numpy.cast[theano.config.floatX](1.) - momentum) * T.grad(cost, param)))

    train_model = theano.function(
        [index, lr],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            training_enabled: numpy.cast['int32'](1)
        })
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    # patience = 10000  # look as this many examples regardless
    # patience_increase = 2  # wait this much longer when a new best is found
    # improvement_threshold = 0.995  # a relative improvement of this much is considered significant
    # validation_frequency = min(n_train_batches, patience // 2)
    validation_frequency = n_train_batches // 2

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    updateLRAfter = 200

    while (epoch < n_epochs) and (not done_looping):
        # shuffle data before starting the epoch
        epoch = epoch + 1
        if epoch > updateLRAfter:
            learning_rate *= 0.1
            updateLRAfter += 50
        print 'epoch: ', epoch
        print 'updateLRAfter: ', updateLRAfter
        print 'learning_rate: ', learning_rate

        for minibatch_index in range(n_train_batches):
            #print 'epoch: {0}, minibatch: {1}'.format(epoch, minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if iter % 50 == 0:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index, learning_rate)

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    # if this_validation_loss < best_validation_loss * improvement_threshold:
                    #     patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # if patience <= iter:
            #     done_looping = True
            #     break

        if save_model and epoch % save_freq == 0:
            # add model name to the file to differentiate different models
            with gzip.open('parameters_epoch_{0}.pklz'.format(epoch), 'wb') as fp:
                cPickle.dump([param.get_value() for param in params], fp, protocol=2)

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_drop(self):
    self.assertEqual(drop([1, 2, 3, 4, 5], 3), [4, 5])
    self.assertEqual(drop([1, 2, 3, 4, 5], 6), [])
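
# The drop() exercised by test_drop is not included in this excerpt. A minimal
# sketch consistent with the two assertions (remove the first n elements of a
# list, returning [] when n exceeds its length) could be:
def drop(seq, n):
    """Return a list with the first n elements of seq removed."""
    return list(seq[n:])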