Пример #1
0
# Run settings. Only numTrainingPairs is used in the visible part of this
# script (it is passed to activeLearning further down).
# NOTE(review): num_training_distinct and numIterations are not referenced in
# the visible code -- confirm they are still needed.
num_training_distinct = 16000
numIterations = 100
numTrainingPairs = 30

import time
# Wall-clock timestamp taken before init() runs.
t0 = time.time()
# init(inputFile) is unpacked into three values: data_d, data_model, header.
data_d, data_model, header = init(inputFile)


# Parenthesised single-argument form: prints the same text under Python 2's
# print statement and is also a valid function call under Python 3.
# NOTE(review): this message is emitted after init() has already returned; if
# init() is what imports the data, consider moving the message above it.
print("importing data ...")

# Reuse previously saved settings when 'learned_settings.json' exists in the
# current working directory; otherwise derive data_model and predicates by
# training.
if os.path.exists('learned_settings.json') :
  # Both the data model and the predicates come from the settings file.
  data_model, predicates = core.readSettings('learned_settings.json')
else:
  # Let's do some active learning here.
  # activeLearning receives the full data_d and returns three values; the
  # data_model it returns replaces the one obtained from init().
  # NOTE(review): consoleLabel is presumably the labelling callback and
  # numTrainingPairs the number of pairs to label -- confirm against
  # activeLearning's signature.
  training_data, training_pairs, data_model = activeLearning(data_d, data_model, consoleLabel, numTrainingPairs)

  # Train blocking on the labelled pairs, choosing among the candidate
  # predicate functions listed in the tuple below.
  # NOTE(review): the meaning of the trailing `1, 1` arguments is not visible
  # here -- confirm against trainBlocking's signature.
  predicates = trainBlocking(training_pairs,
                             (wholeFieldPredicate,
                              tokenFieldPredicate,
                              commonIntegerPredicate,
                              sameThreeCharStartPredicate,
                              sameFiveCharStartPredicate,
                              sameSevenCharStartPredicate,
                              nearIntegersPredicate,
                              commonFourGram,
                              commonSixGram),
                             data_model, 1, 1)

  core.writeSettings('learned_settings.json',
                     data_model,
Пример #2
0
# Run settings. Only numTrainingPairs is used in the visible part of this
# script (it is passed to activeLearning further down).
# NOTE(review): num_training_distinct and numIterations are not referenced in
# the visible code -- confirm they are still needed.
num_training_distinct = 16000
numIterations = 100
numTrainingPairs = 30

import time
# Wall-clock timestamp taken before init() runs.
t0 = time.time()
# init(inputFile) is unpacked into three values: data_d, data_model, header.
data_d, data_model, header = init(inputFile)


# Parenthesised single-argument form: prints the same text under Python 2's
# print statement and is also a valid function call under Python 3.
# NOTE(review): this message is emitted after init() has already returned; if
# init() is what imports the data, consider moving the message above it.
print("importing data ...")

# Reuse previously saved settings when 'learned_settings.json' exists in the
# current working directory; otherwise derive data_model and predicates by
# training.
if os.path.exists('learned_settings.json') :
  # Both the data model and the predicates come from the settings file.
  data_model, predicates = core.readSettings('learned_settings.json')
else:
  # Let's do some active learning here.
  # Active learning is run on sampleDict(data_d, 700) rather than on data_d
  # itself; the data_model it returns replaces the one obtained from init().
  # NOTE(review): 700 is presumably the sample size, consoleLabel the
  # labelling callback and numTrainingPairs the number of pairs to label --
  # confirm against sampleDict and activeLearning.
  training_data, training_pairs, data_model = activeLearning(sampleDict(data_d, 700), data_model, consoleLabel, numTrainingPairs)

  # Train blocking on the labelled pairs, choosing among the candidate
  # predicate functions listed in the tuple below.
  # NOTE(review): the meaning of the trailing `1, 1` arguments is not visible
  # here -- confirm against trainBlocking's signature.
  predicates = trainBlocking(training_pairs,
                             (wholeFieldPredicate,
                              tokenFieldPredicate,
                              commonIntegerPredicate,
                              sameThreeCharStartPredicate,
                              sameFiveCharStartPredicate,
                              sameSevenCharStartPredicate,
                              nearIntegersPredicate,
                              commonFourGram,
                              commonSixGram),
                             data_model, 1, 1)

  core.writeSettings('learned_settings.json',
                     data_model,