Пример #1
0
def create_main_feature(qid, word, token, text, entities, tags):
  """Build the feature dictionary for a single word.

  Args:
    qid: Stored unchanged under the 'qid' key.
    word: Word the features are computed from; it is also concatenated
      into the 'PALABRA_<word>' key.
    token: Stored unchanged under the 'ES_TOKEN' key.
    text: Accepted but not read by this function.
    entities: Accepted but not read by this function.
    tags: Forwarded to sin_cat() together with the word.

  Returns:
    A dict holding 'qid', the results of has_root / full_upper /
    full_lower / is_capitalized / word_len for the word, an indicator
    (value 1) keyed 'CAT_SINTACTICA_<category>', an indicator (value 1)
    keyed 'PALABRA_<word>', and 'ES_TOKEN'.
  """
  # Features with fixed names; helpers are invoked in the order listed.
  features = {
    'qid': qid,
    'TIENE_RAIZ': has_root(word),
    'FULL_MAYUSCULAS': full_upper(word),
    'FULL_MINUSCULAS': full_lower(word),
    'INICIO_MAYUSCULAS_RESTO_MINUSCULAS': is_capitalized(word),
  }

  # Indicator features whose key embeds the category and the word itself.
  category_key = 'CAT_SINTACTICA_' + sin_cat(word, tags)
  features[category_key] = 1
  features['PALABRA_' + word] = 1

  # NOTE: when word == 'LARGO' the indicator key above is also
  # 'PALABRA_LARGO', so the assignment below replaces its value.
  features['PALABRA_LARGO'] = word_len(word)
  features['ES_TOKEN'] = token
  return features
Пример #2
0
 def test_is_lower(self):
     """full_lower is expected to return 1 for the lowercase word "was"."""
     expected = 1
     actual = full_lower("was")
     self.assertEqual(expected, actual)