Exemplo n.º 1
0
def create_main_feature(qid, word, token, text, entities, tags):
    """Build the feature dictionary for a single word.

    The result maps feature names to values: the query id, the outputs of
    the word-shape helpers, a one-hot key for the syntactic category, a
    one-hot key for the word itself, the word length and the token flag.

    Args:
        qid: Identifier stored unchanged under ``'qid'``.
        word: Word to describe; it is concatenated into a key, so it is
            presumably a string -- confirm against callers.
        token: Value stored unchanged under ``'ES_TOKEN'``.
        text: Not used by this function.
        entities: Not used by this function.
        tags: Forwarded to ``sin_cat`` together with ``word``.

    Returns:
        dict: The feature name -> value mapping described above.
    """
    # Entries are listed in the order they must be evaluated and inserted.
    # NOTE: if ``word`` is 'LARGO', the 'PALABRA_' + word key collides with
    # 'PALABRA_LARGO' and the length entry (listed later) wins.
    return {
        'qid': qid,
        'TIENE_RAIZ': has_root(word),
        'FULL_MAYUSCULAS': full_upper(word),
        'FULL_MINUSCULAS': full_lower(word),
        'INICIO_MAYUSCULAS_RESTO_MINUSCULAS': is_capitalized(word),
        'CAT_SINTACTICA_' + sin_cat(word, tags): 1,
        'PALABRA_' + word: 1,
        'PALABRA_LARGO': word_len(word),
        'ES_TOKEN': token,
    }
Exemplo n.º 2
0
 def test_is_not_capitalized(self):
     # "WAs" has a second upper-case letter after the initial one;
     # is_capitalized is expected to report 0 for it.
     self.assertEqual(0, is_capitalized("WAs"))
Exemplo n.º 3
0
 def test_is_capitalized_one_letter(self):
     # A single upper-case letter ("I") is expected to count as
     # capitalized, i.e. is_capitalized returns 1.
     self.assertEqual(1, is_capitalized("I"))