Exemplo n.º 1
0
def sim_ox_wn_defi_WDS_via_defi_of_curr_main_syn(word):
  """Score the Oxford vectors of ``word`` against each of its WordNet vectors.

  For every WordNet vector, the Oxford vectors are looked up again using
  the WordNet definition vector found at the same position, and each of
  them is compared with the WordNet vector through ``sim_2_vector``.
  ``cal_sim_ngrams(word)`` is called once the matrix is complete.

  Args:
    word: the word handed to every lookup.

  Returns:
    A list of rows, one per value of
    ``WordnetParseDefinition.get_dict_vectors_synsets_for_word(word)``.
    Each row is as long as ``OxfordParser.get_definitions_of_word(word)``;
    cells that are never scored keep their initial ``0``.
  """
  wn_lookup = WordnetParseDefinition.get_dict_vectors_synsets_for_word(word)
  (_, vectors_wn) = Util.get_keys_values_of_dict(wn_lookup)

  wn_defi_lookup = WordnetParseDefinition.get_vectors_defi_for_word(word)
  (_, vectors_wn_defi) = Util.get_keys_values_of_dict(wn_defi_lookup)

  definitions = OxfordParser.get_definitions_of_word(word)

  # NOTE(review): the row width is taken from the Oxford definitions while
  # the cells are filled from the vectors OxParseDefinition returns; the two
  # are presumably the same length - confirm, otherwise the assignment
  # below raises IndexError.
  m2d_sim = [[0] * len(definitions) for _ in vectors_wn]

  for i, (row, vector_wn) in enumerate(zip(m2d_sim, vectors_wn)):
    # The Oxford vectors depend on the definition vector of this row.
    ox_lookup = OxParseDefinition.get_dict_vectors_synsets_for_word(word, vectors_wn_defi[i])
    (_, vectors_ox) = Util.get_keys_values_of_dict(ox_lookup)

    for j, vector_ox in enumerate(vectors_ox):
      row[j] = sim_2_vector(vector_ox, vector_wn)

  cal_sim_ngrams(word)

  return m2d_sim
Exemplo n.º 2
0
def sim_ox_wn_value_main_synsets(word):
  """Compare WordNet value vectors of ``word`` with definition-similarity columns.

  The matrix returned by ``sim_ox_wn_defi_WDS_via_main_syns(word)`` is
  transposed so that each Oxford entry owns one list, and every such list
  is passed together with each WordNet value vector to
  ``spatial.distance.cosine`` (presumably SciPy's cosine distance - confirm
  against the file's imports).

  The function also prints the intermediate matrix through
  ``DebugHandler.print_2d_matrix``, every WordNet vector, a blank
  separator and every Oxford vector.

  Args:
    word: the word handed to every lookup.

  Returns:
    A list with one row per WordNet vector and one cell per Oxford vector,
    each cell holding the value returned by ``spatial.distance.cosine``.
  """
  wn_lookup = WordnetParseDefinition.get_dict_vectors_value_for(word)
  noun_synsets = WordnetHandler.get_synsets_for_word(word, 'n')
  ox_lookup = OxParseDefinition.get_vectors_value_for_word(word, noun_synsets)

  (_, vectors_wn) = Util.get_keys_values_of_dict(wn_lookup)
  (_, vectors_ox) = Util.get_keys_values_of_dict(ox_lookup)

  defi_sim = sim_ox_wn_defi_WDS_via_main_syns(word)
  DebugHandler.print_2d_matrix(defi_sim)

  # Transpose: defi_sim is indexed [wn][ox], the comparison needs [ox][wn].
  wn_count = len(vectors_wn)
  ox_count = len(vectors_ox)
  defi_sim_by_ox = [[defi_sim[i][j] for i in range(wn_count)]
                    for j in range(ox_count)]

  m2d_sim = []
  for vector_wn in vectors_wn:
    print(vector_wn)
    # Note that the Oxford value vectors themselves are not used here, only
    # the transposed definition similarities.
    m2d_sim.append([spatial.distance.cosine(ox_column, vector_wn)
                    for ox_column in defi_sim_by_ox])

  print("\n")
  for vector_ox in vectors_ox:
    print(vector_ox)
  return m2d_sim
Exemplo n.º 3
0
def sim_ox_wn_defi_WDS_via_main_syns(word):
  """Return the ``sim_wn_ox_vector`` matrix for ``word`` and run the n-gram pass.

  The WordNet vectors come from
  ``WordnetParseDefinition.get_dict_vectors_synsets_for_word`` and the
  Oxford vectors from ``OxParseDefinition.get_dict_vectors_synsets_for_word``,
  which is given the synsets ``WordnetHandler`` returns for the 'n' tag.

  Args:
    word: the word handed to every lookup.

  Returns:
    Whatever ``sim_wn_ox_vector(vectors_ox, vectors_wn)`` returns.
  """
  wn_lookup = WordnetParseDefinition.get_dict_vectors_synsets_for_word(word)
  ox_lookup = OxParseDefinition.get_dict_vectors_synsets_for_word(
      word, WordnetHandler.get_synsets_for_word(word, 'n'))

  (_, vectors_wn) = Util.get_keys_values_of_dict(wn_lookup)
  (_, vectors_ox) = Util.get_keys_values_of_dict(ox_lookup)

  m2d_sim = sim_wn_ox_vector(vectors_ox, vectors_wn)

  cal_sim_ngrams(word)

  # NOTE: a disabled export step used to sit here. It prefixed every row of
  # the matrix with its WordNet key, built a header row from the word and
  # the utf8-encoded Oxford keys, and appended both to
  # 'Results/vector_definition/<word>.csv' through
  # FileProcess.append_to_excel_file.

  return m2d_sim
Exemplo n.º 4
0
def sim_ox_wn_defi_WDS_via_1_main_syn(word):
  """Return the ``sim_wn_ox_vector`` matrix for ``word``.

  The WordNet vectors come from
  ``WordnetParseDefinition.get_dict_vectors_synsets_for_word`` and the
  Oxford vectors from ``OxParseDefinition.get_dict_vectors_synsets_for_word``,
  which is given the synsets ``WordnetHandler`` returns for the 'n' tag.

  Args:
    word: the word handed to every lookup.

  Returns:
    Whatever ``sim_wn_ox_vector(vectors_ox, vectors_wn)`` returns.
  """
  wn_lookup = WordnetParseDefinition.get_dict_vectors_synsets_for_word(word)
  ox_lookup = OxParseDefinition.get_dict_vectors_synsets_for_word(
      word, WordnetHandler.get_synsets_for_word(word, 'n'))

  (_, vectors_wn) = Util.get_keys_values_of_dict(wn_lookup)
  (_, vectors_ox) = Util.get_keys_values_of_dict(ox_lookup)

  return sim_wn_ox_vector(vectors_ox, vectors_wn)
Exemplo n.º 5
0
def sim_ox_wn_defi_WDS_via_main_syns_for_reduce(synsets_wn, status_synsets_wn, word):
  """Return the ``sim_wn_ox_vector_reduce`` matrix for a filtered synset list.

  The WordNet vectors are built from the full ``synsets_wn`` list, while
  the Oxford vectors are built only from the synsets whose status entry is
  not ``0``.

  Args:
    synsets_wn: the synsets to use; the argument itself is not modified.
    status_synsets_wn: one entry per synset, read by position; an entry
      equal to ``0`` excludes that synset from the Oxford lookup.
    word: the word handed to the Oxford lookup.

  Returns:
    Whatever ``sim_wn_ox_vector_reduce(vectors_ox, vectors_wn,
    status_synsets_wn)`` returns.
  """
  wn_lookup = WordnetParseDefinition.get_dict_vectores_synsets_for_synsets(synsets_wn)

  # Indexing the status list (rather than zipping) keeps the IndexError
  # when it is shorter than the synset list.
  kept_synsets = [
      synset
      for position, synset in enumerate(synsets_wn)
      if status_synsets_wn[position] != 0
  ]

  ox_lookup = OxParseDefinition.get_dict_vectors_synsets_for_word(word, kept_synsets)

  (_, vectors_wn) = Util.get_keys_values_of_dict(wn_lookup)
  (_, vectors_ox) = Util.get_keys_values_of_dict(ox_lookup)

  return sim_wn_ox_vector_reduce(vectors_ox, vectors_wn, status_synsets_wn)
Exemplo n.º 6
0
def sim_ox_wn_defi_WDS_via_align_all(word):
  """Build a WordNet-by-Oxford similarity matrix for ``word`` via alignment.

  For each WordNet entry, ``WordnetHandler.get_nearest_synsets_words_words_order``
  is called with that entry's words and the concatenation of all Oxford
  word lists. Each Oxford entry is then passed, together with that result,
  to ``WordnetHandler.get_nearest_synsets_words_synsets_order``, and the two
  results are scored with ``sim_2_vector``. ``cal_sim_ngrams(word)`` is
  called once the matrix is complete.

  Args:
    word: the word handed to every lookup.

  Returns:
    A list with one row per WordNet key and one cell per Oxford key.
  """
  wn_lookup = WordnetParseDefinition.get_dict_vectors_words_for_word(word)
  (keys_wn, vectors_wn) = Util.get_keys_values_of_dict(wn_lookup)
  ox_lookup = OxParseDefinition.get_dict_vectors_word_for_word(word)
  (keys_ox, vectors_ox) = Util.get_keys_values_of_dict(ox_lookup)

  ox_positions = range(len(keys_ox))
  m2d_sim = []

  for i in range(len(keys_wn)):
    # A fresh pooled list is built for every row, as the original did.
    pooled_ox_words = [w for j in ox_positions for w in vectors_ox[j]]
    wn_synsets = WordnetHandler.get_nearest_synsets_words_words_order(vectors_wn[i], pooled_ox_words)

    m2d_sim.append([
        sim_2_vector(
            wn_synsets,
            WordnetHandler.get_nearest_synsets_words_synsets_order(vectors_ox[j], wn_synsets))
        for j in ox_positions
    ])

  cal_sim_ngrams(word)

  return m2d_sim
Exemplo n.º 7
0
def sim_ox_wn_defi_WDS_via_align(word):
  """Build a WordNet-by-Oxford similarity matrix for ``word``.

  Each cell is the value of
  ``WordnetHandler.sim_for_words_words_no_order`` for one Oxford word list,
  one WordNet word list and the synset found at the WordNet entry's
  position in ``WordnetHandler.get_synsets_for_word(word, 'n')``.
  ``cal_sim_ngrams(word)`` is called once the matrix is complete.

  Args:
    word: the word handed to every lookup.

  Returns:
    A list with one row per WordNet key and one cell per Oxford key.
  """
  wn_lookup = WordnetParseDefinition.get_dict_vectors_words_for_word(word)
  (keys_wn, vectors_wn) = Util.get_keys_values_of_dict(wn_lookup)
  ox_lookup = OxParseDefinition.get_dict_vectors_word_for_word(word)
  (keys_ox, vectors_ox) = Util.get_keys_values_of_dict(ox_lookup)

  noun_synsets = WordnetHandler.get_synsets_for_word(word, 'n')

  ox_positions = range(len(keys_ox))
  m2d_sim = []

  for i in range(len(keys_wn)):
    wn_words = vectors_wn[i]
    # presumably the synsets are in the same order as the WordNet keys -
    # verify against WordnetParseDefinition.
    wn_synset = noun_synsets[i]
    # NOTE: a disabled alternative scored the pair returned by
    # WordnetHandler.get_nearest_synsets_words_words_noorder with
    # sim_2_vector instead.
    m2d_sim.append([
        WordnetHandler.sim_for_words_words_no_order(vectors_ox[j], wn_words, wn_synset)
        for j in ox_positions
    ])

  cal_sim_ngrams(word)

  return m2d_sim
Exemplo n.º 8
0
def cal_sim_ngrams(word):
  """Run the n-gram and Jaccard gloss comparisons for ``word``.

  ``cal_n_grams_sim`` is called with sizes 2, 3 and 4 on the unmodified
  glosses. Every occurrence of ``word`` is then removed from each gloss,
  in place, before ``cal_jacc_sim`` is called.

  Args:
    word: the word whose WordNet and Oxford glosses are compared.

  Returns:
    None; the results of the helper calls are not collected here.
  """
  wn_lookup = WordnetParseDefinition.get_gloss_for_jacc(word)
  (_, wn_gloss) = Util.get_keys_values_of_dict(wn_lookup)
  ox_gloss = OxfordParser.get_definitions_of_word_for_jacc(word)

  for size in (2, 3, 4):
    cal_n_grams_sim(word, wn_gloss, ox_gloss, size)

  # Strip the word itself from every gloss; the lists are updated in place,
  # WordNet glosses first, then Oxford glosses.
  for glosses in (wn_gloss, ox_gloss):
    for position, text in enumerate(glosses):
      glosses[position] = text.replace(word, "")

  cal_jacc_sim(word, wn_gloss, ox_gloss)