Пример #1
0
def write_sens_for_reading(syns_wn, syns_ox, filename_output):
  """Append one tab-separated "WordNet definition, Oxford definition" line per pair.

  Pairs are emitted in row-major order: every Oxford sense for the first
  WordNet synset, then every Oxford sense for the second, and so on.

  syns_wn: sequence indexed by 0..len-1 whose items expose ``definition()``.
  syns_ox: mapping keyed by the string form of 0..len-1; each value holds
    its definition text under the key "d".
  filename_output: forwarded unchanged to FileProcess.append_value_to_file.
  """
  index_pairs = (
      (wn_pos, ox_pos)
      for wn_pos in range(len(syns_wn))
      for ox_pos in range(len(syns_ox))
  )
  for wn_pos, ox_pos in index_pairs:
    wn_text = syns_wn[wn_pos].definition()
    ox_text = syns_ox[str(ox_pos)]["d"]
    FileProcess.append_value_to_file("\t".join((wn_text, ox_text)), filename_output)
Пример #2
0
def parse_ox_wn_defi_to_input(word):
  """Append every WordNet/Oxford definition pair for ``word`` to the input file.

  Definitions are fetched through WordnetHandler.get_definitions_for_word and
  OxfordParser.get_definitions_of_word_for_svm. Each pair becomes one line,
  "WordNet definition" then a tab then "Oxford definition", appended to
  ``__filename_input_sen__`` with the WordNet definition as the outer loop.
  """
  wn_texts = WordnetHandler.get_definitions_for_word(word)
  ox_texts = OxfordParser.get_definitions_of_word_for_svm(word)

  pair_lines = (
      "\t".join((wn_text, ox_text))
      for wn_text in wn_texts
      for ox_text in ox_texts
  )
  for pair_line in pair_lines:
    FileProcess.append_value_to_file(pair_line, __filename_input_sen__)
Пример #3
0
def get_synset_gloss(synset, filename):
  """Append the gloss of ``synset`` and, recursively, of all its hyponyms.

  The gloss line is built from, in order:
    * every lemma name with underscores turned into spaces, each followed
      by ". ";
    * the synset definition followed by ". ";
    * every example followed by "." (no trailing space).
  The line is appended to ``filename`` before the hyponyms are visited, so
  the file is written in depth-first, parent-first order.
  """
  pieces = [lemma.name().replace("_", " ") + ". " for lemma in synset.lemmas()]
  pieces.append(synset.definition() + ". ")
  pieces.extend(example + "." for example in synset.examples())
  FileProcess.append_value_to_file("".join(pieces), filename)

  for child in synset.hyponyms():
    get_synset_gloss(child, filename)
Пример #4
0
def create_input_sens_test(dict_ox, start_word="blockage"):
  """Write WordNet/Oxford definition pairs, each with a context column.

  Words of ``dict_ox`` are visited in iteration order. Everything before
  ``start_word`` is skipped; processing begins at ``start_word`` itself and
  continues to the end. Pass ``start_word=None`` to process every word.
  NOTE(review): the start point therefore depends on the iteration order of
  ``dict_ox`` -- confirm the caller passes an ordered mapping.

  For each remaining word that has at least one entry in ``dict_ox``:
    * definitions come from WordnetHandler.get_definitions_for_word and
      OxfordParser.get_definitions_of_word_for_svm;
    * the word is skipped when both sources have exactly one definition;
    * the context column is all WordNet definitions (tab-joined) when there
      is one Oxford definition and several WordNet ones, otherwise all
      Oxford definitions (tab-joined);
    * one line "wn definition, ox definition, context" (tab-separated) is
      appended to ``__filename_input_sen_test__`` per pair.

  dict_ox: mapping from word to its Oxford senses (only its length is read).
  start_word: first word to process, or None to start from the beginning.
  """
  started = start_word is None
  for word in dict_ox:
    if not started:
      if word != start_word:
        continue
      started = True

    if len(dict_ox[word]) == 0:
      continue

    defis_wn = WordnetHandler.get_definitions_for_word(word)
    defis_ox = OxfordParser.get_definitions_of_word_for_svm(word)

    if len(defis_ox) == 1 and len(defis_wn) == 1:
      continue

    # The context is the same for every pair of this word, so build it once.
    if len(defis_ox) == 1 and len(defis_wn) > 1:
      context = "\t".join(defis_wn)
    else:
      context = "\t".join(defis_ox)

    for defi_wn in defis_wn:
      for defi_ox in defis_ox:
        value = "\t".join((defi_wn, defi_ox, context))
        FileProcess.append_value_to_file(value, __filename_input_sen_test__)
Пример #5
0
def cal_features_from_sens_write_to_file(filename_sens, filename_output):
  """Compute feature values for every line of ``filename_sens``.

  Each input line is split on tabs; field 0 and field 1 are the two
  sentences to compare and the full field list is handed to the feature
  functions as context. One output line per input line is appended to
  ``filename_output``, holding two tab-separated values:
    1. Literal.levenshtein_in_context(sen_1, sen_2, sens)
    2. WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0)

  Raises IndexError on a line with no tab. Such lines are deliberately not
  skipped so the output stays aligned line-for-line with the input.
  NOTE(review): the line terminator is not stripped, so the last field of
  each line still carries its trailing newline -- confirm the feature
  functions tolerate that.
  """
  # ``with`` guarantees the handle is closed even when a feature call raises.
  with open(filename_sens, 'r') as sens_file:
    for line in sens_file:
      sens = line.split("\t")

      sen_1 = sens[0]
      sen_2 = sens[1]

      # Currently disabled features:
      #   ShallowSyntactic.jaccard_POS_in_context(sen_1, sen_2, sens)
      #   WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 1)
      feature_values = "\t".join((
          str(Literal.levenshtein_in_context(sen_1, sen_2, sens)),
          str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0)),
      ))

      FileProcess.append_value_to_file(feature_values, filename_output)
Пример #6
0
def create_input_sen_via_ox_vn(dict_vn, dict_ox):
  """Write sentence pairs and 1/0 labels derived from "tv" similarity.

  For every word present in both dictionaries (and with at least one Oxford
  sense), each vn sense is compared with each Oxford sense that has a "tv"
  entry, using check_tv_similar(vn_tv, ox_tv) == 1 as the match test.

  Output, one line per written pair:
    * ``__filename_input_sen__``: "vn definition, ox definition, context",
      tab-separated, where the context is the tab-joined definitions of all
      Oxford senses of the word that have a "tv" entry;
    * ``__filename_input_gs__``: "1" for a match, "0" otherwise.
  A non-matching pair is only written once a match has already been seen
  for the same vn sense; earlier non-matches are dropped.

  Words with exactly one sense in both dictionaries are skipped.

  dict_vn / dict_ox: mapping word -> mapping sense-id -> sense, where a
    sense holds its definition under "d" and optionally a "tv" entry.
  NOTE(review): ``syn_vn['tv']`` is read without a presence check, so every
  vn sense is assumed to carry "tv" -- confirm against the data.
  """
  for word in dict_ox:
    if len(dict_ox[word]) == 0 or word not in dict_vn:
      continue

    word_syns_vn = dict_vn[word]
    word_syns_ox = dict_ox[word]
    # Fixed: the second operand used to repeat word_syns_ox, so any word with
    # a single Oxford sense was dropped regardless of its vn sense count.
    if len(word_syns_ox) == 1 and len(word_syns_vn) == 1:
      continue

    # Only Oxford senses with a "tv" entry take part; the context built from
    # them is identical for every vn sense, so compute both once per word.
    usable_syns_ox = [
        word_syns_ox[i_ox] for i_ox in word_syns_ox
        if "tv" in word_syns_ox[i_ox]
    ]
    all_defi_ox = "\t".join(syn_ox['d'] for syn_ox in usable_syns_ox)

    for i_vn in word_syns_vn:
      syn_vn = word_syns_vn[i_vn]

      flag_can_use = False
      for syn_ox in usable_syns_ox:
        is_match = check_tv_similar(syn_vn['tv'], syn_ox['tv']) == 1
        if not is_match and not flag_can_use:
          continue

        # Fixed: a tab now separates defi_ox from the context, and the
        # context no longer ends with a stray tab.
        value = "\t".join((syn_vn['d'], syn_ox['d'], all_defi_ox))
        FileProcess.append_value_to_file(value, __filename_input_sen__)
        FileProcess.append_value_to_file("1" if is_match else "0", __filename_input_gs__)
        if is_match:
          flag_can_use = True
Пример #7
0
def _root_values_per_feature(syns_values_in_row):
  """Return root_values_of_a_feature_in_row(rows, i) for every feature index i."""
  return [
      root_values_of_a_feature_in_row(syns_values_in_row, i_feature)
      for i_feature in range(len(syns_values_in_row[0]))
  ]


def _append_normalised_row(feature_values, root_values, filename_output):
  """Append feature_values, each divided by its root value, as one tab-separated line."""
  # NOTE(review): a root value of 0 raises ZeroDivisionError, and under
  # Python 2 int/int would truncate -- confirm what the feature helpers return.
  line = "\t".join(
      str(feature_value / root_value)
      for feature_value, root_value in zip(feature_values, root_values)
  )
  FileProcess.append_value_to_file(line, filename_output)


def cal_features_and_write_to_file_for(syns_wn, syns_ox, filename_output):
  """Write normalised feature rows for every WordNet/Oxford sense pair.

  Feature vectors come from cal_feature_values_for(syn_wn, syn_ox). Each
  vector is divided element-wise by the per-feature value returned by
  root_values_of_a_feature_in_row for its group, and appended to
  ``filename_output`` as one tab-separated line.

  Grouping used for the normalisation:
    * one Oxford sense and several WordNet synsets: a single group holding
      the vector of every WordNet synset against that Oxford sense;
    * otherwise: one group per WordNet synset, holding its vector against
      every Oxford sense.
  Lines are written with the WordNet synset as the outer loop in both cases.

  syns_wn: sequence indexed by 0..len-1.
  syns_ox: mapping keyed by the string form of 0..len-1.
  Writes nothing when either collection is empty (an empty ``syns_ox`` used
  to raise IndexError).
  """
  if len(syns_wn) == 0 or len(syns_ox) == 0:
    return

  if len(syns_ox) == 1 and len(syns_wn) > 1:
    only_syn_ox = syns_ox[str(0)]
    syns_values_in_row = [
        cal_feature_values_for(syns_wn[i_wn], only_syn_ox)
        for i_wn in range(len(syns_wn))
    ]
    root_values = _root_values_per_feature(syns_values_in_row)
    for feature_values in syns_values_in_row:
      _append_normalised_row(feature_values, root_values, filename_output)
    return

  for i_wn in range(len(syns_wn)):
    syn_wn = syns_wn[i_wn]
    syns_values_in_row = [
        cal_feature_values_for(syn_wn, syns_ox[str(i_ox)])
        for i_ox in range(len(syns_ox))
    ]
    root_values = _root_values_per_feature(syns_values_in_row)
    for feature_values in syns_values_in_row:
      _append_normalised_row(feature_values, root_values, filename_output)
Пример #8
0
def write_label_for_svm(syns_wn, syns_ox, dict_gold):
  """Append the gold label of every (WordNet, Oxford) index pair.

  For each WordNet index ``row`` and Oxford index ``col``, in row-major
  order, ``dict_gold[row][col]`` is appended to
  ``__filename_input_gs_train__``. Only the lengths of ``syns_wn`` and
  ``syns_ox`` are used.
  """
  cells = (
      (row, col)
      for row in range(len(syns_wn))
      for col in range(len(syns_ox))
  )
  for row, col in cells:
    label = dict_gold[row][col]
    FileProcess.append_value_to_file(label, __filename_input_gs_train__)
Пример #9
0
def create_input_sen_via_gold_data(dict_vn, dict_ox, dict_gold, stop_word="blockage"):
  """Write sentence pairs with context and their gold 1/0 labels.

  Words of ``dict_ox`` are visited in iteration order. Words with no Oxford
  sense or no entry in ``dict_gold`` are skipped. The function returns as
  soon as it reaches ``stop_word`` (checked after those two skips); pass
  ``stop_word=None`` to process every word.
  NOTE(review): the stop point depends on the iteration order of
  ``dict_ox`` -- confirm the caller passes an ordered mapping.

  For each remaining word that is also in ``dict_vn`` and does not have
  exactly one sense on both sides, every vn sense is paired with every
  Oxford sense that has a "tv" entry. Per pair:
    * ``__filename_input_sen__`` gets "vn definition, ox definition,
      context" (tab-separated);
    * ``__filename_input_gs__`` gets "1" when
      ``dict_gold[word][int(i_vn)][int(i_ox)] == "1"``, otherwise "0".
  The context is the tab-joined definitions of the vn senses that have a
  "tv" entry when there is one Oxford sense and several vn senses;
  otherwise it is the tab-joined definitions of the Oxford senses that have
  a "tv" entry.

  dict_vn / dict_ox: mapping word -> mapping sense-id -> sense, where a
    sense holds its definition under "d" and optionally a "tv" entry;
    sense ids must be convertible with int().
  dict_gold: mapping word -> 2-D indexable of label strings.
  stop_word: word at which processing stops, or None to never stop.
  """
  for word in dict_ox:

    if len(dict_ox[word]) == 0 or word not in dict_gold:
      continue

    if stop_word is not None and word == stop_word:
      return

    if word not in dict_vn:
      continue

    word_syns_vn = dict_vn[word]
    word_syns_ox = dict_ox[word]

    if len(word_syns_ox) == 1 and len(word_syns_vn) == 1:
      continue

    # Only Oxford senses with a "tv" entry are ever paired.
    usable_syns_ox = [
        (i_ox, word_syns_ox[i_ox]) for i_ox in word_syns_ox
        if "tv" in word_syns_ox[i_ox]
    ]

    # The context does not depend on the pair, so build it once per word.
    if len(word_syns_ox) == 1 and len(word_syns_vn) > 1:
      context = "\t".join(
          word_syns_vn[i_vn]['d'] for i_vn in word_syns_vn
          if "tv" in word_syns_vn[i_vn]
      )
    else:
      context = "\t".join(syn_ox['d'] for _, syn_ox in usable_syns_ox)

    for i_vn in word_syns_vn:
      syn_vn = word_syns_vn[i_vn]
      for i_ox, syn_ox in usable_syns_ox:
        value = "\t".join((syn_vn['d'], syn_ox['d'], context))
        is_gold = dict_gold[word][int(i_vn)][int(i_ox)] == "1"
        FileProcess.append_value_to_file(value, __filename_input_sen__)
        FileProcess.append_value_to_file("1" if is_gold else "0", __filename_input_gs__)