Пример #1
0
def parse_datatype(current, infile):
    '''Parse a datatype starting at token `current` and describe it as a tuple.

    Two spellings are handled: the keyword "trit", or a vector written as
    trit_vector ( <integer> downto 0 ).

    Returns (token, name, length). `token` is whatever the last helper call
    handed back (presumably the token that follows the datatype), `name` is
    "trit" or "trit_vector", and `length` is 0 for a trit or the upper bound
    plus one for a vector. When `current` is not a Keyword the problem is
    reported through printError and None is returned.
    '''
    if not isinstance(current, Keyword):
        printError(current, Keyword("trit|trit_vector"))
        return None

    if current.name == "trit":
        return (nextToken(infile), "trit", 0)

    # Anything that is not a plain trit has to be the vector form.
    after_keyword = compareKeywords(current, Keyword("trit_vector"), infile)

    # Upper bound: must be an integer literal greater than zero.
    upper = compareTokens(after_keyword, Token("("), infile)
    if isinstance(upper, Literal):
        if upper.value <= 0:
            printError(upper, Literal("greater than zero"))
    else:
        printError(upper, Literal("integer"))
    after_upper = nextToken(infile)

    # The range runs from the upper bound down to 0 inclusive.
    length = upper.value + 1

    # Lower bound: must be the integer literal 0.
    lower = compareTokens(after_upper, Keyword("downto"), infile)
    if isinstance(lower, Literal):
        if lower.value != 0:
            printError(lower, Literal(0))
    else:
        printError(lower, Literal("integer"))
    after_lower = nextToken(infile)

    following = compareTokens(after_lower, Token(")"), infile)

    # Hand back the vector description together with the token the closing
    # parenthesis check returned.
    return (following, "trit_vector", length)
Пример #2
0
def tokenizeNumber(infile, value):
    '''tokenizeNumber: identify the next integer in the file

    Reads characters from `infile` one at a time, appending each digit to
    `value` (the digits collected so far), and stops at the first character
    that is not a digit. That character is pushed back by seeking one
    position backwards so the caller can read it again.

    Returns Literal(str(value)) built from the accumulated digits.
    '''
    while True:
        char = infile.read(1)
        if not char.isdigit():
            break
        value = value + char

    # read(1) returns an empty string at end of file; in that case nothing
    # was consumed, so seeking back would un-read the last digit.
    if char:
        infile.seek(infile.tell() - 1)
    return Literal(str(value))
Пример #3
0
def cal_features_from_sens_write_to_file(filename_sens, filename_output):
  """Compute features for every sentence pair in a file and append them.

  Each line of `filename_sens` is split on tabs; the first two fields are
  taken as the sentence pair and the whole field list is passed along as
  context. For every pair one tab-separated record (Levenshtein-in-context
  value, then WordNet-based-in-context value) is appended to
  `filename_output` via FileProcess.append_value_to_file.

  Lines holding only a line terminator are skipped. A non-blank line with
  fewer than two tab-separated fields still raises IndexError.
  """
  # `with` guarantees the input file is closed even if a helper raises.
  with open(filename_sens, 'r') as f:
    for line in f:
      # A blank line has no sentence pair to split.
      if not line.strip("\r\n"):
        continue

      sens = line.split("\t")

      sen_1 = sens[0]
      sen_2 = sens[1]

      feature_values = "\t".join([
        str(Literal.levenshtein_in_context(sen_1, sen_2, sens)),
#        str(ShallowSyntactic.jaccard_POS_in_context(sen_1, sen_2, sens)),
        str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 0)),
#        str(WordnetBased.wordnet_based_in_context(sen_1, sen_2, sens, 1)),
      ])

      FileProcess.append_value_to_file(feature_values, filename_output)
Пример #4
0
def cal_feature_values_for(syn_wn, syn_ox):
  """Build the feature list comparing a WordNet sense with an Oxford sense.

  The returned list holds six values, in this order:
    0. 1 - Literal.levenshtein of the two definitions
    1. 1.00001 - Literal.jaccard of the two definitions
    2. 1 - Literal.levenshtein of the two glosses
    3. 1.00001 - Literal.jaccard of the two glosses
    4. value 3 plus Ngrams.ngrams_word_for of the glosses for n = 2, 3, 4, 5
    5. WordnetBased.wordnet_based of the two definitions (mode 0)
  """
  # Text pulled from both resources.
  defi_wn = WordnetHandler.get_defi_for_syn(syn_wn)
  defi_ox = OxfordParser.get_defi_for_syn(syn_ox)

  gloss_wn = WordnetHandler.get_gloss_for_syn(syn_wn)
  gloss_ox = OxfordParser.get_gloss_for_syn(syn_ox)

  # The lookups below are still performed, but their results are only
  # consumed by feature families that are currently switched off
  # (example-sentence Jaccard, POS-based Jaccard, hypernym/hyponym WordNet
  # scores, LSA tf-idf similarities, Jaro-Winkler).
  lemma_wn = WordnetHandler.get_lemma_for_synset(syn_wn)
  sd_ox = OxfordParser.get_short_defi_for_syn(syn_ox)

  ex_wn = WordnetHandler.get_ex_for_syn(syn_wn)
  ex_ox = OxfordParser.get_ex_for_syn(syn_ox)

  cl_ox = OxfordParser.get_collocation_for_syn(syn_ox)
  hyper_wn = WordnetHandler.get_hyper_defi_for_synset(syn_wn)
  mero_wn = WordnetHandler.get_mero_defi_for_synset(syn_wn)

  # Literal features on the definitions.
  defi_leven = 1 - Literal.levenshtein(defi_wn, defi_ox)
  defi_jaccard = 1.00001 - Literal.jaccard(defi_wn, defi_ox)

  # Literal features on the glosses.
  gloss_leven = 1 - Literal.levenshtein(gloss_wn, gloss_ox)
  gloss_jaccard = 1.00001 - Literal.jaccard(gloss_wn, gloss_ox)

  # Gloss Jaccard feature boosted by word n-gram scores, added one n at a
  # time in increasing order.
  jaccard_with_ngrams = gloss_jaccard
  for n in (2, 3, 4, 5):
    jaccard_with_ngrams += Ngrams.ngrams_word_for(gloss_wn, gloss_ox, n)

  # WordNet-based (WSD) similarity of the definitions.
  wn_value = WordnetBased.wordnet_based(defi_wn, defi_ox, 0)

  return [
    defi_leven,
    defi_jaccard,
    gloss_leven,
    gloss_jaccard,
    jaccard_with_ngrams,
    wn_value,
  ]
Пример #5
0
def interpret():
    """Check the proof held in the module-level `lines`, one line at a time.

    Every input line is tokenized and turned into a Literal.Literal that is
    appended to `proof`. The first token selects the handling:
      '#'     the remaining tokens are stored with indent 0
      '-'     the remaining tokens are stored with the line's tab indent
      'STOP'  processing ends
    Any other line is treated as a statement followed by a justification:
    the last three tokens are read as a rule name ("AND", "OR", "THEN",
    "IFF", "NOT", "FALSE"), "INTRO" or "ELIM", and a comma-separated list of
    referenced proof lines. Referenced line numbers are 1-based; the number
    printed in messages is the 0-based index from enumerate.

    The outcome is reported with print statements and by setting `valid` on
    the stored Literal; nothing is returned in the visible code.
    """
    #keep track of values

    proof = []

    #line by line values
    indent = 0
    # NOTE(review): last_indent, paren and incomplete are never read in the
    # visible part of this function.
    last_indent = 0

    #error checking
    paren = 0
    incomplete = False

    for lnum, line in enumerate(lines):
        indent = 0

        #error checking?

        #parsing
        # NOTE(review): an empty string raises IndexError on line[-1]; a line
        # that is only '\n' becomes empty here and then fails on line[0].
        if line[-1] == '\n':
            line = line[:-1]

        # Leading tabs give the nesting depth of the line.
        while line[0] == '\t':
            indent += 1
            line = line[1:]

        # Split on single spaces and drop the empty strings that runs of
        # spaces leave behind.
        line = line.split(' ')
        while '' in line:
            line.remove('')

        # Separate parentheses from the words they are attached to.
        parts = []
        for i, part in enumerate(line):
            part = part.split('(')
            # Put back one '(' per split point, at indices 0, 2, 4, ...
            # NOTE(review): for a '(' that is not at the start of the word
            # (e.g. "A(B") this places the '(' before "A" - confirm intended.
            for x in xrange(len(part) - 1):
                part.insert(2 * x, '(')

            for j, p in enumerate(part):
                p = p.split(')')
                # 2 * len(p) + 1 is always past the end of the list, so each
                # ')' is appended after all the pieces.
                for x in xrange(len(p) - 1):
                    p.insert(2 * len(p) + 1, ')')

                for l in p:
                    if l != '':
                        parts.append(l)

        print parts

        # NOTE(review): parts[0] raises IndexError when the line held no
        # tokens.
        if parts[0] == '#':
            proof.append(Literal.Literal(parts[1:], 0, False))
        elif parts[0] == '-':
            proof.append(Literal.Literal(parts[1:], indent, False))
        elif parts[0] == 'STOP':
            break
        else:
            # reason = [rule, "INTRO"/"ELIM", ref, ref, ...]; the statement is
            # everything before the fourth token from the end (that token is
            # dropped - presumably a separator, TODO confirm).
            reason = parts[-3:-1] + parts[-1].split(',')
            proof.append(Literal.Literal(parts[:-4], indent, False))

            #deal with reason dashes
            sub = []
            # NOTE(review): this splits `l`, the variable left over from the
            # tokenizing loop above, not reason[-1] - looks unintended,
            # confirm.
            if '-' in reason[-1]:
                sub = [int(x) for x in l.split('-')]

            ind = indent
            brk = False
            if reason[0] == "AND":
                if reason[1] == "INTRO":
                    # Every referenced line must pass the check against the
                    # new line; the first failure rejects it.
                    for l in reason[2:]:
                        if not proof[int(l) - 1].andintro(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID AND INTRO", lnum
                elif reason[1] == "ELIM":
                    for l in reason[2:]:
                        if not proof[int(l) - 1].andelim(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID AND ELIM", lnum
            elif reason[0] == "OR":
                if reason[1] == "INTRO":
                    for l in reason[2:]:
                        if not proof[int(l) - 1].orintro(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID OR INTRO", lnum
                elif reason[1] == "ELIM":
                    for l in reason[2:]:
                        if not proof[int(l) - 1].orelim(proof[-1]):
                            print "Invalid line", lnum
                            brk = True
                            break
                    if not brk:
                        proof[-1].valid = True
                        print "VALID OR ELIM", lnum
            elif reason[0] == "THEN":
                if reason[1] == "INTRO":
                    # Uses the range in `sub`. Here brk == True means success:
                    # the first line of the range passed thenintro(..., 0) and
                    # some line one level deeper passed thenintro(..., 1).
                    part1 = False
                    brk = False
                    if proof[sub[0] - 1].thenintro(proof[-1], 0):
                        part1 = True
                    if part1:
                        for ln in proof[sub[0] - 1:sub[1]]:
                            print "CHECK", ln
                            if ln.indent == ind + 1:
                                if ln.thenintro(proof[-1], 1):
                                    brk = True
                                    print "brk"
                                    break
                            elif ln.indent < ind:
                                # A shallower line ends the scan.
                                print "not brk"
                                break

                    if not brk:
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID THEN INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[int(reason[2]) - 1].thenelim(
                            proof[int(reason[3]) - 1], proof[-1]):
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID THEN ELIM", lnum
            elif reason[0] == "IFF":
                if reason[1] == "INTRO":
                    if not proof[-1].iffintro(proof[int(reason[2]) - 1],
                                              proof[int(reason[3]) - 1]):
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID IFF INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[int(reason[2]) - 1].iffelim(
                            proof[int(reason[3]) - 1], proof[-1]):
                        print "Invalid line", lnum
                        brk = True
                        # NOTE(review): this break leaves the outer loop over
                        # `lines`, so an invalid IFF ELIM stops all further
                        # checking, unlike most other rules - confirm.
                        break
                    else:
                        proof[-1].valid = True
                        print "VALID IFF ELIM", lnum
            elif reason[0] == "NOT":
                if reason[
                        1] == "INTRO":  #copy then intro except leads to false (ln.equals("false")) and proof[-1] is negative of proof[sub[0] - 1]
                    # As with THEN INTRO, brk == True means success here.
                    part1 = False
                    brk = False
                    if proof[sub[0] - 1].notintro(proof[-1]):
                        part1 = True
                    if part1:
                        for ln in proof[sub[0] - 1:sub[1]]:
                            print "CHECK", ln
                            if ln.indent == ind + 1:
                                if ln.equals("false"):
                                    brk = True
                                    print "brk"
                                    break
                            elif ln.indent < ind:
                                print "not brk"
                                break

                    if not brk:
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID NOT INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[int(reason[2]) - 1].notelim(proof[-1]):
                        print "Invalid line", lnum
                        brk = True
                        # NOTE(review): leaves the outer loop over `lines`,
                        # same as IFF ELIM above.
                        break
                    else:
                        proof[-1].valid = True
                        # NOTE(review): message says FALSE ELIM although this
                        # is the NOT ELIM branch.
                        print "VALID FALSE ELIM", lnum
            elif reason[0] == "FALSE":
                if reason[1] == "INTRO":
                    if not proof[int(reason[2]) - 1].falseintro(
                            proof[int(reason[3]) - 1]):
                        print "Invalid line", lnum
                    # NOTE(review): brk is still False here, so the line is
                    # marked valid even right after "Invalid line" was
                    # printed - the sibling branches use `else` instead.
                    if not brk:
                        proof[-1].valid = True
                        print "VALID FALSE INTRO", lnum
                elif reason[1] == "ELIM":
                    if not proof[-1].falseelim(proof[int(reason[2]) - 1]):
                        print "Invalid line", lnum
                    else:
                        proof[-1].valid = True
                        print "VALID FALSE ELIM", lnum