Пример #1
0
 def syntactic_parse(pos_list, grammar):
     """Return the first Viterbi parse of ``pos_list``, or ``None``.

     :param pos_list: token sequence handed to the parser.
     :param grammar: grammar object; must provide ``check_coverage``.
     :return: the first tree yielded by ``ViterbiParser.parse``; ``None``
         when ``check_coverage`` raises ``ValueError`` or when the parser
         yields nothing.
     """
     viterbi = ViterbiParser(grammar)
     try:
         grammar.check_coverage(pos_list)
     except ValueError:
         # The coverage check rejected the tokens: nothing to parse.
         return None
     # Only the first yielded tree is wanted; fall back to None if there is none.
     return next(iter(viterbi.parse(pos_list)), None)
Пример #2
0
	def parse(self, grammar, trace=0):
		"""
		Run the nltk Viterbi parser over this sentence's tokens.

		:param grammar: the (adapted) WeightedGrammar object to parse with
		:param trace: trace level forwarded to the parser's ``trace`` method
		:return: the first entry of the list returned by ``nbest_parse``
			(described by the original author as the best parse and its score)
		"""
		viterbi = ViterbiParser(grammar)
		viterbi.trace(trace)
		# Only the head of the n-best list is handed back to the caller.
		return viterbi.nbest_parse(self.tokens)[0]
Пример #3
0
    def parse(self, grammar, trace=0):
        """
        Parse ``self.tokens`` with the nltk Viterbi parser.

        :param grammar: the (adapted) WeightedGrammar object to parse with
        :param trace: value passed to the parser's ``trace`` method to
            control its output
        :return: element 0 of the list returned by ``nbest_parse``
            (documented by the original author as the best parse and
            its score)
        """
        viterbi_parser = ViterbiParser(grammar)
        viterbi_parser.trace(trace)
        candidates = viterbi_parser.nbest_parse(self.tokens)
        best_candidate = candidates[0]
        return best_candidate
Пример #4
0
  def __init__(self, the_grammar):
    """Create the Viterbi parser and set the maximum word length.

    :param the_grammar: grammar object handed to ``ViterbiParser``.
    """
    print('Initialization of ParserSeger...')
    viterbi = ViterbiParser(the_grammar)
    self.parser = viterbi
    # Upper bound stored for later use as the longest word length.
    self.max_word_len = 4
    print('done')
Пример #5
0
                           "state": {
                               "equation": {
                                   "id": "eqn",
                                   "value": sin,
                                   "contentEditable": True
                               },
                           },
                       })
# Show the response object, the input that was sent, and the value
# returned by the response's json() method.
# NOTE(review): reqReq looks like an HTTP response object created above
# this view - confirm.
print(reqReq)
print("test input:", sin)
print("test output:", reqReq.json())

# Directory and file name; they are concatenated (fdir + fname) to open
# the input file.
fdir = "/Users/gabriel/"
fname = "examples2"

# Viterbi parser built from `grammar`, which is defined outside this view.
parser = ViterbiParser(grammar)

with open(fdir + fname) as fin:
    sentences = set()

    mod = 0

    for eq in fin:
        eq = eq.split("\t")[2]
        eq = eq.lower()
        eq = filter(lambda ch: ch in "()1234567890+-*/.;=x", eq)
        proc = ""
        for i in eq:
            proc += i
        past = False
        for sin in proc.split(";"):
Пример #6
0
            # Lowercase the text, then keep only the characters listed in
            # the allowed-character string.
            eq = eq.lower()
            eq = filter(lambda ch: ch in "()1234567890+-*/.;=x", eq)
            # Rebuild a plain string from the filtered characters.
            proc = ""
            for i in eq:
                proc += i
            # Split on ';' and record every piece, printing each one.
            # presumably `sentences` is a set (it is used via .add) - the
            # definition is outside this view.
            for sin in proc.split(";"):
                print(sin)
                sentences.add(sin)

    # Turn every collected sentence into a list of its characters.
    # NOTE(review): `sents` is not used in the visible code; induce_weights
    # below receives `sentences` instead - confirm which one is intended.
    sents = [[c for c in s] for s in sentences]

    # pCFG_Grammar is defined outside this view; it is given the current
    # grammar through its `grammar` attribute.
    prob_inducer = pCFG_Grammar()

    prob_inducer.grammar = grammar

    parser = ViterbiParser(grammar)

    # Parse one hand-written equation, one character per token, and print
    # every tree the parser yields.
    test = '(5-9x)-5+18=-2/7x'
    #test = '5+18=2/7x'
    for i in parser.parse([i for i in test]):
        print(i)

    print("INDUCING WEIGHTS:")

    # presumably re-estimates the grammar's rule weights from the
    # sentences; induce_weights is defined elsewhere - verify.
    prob_inducer.induce_weights(sentences)

    # Pick up the grammar held by the inducer after induce_weights ran.
    grammar = prob_inducer.grammar

    print("GRAMMAR:")
    print(grammar)
Пример #7
0

def tree_features(tree, path):
    """Collect label and leaf-text features for ``tree`` and its first two children.

    :param tree: object offering ``leaves()``, ``label()``, ``len()`` and
        integer indexing.
    :param path: value identifying the position of ``tree``; children get
        ``('left-tree', path)`` and ``('right-tree', path)``.
    :return: list of ``((feature_name, path), value)`` pairs, this node
        first, then the features of ``tree[0]``, then those of ``tree[1]``.
    """
    leaf_text = "".join(tree.leaves())
    features = [
        (('tree-label', path), tree.label()),
        (('value', path), leaf_text),
    ]

    # The child count is printed for every visited node.
    print(len(tree))
    if len(tree) >= 2:
        # Only the first two children are followed.
        features.extend(tree_features(tree[0], ('left-tree', path)))
        features.extend(tree_features(tree[1], ('right-tree', path)))
    return features


if __name__ == "__main__":
    # Demo: print the grammar, parse a short expression character by
    # character, and print each tree together with its features.
    print(grammar)
    parser = ViterbiParser(grammar)

    sent = list("-7/2")
    print(sent)
    for parsed in parser.parse(sent):
        print(parsed)
        print(tree_features(parsed, 'some attribute'))
Пример #8
0
class ParserSeger():
  """Segment character sequences into words with a Viterbi-style search.

  Each candidate word is scored by parsing its characters with a
  ``ViterbiParser``; a dynamic program then keeps, for every prefix of
  the sentence, the segmentation with the highest summed word score.
  """

  def __init__(self, the_grammar):
    """Build the underlying parser.

    :param the_grammar: grammar object handed to ``ViterbiParser``.
    """
    print('Initialization of ParserSeger...')

    self.parser = ViterbiParser(the_grammar)
    # Longest candidate word (in characters) tried by viterbi_segment.
    self.max_word_len = 4
    print('done')

  def score(self, word_candidate):
    """Return the score of one candidate word.

    :param word_candidate: sequence of characters forming the candidate.
    :return: ``logprob()`` of the first parse returned by
      ``self.parser.nbest_parse``, or -1000 when no parse is returned.
    """
    # NOTE(review): nbest_parse is the pre-3.0 NLTK parser API; released
    # NLTK 3 exposes parse() instead - confirm the NLTK version in use.
    parses = self.parser.nbest_parse(word_candidate)
    if parses:
      return parses[0].logprob()
    # Penalty for a candidate that has no parse.
    return -1000

  def viterbi_segment(self, sentence):
    """Return the best-scoring segmentation of ``sentence``.

    :param sentence: list of single-character strings.
    :return: list of words (joined characters) covering the whole
      sentence; an empty list for an empty sentence.
    """
    # best[end] = (words covering sentence[:end], score of that segmentation)
    best = {0: ([], 1)}
    n_chars = len(sentence)

    for end in range(1, n_chars + 1):
      # Position trace printed by the original implementation.
      print(end)

      # None means "no candidate seen yet". A numeric sentinel such as
      # -1000000 would reject every candidate once scores fall that low
      # and silently reset the back-pointer to 0.
      best_score = None
      best_start = 0

      # Try the words ending at `end`, shortest first; ties keep the
      # shorter word because the comparison is strict.
      for length in range(1, min(end, self.max_word_len) + 1):
        start = end - length
        word_score = self.score(sentence[start:end])

        if start > 0:
          seg_score = best[start][1] + word_score
        else:
          # The word starts the sentence: there is no prefix score to add.
          seg_score = word_score

        if best_score is None or seg_score > best_score:
          best_start = start
          best_score = seg_score

      words = best[best_start][0] + [''.join(sentence[best_start:end])]
      best[end] = (words, best_score)

    return best[n_chars][0]

  def segment_corpus(self, corpus, path_out='../working_data/base_seg.out'):
    """Segment every sentence of ``corpus`` and write the result to a file.

    Whitespace in each sentence is discarded before segmenting, so an
    already segmented corpus is treated as raw text. One line per
    sentence is written, with words separated by single spaces.

    :param corpus: sized sequence of sentence strings.
    :param path_out: UTF-8 output file path; defaults to the location
      that used to be hard-coded.
    """
    total = len(corpus)
    # Report progress about every 1% of the corpus. The step is at least 1
    # so corpora shorter than 100 sentences do not divide by zero.
    step = max(1, total // 100)

    results = []
    for sent_count, sent in enumerate(corpus):
      if sent_count % step == 0:
        print(math.ceil(sent_count/total*100),'% finished...')

      # Keep only the non-whitespace characters of the sentence.
      char_list = re.findall(r'\S', sent, re.U)
      results.append(self.viterbi_segment(char_list))

    # The context manager closes the file even if a write fails.
    with codecs.open(path_out, 'w', 'utf-8') as out_file:
      print('\nPriting out segmented corpus to file ', path_out)
      for words in results:
        out_file.write(' '.join(words)+'\n')

    print('\n','   --- Segmentation Done!  ---   ')
    print('# of sentence being segmented:', len(results))
    print('# of sentence in test corpus(see whether it matches last num):', total)