Exemplo n.º 1
0
def exercise_basic():
  """Check infix-to-postfix conversion on a table of boolean expressions.

  Each table entry pairs an infix input string with the word values that
  ``simple_parser.infix_as_postfix`` is expected to yield, in order.
  Passing ``--verbose`` on the command line prints every input string and
  the postfix word values obtained for it.

  Raises:
    AssertionError: if the postfix word values differ from the expectation.
  """
  tests = [
  ["a",
    ['a']
  ],
  ["a and b",
    ['a', 'b', 'and']
  ],
  ["a or b",
    ['a', 'b', 'or']
  ],
  ["not a or b",
    ['a', 'not', 'b', 'or']
  ],
  ["not a or b and c",
    ['a', 'not', 'b', 'c', 'and', 'or']
  ],
  ["not (a or b) and c",
    ['a', 'b', 'or', 'not', 'c', 'and']
  ],
  ["(not (a or b) and c)",
    ['a', 'b', 'or', 'not', 'c', 'and']
  ],
  ["not ((a or b) and c)",
    ['a', 'b', 'or', 'c', 'and', 'not']
  ],
  ]
  verbose = "--verbose" in sys.argv[1:]
  for input_string,expected_result in tests:
    infix = tokenizer.word_iterator(input_string=input_string)
    # Single-argument print(...) prints the same text as the Python 2 print
    # statement and also parses under Python 3.
    if (verbose): print(input_string)
    # infix_as_postfix yields (word, word_iterator) pairs; only the word
    # is of interest here.
    postfix_values = [
      word.value for word,_ in simple_parser.infix_as_postfix(infix)]
    if (verbose): print(postfix_values)
    # Without this comparison the expected results in the table are unused
    # and the exercise cannot fail.
    assert postfix_values == expected_result
Exemplo n.º 2
0
def rewrite_parser(
      word_iterator,
      stop_if_parse_stack_is_empty=False,
      stop_word=None,
      expect_nonmatching_closing_parenthesis=False):
  """Rewrite an infix boolean expression as a compact operator string.

  The words yielded by ``simple_parser.infix_as_postfix`` are folded on a
  stack:

    - ``not``  replaces the top entry ``x`` with ``(!x)``
    - ``and``  replaces the top two entries with ``(lhs&rhs)``
    - ``or``   replaces the top two entries with ``(lhs|rhs)``
    - ``within``  reads ``(``, a radius, ``,`` and then a nested expression
      parsed with ``expect_nonmatching_closing_parenthesis=True``
    - ``around``  reads ``(``, a nested expression parsed with
      ``stop_word=","``, a radius and then ``)``
    - ``for``  reads a variable name, ``in`` and a nested expression parsed
      with ``stop_if_parse_stack_is_empty=True``; pushes
      ``(for var in nested)``
    - any other word is pushed unchanged

  ``within`` and ``around`` both push ``@(radius,nested)`` with the radius
  formatted as ``%.2f``.

  Args:
    word_iterator: word source; handed to ``simple_parser.infix_as_postfix``
      and popped from directly for keyword arguments.
    stop_if_parse_stack_is_empty: forwarded to ``infix_as_postfix``.
    stop_word: forwarded to ``infix_as_postfix``.
    expect_nonmatching_closing_parenthesis: forwarded to
      ``infix_as_postfix``.

  Returns:
    ``""`` if nothing was pushed; otherwise the stack entries combined left
    to right as ``(a&b)``.

  Raises:
    AssertionError: if the punctuation or ``in`` expected after ``within``,
      ``around`` or ``for`` is not the next word.
    RuntimeError: if a nested expression is empty.
    IndexError: if an operator finds too few entries on the stack.
  """
  def pop_expected(words, expected):
    # The pop is a required parsing step, so it must not live inside an
    # assert statement: "python -O" strips asserts and would leave the word
    # unconsumed. AssertionError is kept as the error type for callers.
    found = words.pop().value
    if (found != expected):
      raise AssertionError(
        'Expected "%s" but found "%s".' % (expected, found))

  def parse_nested(words, **keyword_args):
    # Recursive parse of a sub-expression; an empty result means the
    # keyword was given no argument.
    nested = rewrite_parser(word_iterator=words, **keyword_args)
    if (nested == ""): raise RuntimeError("Missing argument.")
    return nested

  result_stack = []
  for word,word_iterator in simple_parser.infix_as_postfix(
         word_iterator=word_iterator,
         stop_if_parse_stack_is_empty=stop_if_parse_stack_is_empty,
         stop_word=stop_word,
         expect_nonmatching_closing_parenthesis
           =expect_nonmatching_closing_parenthesis):
    if (word.value == "not"):
      arg = result_stack.pop()
      result_stack.append("(!%s)" % arg)
    elif (word.value in ["and", "or"]):
      # Postfix order: the right-hand operand is on top of the stack.
      rhs = result_stack.pop()
      lhs = result_stack.pop()
      if (word.value == "and"):
        result_stack.append("(%s&%s)" % (lhs, rhs))
      else:
        result_stack.append("(%s|%s)" % (lhs, rhs))
    elif (word.value == "within"):
      pop_expected(word_iterator, "(")
      radius = float(word_iterator.pop().value)
      pop_expected(word_iterator, ",")
      nested_result = parse_nested(
        word_iterator,
        expect_nonmatching_closing_parenthesis=True)
      result_stack.append("@(%.2f,%s)" % (radius, nested_result))
    elif (word.value == "around"):
      pop_expected(word_iterator, "(")
      nested_result = parse_nested(word_iterator, stop_word=",")
      radius = float(word_iterator.pop().value)
      pop_expected(word_iterator, ")")
      result_stack.append("@(%.2f,%s)" % (radius, nested_result))
    elif (word.value == "for"):
      var = word_iterator.pop().value
      pop_expected(word_iterator, "in")
      nested_result = parse_nested(
        word_iterator,
        stop_if_parse_stack_is_empty=True)
      result_stack.append("(for %s in %s)" % (var, nested_result))
    else:
      result_stack.append(word.value)
  if (len(result_stack) == 0):
    return ""
  # Entries left side by side without an explicit operator are and-ed.
  result = result_stack[0]
  for item in result_stack[1:]:
    result = "(%s&%s)" % (result, item)
  return result
Exemplo n.º 3
0
def rewrite_parser(word_iterator,
                   stop_if_parse_stack_is_empty=False,
                   stop_word=None,
                   expect_nonmatching_closing_parenthesis=False):
    """Rewrite an infix boolean expression as a compact operator string.

    The words yielded by ``simple_parser.infix_as_postfix`` are folded on a
    stack:

      - ``not``  replaces the top entry ``x`` with ``(!x)``
      - ``and``  replaces the top two entries with ``(lhs&rhs)``
      - ``or``   replaces the top two entries with ``(lhs|rhs)``
      - ``within``  reads ``(``, a radius, ``,`` and then a nested
        expression parsed with
        ``expect_nonmatching_closing_parenthesis=True``
      - ``around``  reads ``(``, a nested expression parsed with
        ``stop_word=","``, a radius and then ``)``
      - ``for``  reads a variable name, ``in`` and a nested expression
        parsed with ``stop_if_parse_stack_is_empty=True``; pushes
        ``(for var in nested)``
      - any other word is pushed unchanged

    ``within`` and ``around`` both push ``@(radius,nested)`` with the radius
    formatted as ``%.2f``.

    Args:
        word_iterator: word source; handed to
            ``simple_parser.infix_as_postfix`` and popped from directly for
            keyword arguments.
        stop_if_parse_stack_is_empty: forwarded to ``infix_as_postfix``.
        stop_word: forwarded to ``infix_as_postfix``.
        expect_nonmatching_closing_parenthesis: forwarded to
            ``infix_as_postfix``.

    Returns:
        ``""`` if nothing was pushed; otherwise the stack entries combined
        left to right as ``(a&b)``.

    Raises:
        AssertionError: if the punctuation or ``in`` expected after
            ``within``, ``around`` or ``for`` is not the next word.
        RuntimeError: if a nested expression is empty.
        IndexError: if an operator finds too few entries on the stack.
    """
    def pop_expected(words, expected):
        # The pop is a required parsing step, so it must not live inside an
        # assert statement: "python -O" strips asserts and would leave the
        # word unconsumed. AssertionError is kept as the error type.
        found = words.pop().value
        if (found != expected):
            raise AssertionError(
                'Expected "%s" but found "%s".' % (expected, found))

    def parse_nested(words, **keyword_args):
        # Recursive parse of a sub-expression; an empty result means the
        # keyword was given no argument.
        nested = rewrite_parser(word_iterator=words, **keyword_args)
        if (nested == ""): raise RuntimeError("Missing argument.")
        return nested

    result_stack = []
    for word, word_iterator in simple_parser.infix_as_postfix(
            word_iterator=word_iterator,
            stop_if_parse_stack_is_empty=stop_if_parse_stack_is_empty,
            stop_word=stop_word,
            expect_nonmatching_closing_parenthesis=
            expect_nonmatching_closing_parenthesis):
        if (word.value == "not"):
            arg = result_stack.pop()
            result_stack.append("(!%s)" % arg)
        elif (word.value in ["and", "or"]):
            # Postfix order: the right-hand operand is on top of the stack.
            rhs = result_stack.pop()
            lhs = result_stack.pop()
            if (word.value == "and"):
                result_stack.append("(%s&%s)" % (lhs, rhs))
            else:
                result_stack.append("(%s|%s)" % (lhs, rhs))
        elif (word.value == "within"):
            pop_expected(word_iterator, "(")
            radius = float(word_iterator.pop().value)
            pop_expected(word_iterator, ",")
            nested_result = parse_nested(
                word_iterator,
                expect_nonmatching_closing_parenthesis=True)
            result_stack.append("@(%.2f,%s)" % (radius, nested_result))
        elif (word.value == "around"):
            pop_expected(word_iterator, "(")
            nested_result = parse_nested(word_iterator, stop_word=",")
            radius = float(word_iterator.pop().value)
            pop_expected(word_iterator, ")")
            result_stack.append("@(%.2f,%s)" % (radius, nested_result))
        elif (word.value == "for"):
            var = word_iterator.pop().value
            pop_expected(word_iterator, "in")
            nested_result = parse_nested(
                word_iterator,
                stop_if_parse_stack_is_empty=True)
            result_stack.append("(for %s in %s)" % (var, nested_result))
        else:
            result_stack.append(word.value)
    if (len(result_stack) == 0):
        return ""
    # Entries left side by side without an explicit operator are and-ed.
    result = result_stack[0]
    for item in result_stack[1:]:
        result = "(%s&%s)" % (result, item)
    return result
Exemplo n.º 4
0
def exercise_basic():
    """Check infix-to-postfix conversion on a table of boolean expressions.

    Each table entry pairs an infix input string with the word values that
    ``simple_parser.infix_as_postfix`` is expected to yield, in order.
    Passing ``--verbose`` on the command line prints every input string and
    the postfix word values obtained for it, followed by an empty line.

    Raises:
        AssertionError: if the postfix word values differ from the
            expectation.
    """
    tests = [
        ["a", ['a']],
        ["a and b", ['a', 'b', 'and']],
        ["a or b", ['a', 'b', 'or']],
        ["not a or b", ['a', 'not', 'b', 'or']],
        ["not a or b and c", ['a', 'not', 'b', 'c', 'and', 'or']],
        ["not (a or b) and c", ['a', 'b', 'or', 'not', 'c', 'and']],
        ["(not (a or b) and c)", ['a', 'b', 'or', 'not', 'c', 'and']],
        ["not ((a or b) and c)", ['a', 'b', 'or', 'c', 'and', 'not']],
    ]
    verbose = "--verbose" in sys.argv[1:]
    for input_string, expected_result in tests:
        infix = tokenizer.word_iterator(input_string=input_string)
        # Single-argument print(...) prints the same text as the Python 2
        # print statement and also parses under Python 3.
        if (verbose): print(input_string)
        # infix_as_postfix yields (word, word_iterator) pairs; only the
        # word is of interest here.
        postfix_values = [
            word.value
            for word, _ in simple_parser.infix_as_postfix(infix)
        ]
        if (verbose): print(postfix_values)
        assert postfix_values == expected_result
        # print("") emits just a newline, like the bare print statement;
        # print() would print "()" under Python 2.
        if (verbose): print("")
    def selection_parser(self,
                         word_iterator,
                         optional=True,
                         callback=None,
                         stop_word=None,
                         expect_nonmatching_closing_parenthesis=False):
        """Evaluate an atom selection expression into a boolean selection.

        Words yielded by ``simple_parser.infix_as_postfix`` are evaluated on
        a stack. ``not``, ``and`` and ``or`` apply ``~``, ``&`` and ``|`` to
        stack entries; every other word is treated as a keyword (matched
        case-insensitively) that pushes a selection built by one of the
        ``self.sel_*`` methods. Several keywords consume the following
        word(s) from ``word_iterator`` as their argument.

        Args:
            word_iterator: word source; handed to ``infix_as_postfix`` and
                popped from directly for keyword arguments.
            optional: when true, a leading ``optional`` keyword has no
                effect on the return value. When false, ``optional`` makes
                this method return None for an empty or all-False result.
            callback: optional callable invoked as ``callback(word=...,
                word_iterator=..., result_stack=...)`` for words not
                handled here; a false return value is a syntax error. When
                a callback is given, ``protein``, ``peptide`` and
                ``nucleotide`` are left to it.
            stop_word: forwarded to ``infix_as_postfix``.
            expect_nonmatching_closing_parenthesis: forwarded to
                ``infix_as_postfix``.

        Returns:
            The stack entries combined with ``&``;
            ``flex.bool(self.n_seq, False)`` if the stack is empty; or None
            in the ``optional`` cases described above.

        Raises:
            RuntimeError: on an unrecognized word or malformed
                ``bfactor``/``occupancy`` clause.
            Sorry: if ``optional`` is repeated or not at the beginning.
            AssertionError: if an operator lacks operands or the
                punctuation of ``within(...)`` is wrong.
        """
        def pop_expected(words, expected):
            # The pop is a required parsing step, so it must not live
            # inside an assert statement: "python -O" strips asserts and
            # would leave the word unconsumed. AssertionError is kept as
            # the error type.
            found = words.pop().value
            if (found != expected):
                raise AssertionError(
                    'Expected "%s" but found "%s".' % (expected, found))

        have_optional = False
        result_stack = []
        for word, word_iterator in simple_parser.infix_as_postfix(
                word_iterator=word_iterator,
                stop_word=stop_word,
                expect_nonmatching_closing_parenthesis=
                expect_nonmatching_closing_parenthesis):
            lword = word.value.lower()

            def raise_syntax_error():
                raise RuntimeError(
                    'Atom selection syntax error at word "%s".' % lword)

            if (lword == "optional"):
                if (len(result_stack) != 0):
                    raise Sorry('"optional" can appear only at the beginning.')
                if (have_optional):
                    raise Sorry('"optional" can appear only once.')
                have_optional = True
            elif (lword == "not"):
                assert len(result_stack) >= 1
                arg = result_stack.pop()
                result_stack.append(~arg)
            elif (lword in ["and", "or"]):
                assert len(result_stack) >= 2
                # Postfix order: the right-hand operand is on top.
                rhs = result_stack.pop()
                lhs = result_stack.pop()
                if (lword == "and"):
                    result_stack.append(lhs & rhs)
                else:
                    result_stack.append(lhs | rhs)
            else:
                if (lword == "all"):
                    result_stack.append(flex.bool(self.n_seq, True))
                elif (lword == "none"):
                    result_stack.append(flex.bool(self.n_seq, False))
                elif (lword == "name"):
                    result_stack.append(
                        self.sel_name(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword in ["altloc", "altid"]):
                    result_stack.append(
                        self.sel_altloc(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "resname"):
                    result_stack.append(
                        self.sel_resname(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "chain"):
                    result_stack.append(
                        self.sel_chain_id(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword in ["resseq", "resid", "resi", "model"]):
                    arg = word_iterator.pop_argument(word.value)

                    def try_compose_range():
                        # Assemble a "start:stop" range that may be spread
                        # over up to three words ("1", ":", "5").
                        # Returns (text, index of the colon in text), with
                        # index -1 when there is no colon.
                        def is_cont():
                            # True if the candidate continuation word
                            # starts with a digit.
                            if (len(arg_cont.value) == 0): return False
                            return ("0123456789".find(arg_cont.value[0]) >= 0)

                        i_colon = arg.value.find(":")
                        if (i_colon < 0):
                            arg_cont = word_iterator.try_pop()
                            if (arg_cont is None):
                                return arg.value, -1
                            if (not arg_cont.value.startswith(":")):
                                word_iterator.backup()
                                return arg.value, -1
                            if (len(arg_cont.value) == 1):
                                # A lone ":"; the stop value, if any, is
                                # in the word after it.
                                arg_cont = word_iterator.try_pop()
                                if (arg_cont is None):
                                    return arg.value + ":", len(arg.value)
                                if (not is_cont()):
                                    word_iterator.backup()
                                    return arg.value + ":", len(arg.value)
                                return arg.value + ":" + arg_cont.value, len(
                                    arg.value)
                            return arg.value + arg_cont.value, len(arg.value)
                        elif (i_colon + 1 == len(arg.value)):
                            # The argument ends with ":"; the stop value,
                            # if any, is in the next word.
                            arg_cont = word_iterator.try_pop()
                            if (arg_cont is not None):
                                if (is_cont()):
                                    return arg.value + arg_cont.value, i_colon
                                word_iterator.backup()
                        return arg.value, i_colon

                    def try_compose_sequence():
                        # Recognize "<start> through <stop>". Returns
                        # (start, stop), or (None, None) if the next word
                        # is not "through".
                        arg_next = word_iterator.try_pop()
                        if (arg_next is None):
                            word_iterator.backup()
                            return None, None
                        lnext = arg_next.value.lower()
                        if (lnext == "through"):
                            arg_final = word_iterator.pop_argument(
                                arg_next.value)
                            return arg.value, arg_final.value
                        word_iterator.backup()
                        return (None, None)

                    val, i_colon = try_compose_range()
                    if (i_colon < 0):
                        if (lword == "resseq"):
                            result_stack.append(self.sel_resseq(pattern=arg))
                        elif (lword in ["resid", "resi"]):
                            start, stop = try_compose_sequence()
                            if (start is None):
                                result_stack.append(
                                    self.sel_resid(pattern=arg))
                            else:
                                result_stack.append(
                                    self.sel_resid_sequence(start=start,
                                                            stop=stop))
                        else:
                            result_stack.append(self.sel_model_id(pattern=arg))
                    else:
                        start = val[:i_colon]
                        stop = val[i_colon + 1:]
                        if (lword == "resseq"):
                            result_stack.append(
                                self.sel_resseq_range(start=start, stop=stop))
                        elif (lword in ["resid", "resi"]):
                            result_stack.append(
                                self.sel_resid_range(start=start, stop=stop))
                        else:
                            result_stack.append(
                                self.sel_model_id_range(start=start,
                                                        stop=stop))
                elif (lword == "icode"):
                    result_stack.append(
                        self.sel_icode(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "segid"):
                    result_stack.append(
                        self.sel_segid(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "element"):
                    result_stack.append(
                        self.sel_element(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "charge"):
                    result_stack.append(
                        self.sel_charge(
                            pattern=word_iterator.pop_argument(word.value)))
                elif (lword == "anisou"):
                    result_stack.append(self.sel_anisou())
                elif (lword == "pepnames"):
                    result_stack.append(self.sel_pepnames())
                elif ((lword == "protein" or lword == "peptide")
                      and callback is None):
                    # If there is a callback, these keywords should be
                    # processed there; most likely it is pdb_interpretation.
                    result_stack.append(self.sel_protein())
                elif lword == "nucleotide" and callback is None:
                    result_stack.append(self.sel_nucleotide())
                elif (lword == "single_atom_residue"):
                    result_stack.append(self.sel_single_atom_residue())
                elif (lword == "water"):
                    result_stack.append(self.sel_water())
                elif (lword == "hetero") or (lword == "hetatm"):
                    result_stack.append(self.sel_hetero())
                elif (lword == "bfactor") or (lword == "occupancy"):
                    op = word_iterator.pop_argument(word.value).value
                    if (not op in [">", "<", "="]):
                        raise_syntax_error()
                    else:
                        arg_next = word_iterator.try_pop()
                        # try_pop() may return None (the range handling
                        # above checks for it too); report that as a
                        # syntax error instead of failing on None.value.
                        if (arg_next is None):
                            raise_syntax_error()
                        lnext = arg_next.value
                        try:
                            val = float(lnext)
                        except ValueError:
                            raise_syntax_error()
                        else:
                            if (lword == "bfactor"):
                                result_stack.append(self.sel_bfactor(op, val))
                            else:
                                result_stack.append(self.sel_occupancy(
                                    op, val))
                elif ((lword == "within" or lword == 'residues_within')
                      and (self.special_position_settings is not None)):
                    pop_expected(word_iterator, "(")
                    radius = float(word_iterator.pop().value)
                    pop_expected(word_iterator, ",")
                    sel = self.selection_parser(
                        word_iterator=word_iterator,
                        callback=callback,
                        expect_nonmatching_closing_parenthesis=True)
                    if lword == 'within':
                        result_stack.append(
                            self.sel_within(radius=radius,
                                            primary_selection=sel))
                    elif lword == 'residues_within':
                        result_stack.append(
                            self.sel_residues_within(radius=radius,
                                                     primary_selection=sel))
                elif (callback is not None):
                    if (not callback(word=word,
                                     word_iterator=word_iterator,
                                     result_stack=result_stack)):
                        raise_syntax_error()
                else:
                    raise_syntax_error()
        # With optional=True the "optional" keyword is accepted but has no
        # effect on what is returned.
        if (optional): have_optional = False
        if (len(result_stack) == 0):
            if (have_optional): return None
            return flex.bool(self.n_seq, False)
        # Entries left side by side without an explicit operator are
        # and-ed.
        selection = result_stack[0]
        for result in result_stack[1:]:
            selection &= result
        if (have_optional and selection.all_eq(False)):
            return None
        return selection
Exemplo n.º 6
0
 def selection_parser(self,
       word_iterator,
       optional=True,
       callback=None,
       stop_word=None,
       expect_nonmatching_closing_parenthesis=False):
   """Evaluate an atom selection expression into a boolean selection.

   Words yielded by ``simple_parser.infix_as_postfix`` are evaluated on a
   stack. ``not``, ``and`` and ``or`` apply ``~``, ``&`` and ``|`` to stack
   entries; every other word is treated as a keyword (matched
   case-insensitively) that pushes a selection built by one of the
   ``self.sel_*`` methods. Several keywords consume the following word(s)
   from ``word_iterator`` as their argument.

   Args:
     word_iterator: word source; handed to ``infix_as_postfix`` and popped
       from directly for keyword arguments.
     optional: when true, a leading ``optional`` keyword has no effect on
       the return value. When false, ``optional`` makes this method return
       None for an empty or all-False result.
     callback: optional callable invoked as ``callback(word=...,
       word_iterator=..., result_stack=...)`` for words not handled here;
       a false return value is a syntax error. When a callback is given,
       ``protein``, ``peptide`` and ``nucleotide`` are left to it.
     stop_word: forwarded to ``infix_as_postfix``.
     expect_nonmatching_closing_parenthesis: forwarded to
       ``infix_as_postfix``.

   Returns:
     The stack entries combined with ``&``; ``flex.bool(self.n_seq, False)``
     if the stack is empty; or None in the ``optional`` cases described
     above.

   Raises:
     RuntimeError: on an unrecognized word or malformed
       ``bfactor``/``occupancy`` clause.
     Sorry: if ``optional`` is repeated or not at the beginning.
     AssertionError: if an operator lacks operands or the punctuation of
       ``within(...)`` is wrong.
   """
   def pop_expected(words, expected):
     # The pop is a required parsing step, so it must not live inside an
     # assert statement: "python -O" strips asserts and would leave the
     # word unconsumed. AssertionError is kept as the error type.
     found = words.pop().value
     if (found != expected):
       raise AssertionError(
         'Expected "%s" but found "%s".' % (expected, found))
   have_optional = False
   result_stack = []
   for word,word_iterator in simple_parser.infix_as_postfix(
         word_iterator=word_iterator,
         stop_word=stop_word,
         expect_nonmatching_closing_parenthesis
           =expect_nonmatching_closing_parenthesis):
     lword = word.value.lower()
     def raise_syntax_error():
       raise RuntimeError(
         'Atom selection syntax error at word "%s".' % lword)
     if (lword == "optional"):
       if (len(result_stack) != 0):
         raise Sorry('"optional" can appear only at the beginning.')
       if (have_optional):
         raise Sorry('"optional" can appear only once.')
       have_optional = True
     elif (lword == "not"):
       assert len(result_stack) >= 1
       arg = result_stack.pop()
       result_stack.append(~arg)
     elif (lword in ["and", "or"]):
       assert len(result_stack) >= 2
       # Postfix order: the right-hand operand is on top.
       rhs = result_stack.pop()
       lhs = result_stack.pop()
       if (lword == "and"):
         result_stack.append(lhs & rhs)
       else:
         result_stack.append(lhs | rhs)
     else:
       if (lword == "all"):
         result_stack.append(flex.bool(self.n_seq, True))
       elif (lword == "none"):
         result_stack.append(flex.bool(self.n_seq, False))
       elif (lword == "name"):
         result_stack.append(
           self.sel_name(pattern=word_iterator.pop_argument(word.value)))
       elif (lword in ["altloc", "altid"]):
         result_stack.append(
           self.sel_altloc(pattern=word_iterator.pop_argument(word.value)))
       elif (lword == "resname"):
         result_stack.append(
           self.sel_resname(pattern=word_iterator.pop_argument(word.value)))
       elif (lword == "chain"):
         result_stack.append(
           self.sel_chain_id(pattern=word_iterator.pop_argument(word.value)))
       elif (lword in ["resseq", "resid", "resi", "model"]):
         arg = word_iterator.pop_argument(word.value)
         def try_compose_range():
           # Assemble a "start:stop" range that may be spread over up to
           # three words ("1", ":", "5"). Returns (text, index of the
           # colon in text), with index -1 when there is no colon.
           def is_cont():
             # True if the candidate continuation word starts with a digit.
             if (len(arg_cont.value) == 0): return False
             return ("0123456789".find(arg_cont.value[0]) >= 0)
           i_colon = arg.value.find(":")
           if (i_colon < 0):
             arg_cont = word_iterator.try_pop()
             if (arg_cont is None):
               return arg.value, -1
             if (not arg_cont.value.startswith(":")):
               word_iterator.backup()
               return arg.value, -1
             if (len(arg_cont.value) == 1):
               # A lone ":"; the stop value, if any, is in the word after.
               arg_cont = word_iterator.try_pop()
               if (arg_cont is None):
                 return arg.value+":", len(arg.value)
               if (not is_cont()):
                 word_iterator.backup()
                 return arg.value+":", len(arg.value)
               return arg.value+":"+arg_cont.value, len(arg.value)
             return arg.value+arg_cont.value, len(arg.value)
           elif (i_colon+1 == len(arg.value)):
             # The argument ends with ":"; the stop value, if any, is in
             # the next word.
             arg_cont = word_iterator.try_pop()
             if (arg_cont is not None):
               if (is_cont()):
                 return arg.value+arg_cont.value, i_colon
               word_iterator.backup()
           return arg.value, i_colon
         def try_compose_sequence():
           # Recognize "<start> through <stop>". Returns (start, stop), or
           # (None, None) if the next word is not "through".
           arg_next = word_iterator.try_pop()
           if (arg_next is None):
             word_iterator.backup()
             return None, None
           lnext = arg_next.value.lower()
           if (lnext == "through"):
             arg_final = word_iterator.pop_argument(arg_next.value)
             return arg.value, arg_final.value
           word_iterator.backup()
           return (None, None)
         val, i_colon = try_compose_range()
         if (i_colon < 0):
           if (lword == "resseq"):
             result_stack.append(self.sel_resseq(pattern=arg))
           elif (lword in ["resid", "resi"]):
             start, stop = try_compose_sequence()
             if (start is None):
               result_stack.append(self.sel_resid(pattern=arg))
             else:
               result_stack.append(self.sel_resid_sequence(start=start,
                 stop=stop))
           else:
             result_stack.append(self.sel_model_id(pattern=arg))
         else:
           start = val[:i_colon]
           stop = val[i_colon+1:]
           if (lword == "resseq"):
             result_stack.append(
               self.sel_resseq_range(start=start, stop=stop))
           elif (lword in ["resid", "resi"]):
             result_stack.append(
               self.sel_resid_range(start=start, stop=stop))
           else:
             result_stack.append(
               self.sel_model_id_range(start=start, stop=stop))
       elif (lword == "icode"):
         result_stack.append(
           self.sel_icode(pattern=word_iterator.pop_argument(word.value)))
       elif (lword == "segid"):
         result_stack.append(
           self.sel_segid(pattern=word_iterator.pop_argument(word.value)))
       elif (lword == "element"):
         result_stack.append(
           self.sel_element(pattern=word_iterator.pop_argument(word.value)))
       elif (lword == "charge"):
         result_stack.append(
           self.sel_charge(pattern=word_iterator.pop_argument(word.value)))
       elif (lword == "anisou"):
         result_stack.append(self.sel_anisou())
       elif (lword == "pepnames"):
         result_stack.append(self.sel_pepnames())
       elif ((lword == "protein" or lword == "peptide")
           and callback is None):
         # If there is a callback, these keywords should be processed
         # there; most likely it is pdb_interpretation.
         result_stack.append(self.sel_protein())
       elif lword == "nucleotide" and callback is None:
         result_stack.append(self.sel_nucleotide())
       elif (lword == "single_atom_residue"):
         result_stack.append(self.sel_single_atom_residue())
       elif (lword == "water"):
         result_stack.append(self.sel_water())
       elif (lword == "hetero") or (lword == "hetatm"):
         result_stack.append(self.sel_hetero())
       elif (lword == "bfactor") or (lword == "occupancy"):
         op = word_iterator.pop_argument(word.value).value
         if (not op in [">", "<", "="]):
           raise_syntax_error()
         else:
           arg_next = word_iterator.try_pop()
           # try_pop() may return None (the range handling above checks
           # for it too); report that as a syntax error instead of failing
           # on None.value.
           if (arg_next is None):
             raise_syntax_error()
           lnext = arg_next.value
           try:
             val = float(lnext)
           except ValueError:
             raise_syntax_error()
           else:
             if (lword == "bfactor"):
               result_stack.append(self.sel_bfactor(op, val))
             else:
               result_stack.append(self.sel_occupancy(op, val))
       elif ((lword == "within") and
             (self.special_position_settings is not None)):
         pop_expected(word_iterator, "(")
         radius = float(word_iterator.pop().value)
         pop_expected(word_iterator, ",")
         sel = self.selection_parser(
           word_iterator=word_iterator,
           callback=callback,
           expect_nonmatching_closing_parenthesis=True)
         result_stack.append(self.sel_within(radius=radius,
           primary_selection=sel))
       elif (callback is not None):
         if (not callback(
                   word=word,
                   word_iterator=word_iterator,
                   result_stack=result_stack)):
           raise_syntax_error()
       else:
         raise_syntax_error()
   # With optional=True the "optional" keyword is accepted but has no
   # effect on what is returned.
   if (optional): have_optional = False
   if (len(result_stack) == 0):
     if (have_optional): return None
     return flex.bool(self.n_seq, False)
   # Entries left side by side without an explicit operator are and-ed.
   selection = result_stack[0]
   for result in result_stack[1:]:
     selection &= result
   if (have_optional and selection.all_eq(False)):
     return None
   return selection