def setUp(self):
    """Prepare the expression, phrase-set and MWE fixtures on ``self``.

    Creates four parsed PGF expressions together with their
    ``ExtendedExpr`` wrappers, a ``BilingualPhraseSet`` holding a single
    aligned phrase pair, two ``ParallelMWE`` objects, one
    ``BilingualExpr``, and finally installs a synonym dictionary on the
    ``ParallelMWE`` class.
    """
    unittest.TestCase.setUp(self)

    # Raw abstract-syntax expressions, parsed from their textual form.
    self.myexpr = pgf.readExpr("AdjCN (PositA crucial_A) (UseN item_N)")
    self.myexprw = pgf.readExpr("AdjCN (PositA crucial_A) (UseN wildcard_1)")
    self.myexprq = pgf.readExpr("CompoundCN ? wildcard_3 (AdjCN (PositA wildcard_1) (UseN wildcard_2))")
    self.myexprs = pgf.readExpr('(PredVP (DetCN (DetQuant IndefArt NumSg) (PossNP (AdjCN (PositA complete_A) (UseN collapse_N)) (UseQuantPN DefArt (SymbPN (MkSymb "U"))))) (UseComp (CompNP (MassNP (UseN dollar_N)))))')

    # Wrap every raw expression, in the same order they were parsed.
    wrappers = (
        ("extExpr", self.myexpr),
        ("extExprW", self.myexprw),
        ("extExprQ", self.myexprq),
        ("extExprS", self.myexprs),
    )
    for attribute, raw_expr in wrappers:
        setattr(self, attribute, ExtendedExpr(raw_expr, None))

    # One aligned phrase pair in "source ||| target ||| alignment" form.
    self.bilingualPhraseSet = BilingualPhraseSet()
    self.bilingualPhraseSet.add("NATO ||| la OTAN ||| 0-0 0-1")

    # Parallel MWEs written as "<left tree> | <right tree>".
    self.mwe1 = ParallelMWE()
    self.mwe1.parse("( MassNP ( UseN safety_N ) ) | ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( UseN security_N ) )")

    self.mwe2 = ParallelMWE()
    self.mwe2.parse("( PossNP ( UseN wildcard_1 ) ( MassNP ( AdjCN ( PositA wildcard_2 ) ( UseN politics_N ) ) ) ) | ( PossNP ( UseN wildcard_1 ) ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( AdjCN ( PositA wildcard_2 ) ( UseN policy_N ) ) ) )")

    self.bilphrase = BilingualExpr()
    self.bilphrase.parse("( MassNP ( AdjCN ( PositA wildcard_2 ) ( UseN politics_N ) ) )  | ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( AdjCN ( PositA wildcard_2 ) ( UseN policy_N ) ) )", ignoreFreq=True)

    # Class-level synonym table: installed only after all parsing is done.
    ParallelMWE.synonymDict = {"politics_N": {"policy_N"}}
class ExtendedExprTest(unittest.TestCase):
    """Unit tests for ``ExtendedExpr``, ``BilingualExpr``,
    ``BilingualPhraseSet`` and ``ParallelMWE``.

    All checks use ``self.assert*`` methods rather than bare ``assert``
    statements so that they still run under ``python -O`` and report a
    readable diff on failure.
    """

    def setUp(self):
        """Build the fixtures shared by the test methods.

        Note that this assigns ``ParallelMWE.synonymDict`` on the class
        itself, so the value remains set after a test has finished.
        """
        unittest.TestCase.setUp(self)

        # Raw abstract-syntax expressions parsed from text.
        self.myexpr = pgf.readExpr("AdjCN (PositA crucial_A) (UseN item_N)")
        self.myexprw = pgf.readExpr("AdjCN (PositA crucial_A) (UseN wildcard_1)")
        self.myexprq = pgf.readExpr("CompoundCN ? wildcard_3 (AdjCN (PositA wildcard_1) (UseN wildcard_2))")
        self.myexprs = pgf.readExpr('(PredVP (DetCN (DetQuant IndefArt NumSg) (PossNP (AdjCN (PositA complete_A) (UseN collapse_N)) (UseQuantPN DefArt (SymbPN (MkSymb "U"))))) (UseComp (CompNP (MassNP (UseN dollar_N)))))')

        # ExtendedExpr wrappers around each raw expression.
        self.extExpr = ExtendedExpr(self.myexpr, None)
        self.extExprW = ExtendedExpr(self.myexprw, None)
        self.extExprQ = ExtendedExpr(self.myexprq, None)
        self.extExprS = ExtendedExpr(self.myexprs, None)

        # One aligned phrase pair: "source ||| target ||| alignment".
        self.bilingualPhraseSet = BilingualPhraseSet()
        self.bilingualPhraseSet.add("NATO ||| la OTAN ||| 0-0 0-1")

        # Parallel MWEs, each written as "<left tree> | <right tree>".
        self.mwe1 = ParallelMWE()
        self.mwe1.parse("( MassNP ( UseN safety_N ) ) | ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( UseN security_N ) )")

        self.mwe2 = ParallelMWE()
        self.mwe2.parse("( PossNP ( UseN wildcard_1 ) ( MassNP ( AdjCN ( PositA wildcard_2 ) ( UseN politics_N ) ) ) ) | ( PossNP ( UseN wildcard_1 ) ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( AdjCN ( PositA wildcard_2 ) ( UseN policy_N ) ) ) )")

        self.bilphrase = BilingualExpr()
        self.bilphrase.parse("( MassNP ( AdjCN ( PositA wildcard_2 ) ( UseN politics_N ) ) )  | ( DetCN ( DetQuant wildcard_IGNORE wildcard_IGNORE ) ( AdjCN ( PositA wildcard_2 ) ( UseN policy_N ) ) )", ignoreFreq=True)

        # Synonym table installed on the class after all parsing is done.
        ParallelMWE.synonymDict = {"politics_N": {"policy_N"}}

    def testNonLeafFunList(self):
        """Non-leaf functions of the plain expression are listed in order."""
        listOfFuns = self.extExpr.get_non_leaf_funtions()
        self.assertEqual(listOfFuns, ['AdjCN', 'PositA', 'UseN'])

    def testLeafFunList(self):
        """Leaf functions are listed in order for three kinds of expression."""
        listOfFuns = self.extExpr.get_leaf_functions()
        self.assertEqual(listOfFuns, ['crucial_A', 'item_N'])

        # The string literal "U" is expected to be reported as 'String_U'.
        listOfFuns = self.extExprS.get_leaf_functions()
        self.assertEqual(listOfFuns, ['IndefArt', 'NumSg', 'complete_A', 'collapse_N', 'DefArt', 'String_U', 'dollar_N'])

        # The '?' placeholder and wildcard names are expected as leaves too.
        listOfFuns = self.extExprQ.get_leaf_functions()
        self.assertEqual(listOfFuns, ['?', 'wildcard_3', 'wildcard_1', 'wildcard_2'])

    def testWildcardFunList(self):
        """Only the wildcard leaf is returned for the wildcard expression."""
        listOfFuns = self.extExprW.get_wildcard_leaf_functions()
        self.assertEqual(listOfFuns, ['wildcard_1'])

    def testExtractCandidateMWEs(self):
        """An expression paired with itself yields four equal-sided MWEs."""
        bilExpr = BilingualExpr()
        bilExpr.set_exprs(self.extExpr, self.extExpr)
        self.assertTrue(bilExpr.is_equal_sides())

        candidateMWEs = bilExpr.extract_candidate_mwes()
        self.assertEqual(len(candidateMWEs), 4)
        for mwestr in candidateMWEs:
            mwe = ParallelMWE()
            # Drop the first two " | "-separated fields and parse the rest
            # (presumably leading metadata columns -- confirm against
            # extract_candidate_mwes).
            mwe.parse(" | ".join(mwestr.split(" | ")[2:]))
            self.assertTrue(mwe.is_equal_sides())

    def testPrint(self):
        """str() of an ExtendedExpr can be re-read into an equal expression."""
        strrep = str(self.extExpr)
        myexpragain = pgf.readExpr(strrep)
        self.assertEqual(str(self.myexpr), str(myexpragain))

        strrep = str(self.extExprS)
        myexpragain = pgf.readExpr(strrep)
        self.assertEqual(str(self.myexprs), str(myexpragain))

    def testBilingualPhraseSet(self):
        """The stored pair and both single-word target variants are found."""
        self.assertTrue(self.bilingualPhraseSet.contains_biligual_phrase("NATO", "OTAN"))
        self.assertTrue(self.bilingualPhraseSet.contains_biligual_phrase("NATO", "la OTAN"))
        self.assertTrue(self.bilingualPhraseSet.contains_biligual_phrase("NATO", "la"))

    def testCompositionally(self):
        """``mwe1`` must not be flagged as reproduced by ``bilphrase``."""
        self.assertFalse(self.mwe1.is_bilexpr_matched_or_reproduced(self.bilphrase).reproduced)
     
 # Two independent probabilistic bilingual dictionaries. The "Inv" suffix
 # suggests the second one covers the inverse direction -- TODO confirm
 # against where they are populated.
 bilingualDictionary=GFProbabilisticBilingualDictionary()
 bilingualDictionaryInv=GFProbabilisticBilingualDictionary()
 
 # Read the source and target PGFs from the paths in args.source_pgf and
 # args.target_pgf, and use the first key of each grammar's `languages`
 # mapping as its language.
 sourcePGF=pgf.readPGF(args.source_pgf)
 sourceLanguage=list(sourcePGF.languages.keys())[0]
 targetPGF=pgf.readPGF(args.target_pgf)
 targetLanguage=list(targetPGF.languages.keys())[0]
 
 for line in sys.stdin:
     parts=line.split("~")
     sourcePart=parts[0]
     targetPart=parts[1]
     
     bilingualPhrases=BilingualPhraseSet()
     if args.with_bilingual_phrases:
         bilingualPhraseList=parts[2]
         for bil in bilingualPhraseList.split("\t"):
             bilingualPhrases.add(bil.strip())
     
     sourceTreesRaw=split_partial_parse(sourcePart)
     targetTreesRaw=split_partial_parse(targetPart)
     #targetTreesRaw=[]
     
     if DEBUG:
         print >> sys.stderr, "source trees:"
         for t in sourceTreesRaw:
             print >> sys.stderr, t
         print >> sys.stderr, "target trees:"
         for t in targetTreesRaw: