Пример #1
0
def create_sent_files(Sents,FPStem):
    """Write the sentence files derived from marked and unmarked sentences.

    Four files are written, each named by appending a suffix to ``FPStem``:

    * ``_sent_gen_umkd.txt``: the unmarked sentences, written through
      ``myModule.write_strlist_asline``.
    * ``_sent_opt_taken.txt`` and ``_sent_opt_nottaken.txt``: for every
      marked sentence that contains ``pos_sents.ZWNJ``, the first and the
      second element, respectively, of the pair returned by
      ``create_spacevariants`` (presumably the variant with and the variant
      without the optional spaces; confirm against that helper).
    * ``_sent_gen_mkd.txt``: the marked sentences that do not contain
      ``pos_sents.ZWNJ``, unchanged.

    In the last three files every entry is followed by a newline, and a
    file is still created (empty) when no sentence falls into its group.

    Args:
        Sents: pair ``(marked_sentences, unmarked_sentences)``.
        FPStem: path prefix shared by all output files.
    """
    zwnj = pos_sents.ZWNJ
    marked_sents, unmarked_sents = Sents

    myModule.write_strlist_asline(unmarked_sents, FPStem + '_sent_gen_umkd.txt')

    # Collect lines in lists and join once, instead of growing three strings
    # by repeated concatenation.
    taken_lines = []
    not_taken_lines = []
    plain_lines = []
    for sent in marked_sents:
        if zwnj in sent:
            taken, not_taken = create_spacevariants(sent)
            taken_lines.append(taken)
            not_taken_lines.append(not_taken)
        else:
            plain_lines.append(sent)

    # Build all texts before opening anything, so a bad entry fails before
    # any of these files is created or truncated.
    outputs = [
        (FPStem + '_sent_opt_taken.txt',
         ''.join(line + '\n' for line in taken_lines)),
        (FPStem + '_sent_opt_nottaken.txt',
         ''.join(line + '\n' for line in not_taken_lines)),
        (FPStem + '_sent_gen_mkd.txt',
         ''.join(line + '\n' for line in plain_lines)),
    ]

    for path, text in outputs:
        # NOTE(review): the files are opened with the platform default
        # encoding, as before; consider an explicit encoding if the text
        # must round-trip non-ASCII characters on every platform.
        # The context manager closes the handle even if the write fails.
        with open(path, 'w') as out_file:
            out_file.write(text)
Пример #2
0
def create_wc_files(CldWCs,FPStem):
    """Write the word-combination files, split by length and space type.

    Four files are written, each named by appending a suffix to ``FPStem``:

    * ``_wc_longer.txt``: entries of length 9 or 10, optional-space ones
      first, then the mandatory-space ones that are not also optional.
    * ``_wc_shorter_mand.txt``: mandatory-space entries of length 8 or less
      that are not also optional.
    * ``_wc_shorter_opt_taken.txt`` and ``_wc_shorter_opt_nottaken.txt``:
      for every optional-space entry of length 8 or less, the first and the
      second element, respectively, of the pair returned by
      ``create_spacevariants`` (presumably the variant with and the variant
      without the optional space; confirm against that helper).

    The first two files are written through
    ``myModule.write_strlist_asline``. In the last two, every entry is
    followed by a newline, and the files are created even when empty.
    Entries longer than 10 are not written to any file.

    Args:
        CldWCs: mapping with keys ``'mand'`` and ``'opt'``; the two values
            must support ``-`` and iteration (assumed to be sets, so that
            the subtraction is a set difference; confirm against caller).
        FPStem: path prefix shared by all output files.
    """
    mand_wcs = CldWCs['mand']
    opt_wcs = CldWCs['opt']
    # Entries present in both groups are treated as optional only, which
    # keeps the mandatory and optional outputs mutually exclusive.
    mand_only = mand_wcs - opt_wcs

    longer = (
        [wc for wc in opt_wcs if len(wc) in (9, 10)]
        + [wc for wc in mand_only if len(wc) in (9, 10)]
    )
    shorter_opt = [wc for wc in opt_wcs if len(wc) <= 8]
    shorter_mand = [wc for wc in mand_only if len(wc) <= 8]

    myModule.write_strlist_asline(longer, FPStem + '_wc_longer.txt')
    myModule.write_strlist_asline(shorter_mand, FPStem + '_wc_shorter_mand.txt')

    # Collect both variants in lists and join once, instead of growing two
    # strings by repeated concatenation.
    taken_lines = []
    not_taken_lines = []
    for wc in shorter_opt:
        taken, not_taken = create_spacevariants(wc)
        taken_lines.append(taken)
        not_taken_lines.append(not_taken)

    # Build both texts before opening anything, so a bad entry fails before
    # either file is created or truncated.
    outputs = [
        (FPStem + '_wc_shorter_opt_taken.txt',
         ''.join(line + '\n' for line in taken_lines)),
        (FPStem + '_wc_shorter_opt_nottaken.txt',
         ''.join(line + '\n' for line in not_taken_lines)),
    ]

    for path, text in outputs:
        # NOTE(review): the files are opened with the platform default
        # encoding, as before; consider an explicit encoding if the text
        # must round-trip non-ASCII characters on every platform.
        # The context manager closes the handle even if the write fails.
        with open(path, 'w') as out_file:
            out_file.write(text)