def __init__(self, logger=None, loglevel=logging.INFO):
    """Set up logging and the empty containers used by the instance.

    Parameters:
        logger:   logger to use; when None, one is created with
                  createLog(logname="subtitle", level=loglevel).
        loglevel: level handed to createLog; ignored when a logger is given.
    """
    # createLog is only called when the caller did not supply a logger.
    self.logger = (
        createLog(logname="subtitle", level=loglevel)
        if logger is None
        else logger
    )

    banner = "\n-----------------"
    self.logger.info(banner)
    self.logger.info("Subtitle begin to init")
    self.logger.info(banner)

    # Input files and the plain / stemmed lexicon sets start out empty.
    self.files = []
    self.lexicon = set()
    self.stem_lexicon = set()

    # Fixed set of strings kept in noUsed (presumably tokens to discard --
    # the code that consumes it is outside this block).
    self.noUsed = set([
        "-",
        "",
        "'",
        "“",
        "—",
        "”",
    ])
    # self.suffix = set(["'s", "'d", "'ve"])

    # Results are unset until later processing fills them in.
    self.newWords = None
    self.wordSet = None
    self.stem_newWords = None
    self.checkup = False

    # Raw-string run of punctuation characters, already backslash-escaped
    # (looks intended for use inside a regex character class -- confirm at
    # the point of use).
    self.punctuation = r".?\[\]!,\":%;()|^=+\/\\_`\*;.:><"
    # self.addPunctuation([',', '!', ';', '.', ':', '>', '<'])

    self.raw = ""
    self.lexicon_path = None

    # Union of the male and female word lists returned by names.words().
    male_names = names.words('male.txt')
    female_names = names.words('female.txt')
    self.nameSet = set(male_names + female_names)
def main(argv=None, logger=None):
    """Command-line entry point: parse options, run one Subtitle pass, print timing.

    Parameters:
        argv:   argument list without the program name (the script entry
                point passes sys.argv[1:]).
        logger: logger to use; when None, one is created with
                createLog(logname="subtitle", level=logging.INFO).

    Options handled here:
        -h, -?, --help       call usage() and exit
        -v, --version        call version() and exit
        -c, --checkup        set sub.checkup to True
        -d, --dir DIR        not offered yet: prints a notice and exits
        -p, --pickle FILE    lexicon file, passed to sub.setLexiconFile
        -f, --file FILE      passed to sub.addFile
        -w, --word WORD      passed to sub.addWord
        -t, --type TYPE      'word' or 'scan'; 'word' calls sub.dumpData()
                             at the end of the run
        -m, --limit N        integer passed to sub.words_show(); also turns
                             the words display on
        -l                   call sub.lines_show()
        -W                   call sub.words_show()
        -D                   set the logger to DEBUG level

    Remaining positional arguments are passed to sub.addFiles().

    Exits through sys.exit(2) when option parsing fails; the help, version,
    --dir and invalid --type paths exit through sys.exit() with no status.
    """
    if logger is None:
        logger = createLog(logname="subtitle", level=logging.INFO)

    # The single-argument print(...) form produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    startDtime = datetime.now()
    print("Start time: " + str(startDtime))
    print("")

    sub = Subtitle(logger)

    short_opts = "hvf:w:t:d:p:?lm:WDc"
    # NOTE: every entry needs its own comma. Adjacent string literals are
    # concatenated, which previously merged "checkup" and "file=" into the
    # single bogus long option "checkupfile=".
    long_opts = [
        "help",
        "version",
        "checkup",
        "file=",
        "word=",
        "type=",
        "dir=",
        "pickle=",
        "limit=",
    ]

    try:
        opts, args = getopt.getopt(argv, short_opts, long_opts)
        logger.info("opts:{0};args:{1}".format(opts, args))
    except getopt.GetoptError as msg:
        print("error happened when get options!!! error:{0}".format(msg))
        usage()
        logger.error("getopt.GetoptError:{0}, exit!".format(msg))
        sys.exit(2)
    except Exception as msg:
        logger.error("error:{0}, exit!".format(msg))
        sys.exit(2)

    _is_lines_show = False
    _is_words_show = False
    sub_type = ""
    words_limit = None

    # Options are applied in the order they were given on the command line.
    for opt, arg in opts:
        if opt in ("-?", "-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-v", "--version"):
            version()
            sys.exit()
        elif opt in ("-c", "--checkup"):
            sub.checkup = True
        elif opt in ("-d", "--dir"):
            print("Sorry, -d --dir option still not offer")
            sys.exit()
        elif opt in ("-p", "--pickle"):
            sub.setLexiconFile(arg)
        elif opt in ("-f", "--file"):
            sub.addFile(arg)
        elif opt == "-D":
            logger.setLevel(logging.DEBUG)
            sub.setLogger(logger)
        elif opt in ("-w", "--word"):
            sub.addWord(arg)
        elif opt in ("-t", "--type"):
            sub_type = arg
            if sub_type not in ("word", "scan"):
                usage()
                sys.exit()
        elif opt in ("-m", "--limit"):
            try:
                words_limit = int(arg)
            except ValueError:
                # Report a non-numeric limit like any other option error
                # instead of dying with a traceback.
                print("error happened when get options!!! "
                      "error:limit must be an integer, got {0!r}".format(arg))
                usage()
                logger.error("invalid limit:{0!r}, exit!".format(arg))
                sys.exit(2)
            # Giving a limit implies the words display.
            _is_words_show = True
        elif opt == "-l":
            # show lines
            _is_lines_show = True
        elif opt == "-W":
            # show words
            _is_words_show = True

    # Fall back to the default lexicon file when -p/--pickle was not given.
    if sub.lexicon_path is None:
        sub.setLexiconFile("lexicon.pickle")
    sub.loadOldData()
    sub.addFiles(args)
    sub.parse()

    if _is_lines_show:
        sub.lines_show()
    if _is_words_show:
        sub.words_show(words_limit)
    sub.show()

    if sub_type == "word":
        sub.dumpData()

    print("")
    endDtime = datetime.now()
    print("End time: " + str(endDtime))
    elapsed = endDtime - startDtime
    print("Cost time: " + str(elapsed))
if __name__ == "__main__":
    # Script entry point: create the "subtitle" logger, record the raw
    # command line, then hand everything after the program name to main().
    logger = createLog(logname="subtitle", level=logging.INFO)
    logger.info("\n-------------------")
    logger.info(sys.argv)

    if len(sys.argv) < 2:
        # Nothing but the program name was given: log it, show the usage
        # text and exit with status 2.
        logger.error("need args!!sys.argv:{0}".format(sys.argv))
        usage()
        logger.info("-----------------\n\n\n")
        sys.exit(2)

    main(sys.argv[1:], logger)
    logger.info("\n-----------------\n\n\n")