예제 #1
0
  def __init__(self,logger=None,loglevel=logging.INFO):
    """Create an empty Subtitle object with default settings.

    Args:
      logger: logger used for progress messages. When None, a logger is
        created with createLog(logname="subtitle", level=loglevel).
      loglevel: level passed to createLog; ignored when a logger is given.
    """
    self.logger = (
      logger if logger is not None
      else createLog(logname="subtitle", level=loglevel)
    )

    # Announce initialisation in the log, framed by separator lines.
    for message in ("\n-----------------",
                    "Subtitle begin to init",
                    "\n-----------------"):
      self.logger.info(message)

    # Inputs: files queued for processing and the raw text buffer.
    self.files = []
    self.raw = ""

    # Known-word collections start empty; the path to the persisted
    # lexicon stays unset until a caller provides one.
    self.lexicon = set()
    self.stem_lexicon = set()
    self.lexicon_path = None

    # Result holders, not yet computed.
    self.newWords = None
    self.wordSet = None
    self.stem_newWords = None

    self.checkup = False

    # Tokens kept in a separate "not used" set
    # (presumably ignored while collecting words -- TODO confirm in the parser).
    self.noUsed = set(("-", "", "'", "“", "—", "”"))
    # Punctuation characters, already backslash-escaped
    # (presumably for use inside a regular expression -- TODO confirm).
    self.punctuation = r".?\[\]!,\":%;()|^=+\/\\_`\*;.:><"

    # Every entry from the 'male.txt' and 'female.txt' word lists of
    # `names`, merged into a single set.
    all_names = names.words('male.txt') + names.words('female.txt')
    self.nameSet = set(all_names)
예제 #2
0
def main(argv=None, logger=None):
  """Parse command-line options, run a Subtitle analysis and print results.

  Args:
    argv: command-line arguments without the program name. Defaults to
      sys.argv[1:] when None.
    logger: logger to use. When None, one is created with
      createLog(logname="subtitle", level=logging.INFO).

  Options:
    -h, -?, --help   print usage and exit
    -v, --version    print version and exit
    -c, --checkup    set sub.checkup to True
    -d, --dir        not offered yet; prints a notice and exits
    -p, --pickle F   use F as the lexicon file
    -f, --file F     add F to the files to process
    -w, --word W     add W via sub.addWord
    -t, --type T     'word' or 'scan'; 'word' dumps data at the end
    -m, --limit N    show words, passing integer N to sub.words_show
    -l               show lines
    -W               show words
    -D               switch the logger to DEBUG level

  Remaining positional arguments are passed to sub.addFiles.

  Exits with status 2 (via sys.exit) when the options cannot be parsed or
  when the --limit value is not an integer.
  """
  if argv is None:
    argv=sys.argv[1:]
  if logger is None:
    logger=createLog(logname="subtitle",level=logging.INFO)

  # Single-argument print(...) prints the same text whether `print` is the
  # statement or the function.
  startDtime=datetime.now()
  print("Start time: "+str(startDtime))
  print("")
  sub=Subtitle(logger)

  try:
    # NOTE: every long option must be its own list element. A missing comma
    # between "checkup" and "file=" silently concatenates them into the
    # single option "checkupfile=", disabling both --checkup and --file.
    opts, args=getopt.getopt(
      argv,
      "hvf:w:t:d:p:?lm:WDc",
      ["help", "version", "checkup", "file=", "word=", "type=", "dir=",
       "pickle=", "limit="])
    logger.info("opts:{0};args:{1}".format(opts, args))
  except getopt.GetoptError as msg:
    print("error happened when get options!!! error:{0}".format(msg))
    usage()
    logger.error("getopt.GetoptError:{0}, exit!".format(msg))
    sys.exit(2)
  except Exception as msg:
    logger.error("error:{0}, exit!".format(msg))
    sys.exit(2)

  _is_lines_show=False
  _is_words_show=False
  sub_type=""
  words_limit=None
  for opt, arg in opts:
    if opt in ("-?","-h", "--help"):
      usage()
      sys.exit()
    elif opt in ("-v", "--version"):
      version()
      sys.exit()
    elif opt in ("-c", "--checkup"):
      sub.checkup=True
    elif opt in ("-d", "--dir"):
      print("Sorry, -d --dir option still not offer")
      sys.exit()
    elif opt in ("-p", "--pickle"):
      sub.setLexiconFile(arg)
    elif opt in ("-f", "--file"):
      sub.addFile(arg)
    elif opt == "-D":
      logger.setLevel(logging.DEBUG)
      sub.setLogger(logger)
    elif opt in ("-w", "--word"):
      sub.addWord(arg)
    elif opt in ("-t", "--type"):
      sub_type=arg
      if sub_type not in ("word", "scan"):
        usage()
        sys.exit()
    elif opt in ("-m", "--limit"):
      # Reject a non-numeric limit cleanly instead of dying with a traceback.
      try:
        words_limit=int(arg)
      except ValueError:
        print("error: limit must be an integer, got {0!r}".format(arg))
        usage()
        logger.error("invalid limit {0!r}, exit!".format(arg))
        sys.exit(2)
      _is_words_show=True
    elif opt == "-l":
      _is_lines_show=True
    elif opt == "-W":
      _is_words_show=True

  # Fall back to the default lexicon file when -p/--pickle was not given.
  if sub.lexicon_path is None:
    sub.setLexiconFile("lexicon.pickle")
  sub.loadOldData()

  # Positional arguments left over after option parsing.
  sub.addFiles(args)

  sub.parse()

  if _is_lines_show:
    sub.lines_show()

  if _is_words_show:
    sub.words_show(words_limit)
  sub.show()

  if sub_type=="word":
    sub.dumpData()

  print("")
  endDtime=datetime.now()
  print("End time: "+str(endDtime))
  elapsed=endDtime-startDtime
  print("Cost time: "+str(elapsed))
예제 #3
0
  if(sub_type =='word'):
    sub.dumpData()

  print 
  endDtime = datetime.now()
  print "End time: "+str(endDtime)
  timedelta  = endDtime-startDtime
  print "Cost time: "+str(timedelta) 

  #getChecksum(sys.argv[1])
  pass


if __name__ == "__main__":
  # Script entry point: log the invocation, then either run main() with the
  # arguments after the program name or report that none were supplied.
  logger = createLog(logname="subtitle", level=logging.INFO)
  logger.info("\n-------------------")
  logger.info(sys.argv)

  if len(sys.argv) >= 2:
    main(sys.argv[1:], logger)
    logger.info("\n-----------------\n\n\n")
  else:
    # No arguments at all: record the error, show usage, exit with status 2.
    logger.error("need args!!sys.argv:{0}".format(sys.argv))
    usage()
    logger.info("-----------------\n\n\n")
    sys.exit(2)