Example #1
File: histogram.py  Project: menkhus/miner
def word_histogram(source):
    """Create histogram of normalized words (no punct or digits)
	scale that in terms of percentage"""
    hist = {}
    trans = maketrans('','')
    if type(source) in (StringType,UnicodeType):  # String-like src
        for word in split(source):
            word = translate(word, trans, punctuation+digits)
            word=word.lower()
            if len(word) > 0:
                hist[word] = hist.get(word,0) + 1
    elif hasattr(source,'read'):                  # File-like src
        try:
            from xreadlines import xreadlines     # Check for module
            for line in xreadlines(source):
                for word in split(line):
                    word = translate(word, trans, punctuation+digits)
                    word=word.lower()
                    if len(word) > 0:
                        hist[word] = hist.get(word,0) + 1
        except ImportError:                       # Older Python ver
            line = source.readline()          # Slow but mem-friendly
            while line:
                for word in split(line):
                    word = translate(word, trans, punctuation+digits)
                    word=word.lower()
                    if len(word) > 0:
                        hist[word] = hist.get(word,0) + 1
                line = source.readline()
    else:
        raise TypeError, \
              "source must be a string-like or file-like object"
    return hist
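
A minimal usage sketch for word_histogram() above (the sample sentence and commented-out file name are invented for illustration); a string source simply yields a dictionary of word counts:

counts = word_histogram("The quick brown fox jumps over the lazy dog. The END!")
print counts.get('the')     # -> 3 (case-folded, punctuation stripped)
# A file-like source works the same way:
# counts = word_histogram(open('words.txt'))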
Example #2
def scanIds(package):
    """Scans all our python source files for _svn_id lines of the form
       seen in this file above, returning a dictionary of Id strings for
       each filename. This doesn't import the file, since we want to
       scan its version even if it won't import (for example, it needs
       a module we don't have)
       """
    import os, re
    import PGBuild
    import PGBuild.Errors
    from xreadlines import xreadlines

    packagePath = package.__path__[0]
    moduleDict = {}
    svnlineRe = re.compile("^\s*_svn_id")

    # First, search for source files and fill moduleDict with
    # module name - module path pairs, without importing anything.
    def visit(arg, dirname, names):
        for name in names:
            # Ignore editor backups and such
            if name.endswith(".py") and not (name.startswith(".")
                                             or name.startswith("#")):
                modulePath = os.path.join(dirname, name)
                modPathList = modulePath[len(packagePath):].split(os.sep)
                # Strip off the .py extension, strip off __init__ modules from packages
                modPathList[-1] = modPathList[-1][:-3]
                if modPathList[-1] == "__init__":
                    del modPathList[-1]
                moduleName = package.__name__ + ".".join(modPathList)
                moduleDict[moduleName] = modulePath

    os.path.walk(packagePath, visit, None)

    # Open each module and search for the _svn_id line. The line must start with _svn_id,
    # but we do allow a little flexibility since we exec the line to get its value.
    for module in moduleDict:
        path = moduleDict[module]
        file = open(path)
        id = None
        for line in xreadlines(file):
            if svnlineRe.search(line):
                env = {'_svn_id': None}
                try:
                    exec line in env
                except:
                    raise PGBuild.Errors.InternalError(
                        "Malformed _svn_id found in module %s" % module)
                id = env['_svn_id']
                break
        file.close()
        if not id:
            raise PGBuild.Errors.InternalError("Module %s has no _svn_id" %
                                               module)
        moduleDict[module] = id
    return moduleDict
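
A hedged usage sketch for scanIds(): assuming the PGBuild package imported inside the function is importable at the call site as well, the returned dictionary maps dotted module names to their _svn_id strings:

import PGBuild

for name, svn_id in scanIds(PGBuild).items():
    print name, svn_id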
Example #3
    def readboard(boardname):
        elements = []
        if os.access(boarddb_dir + boardname, os.F_OK):
            boarddb = file(boarddb_dir + boardname)
            for line in xreadlines.xreadlines(boarddb):
                elements.append(string.split(line, None, 1))

            boarddb.close()

        return elements
Example #4
 def evalSynth(self,synthname):
     tmpname=tempfile.mktemp(".sc")
     outfile=open(tmpname,"w")
     outfile.write('SynthDef("'+synthname+'",{')
     for line in xreadlines.xreadlines(open(synthname+".sc","r")):
         outfile.write(line)
     outfile.write('}).send(Server.new(\localhost,NetAddr("'+self.ip+'",'+str(self.port)+')););\n')
     outfile.close()
     os.system("sclang "+tmpname)
     os.system("rm "+tmpname)
Example #5
 def evalSynth(self, synthname):
     tmpname = tempfile.mktemp(".sc")
     outfile = open(tmpname, "w")
     outfile.write('SynthDef("' + synthname + '",{')
     for line in xreadlines.xreadlines(open(synthname + ".sc", "r")):
         outfile.write(line)
     outfile.write('}).send(Server.new(\localhost,NetAddr("' + self.ip +
                   '",' + str(self.port) + ')););\n')
     outfile.close()
     os.system("sclang " + tmpname)
     os.system("rm " + tmpname)
Example #6
        def readlog(self, name):
            elements = {}
            if os.access(self.logdb_dir + name + self.logdb_file, os.F_OK):
                logdb = file(self.logdb_dir + name + self.logdb_file)
                for line in xreadlines.xreadlines(logdb):
                    key, data = string.split(line, None, 1)
                    elements[key] = string.strip(data)

                logdb.close()

            return elements
Example #7
 def __init__(self, fileobj, continued=None):
     # self.seq: the underlying line-sequence
     # self.phys_num: current index into self.seq (physical line number)
     # self.logi_num: current index into self (logical line number)
     import xreadlines
     try: self.seq = fileobj.xreadlines()
     except AttributeError: self.seq = xreadlines.xreadlines(fileobj)
     self.phys_num = 0
     self.logi_num = 0
     # allow for optional passing of continued-function
     if not callable(continued):
         def continued(line):
             if line.endswith('\\\n'): return 1,line[:-2]
             else: return 0, line
     self.continued = continued
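
The continued callback protocol above (return a flag plus the text to keep) also admits custom policies. A hypothetical alternative that treats a trailing comma as a line continuation, which could be passed as the continued argument to this constructor:

def comma_continued(line):
    # Sketch only: a line ending in ',' continues onto the next physical line.
    stripped = line.rstrip('\n')
    if stripped.endswith(','):
        return 1, stripped
    return 0, line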
Example #8
 def __init__(self, fileobj, separator=None):
     # self.seq: the underlying line-sequence
     # self.line_num: current index into self.seq (line number)
     # self.para_num: current index into self (paragraph number)
     import xreadlines
     try: self.seq = fileobj.xreadlines()
     except AttributeError: self.seq = xreadlines.xreadlines(fileobj)
     self.line_num = 0
     self.para_num = 0
     # allow for optional passing of separator-function
     if separator is None:
         def separator(line): return line == '\n'
     elif not callable(separator):
         raise TypeError, "separator argument must be callable"
     self.separator = separator
Example #9
 def __init__(self, fileobj, continued=None):
     # self.seq: the underlying line-sequence
     # self.phys_num: current index into self.seq (physical line number)
     # self.logi_num: current index into self (logical line number)
     import xreadlines
     try: self.seq = fileobj
     except AttributeError: self.seq = xreadlines.xreadlines(fileobj)
     self.phys_num = 0
     self.logi_num = 0
     # allow for optional passing of continued-function
     if not callable(continued):
         def continued(line):
             if line.endswith('\\\n'): return 1,line[:-2]
             else: return 0, line
     self.continued = continued
Example #10
 def __init__(self, fileobj, separator=None):
     # self.seq: the underlying line-sequence
     # self.line_num: current index into self.seq (line number)
     # self.para_num: current index into self (paragraph number)
     import xreadlines
     try: self.seq = fileobj
     except AttributeError: self.seq = xreadlines.xreadlines(fileobj)
     self.line_num = 0
     self.para_num = 0
     # allow for optional passing of separator-function
     if separator is None:
         def separator(line): return line == '\n'
     elif not callable(separator):
         raise TypeError("separator argument must be callable")
     self.separator = separator
Example #11
def main():
	import sys
	
	if len(sys.argv) < 3:
		print __doc__
		return

	if not os.path.isfile(sys.argv[1]):
		print __doc__
		return
	
	settings = sys.argv[1]
	folder = sys.argv[2]
	
	if not os.path.isdir(folder):
		os.makedirs(folder)

	nicklist = map(extract, xreadlines(open(settings)))

	for nick in nicklist:
		writePerson(folder, nick)	
Example #12
def read_words():
    """Read from stdin onto all_words"""
    # indexed by length; contents is a list of words of that length
    by_len = {}

    # XXX: You'll get a deprecation warning here for Python 2.3.  I just use
    # xreadlines for the benefit of old machines.

    for w in xreadlines(sys.stdin):
        if w[-1] == '\n':
            w = w[:-1]  # chomp

        # check chars are reasonable
        if not letters_re.match(w):
            raise ValueError()

        w = w.lower()
        l = len(w)

        # Put it into the right bucket for its length.  Make a new
        # one if needed.
        wl = by_len.get(l)
        if wl is None:
            wl = []
            by_len[l] = wl
        wl.append(w)

    # Now join up all the buckets so that we have one big list, sorted by
    # word length
    all_words = []
    lens = by_len.keys()
    lens.sort()
    for l in lens:
        all_words.extend(by_len[l])

    return all_words
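
As the comment above notes, xreadlines is only used for the benefit of old interpreters; on Python 2.2 and later, file objects (including sys.stdin) are directly iterable, so the same read loop can be written without the module. A minimal sketch of that variant:

import sys

for w in sys.stdin:
    if w.endswith('\n'):
        w = w[:-1]          # chomp, as in read_words() above
    print w.lower()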
Example #13
def read_words():
    """Read from stdin onto all_words"""
    # indexed by length; contents is a list of words of that length
    by_len = {}

    # XXX: You'll get a deprecation warning here for Python 2.3.  I just use
    # xreadlines for the benefit of old machines.
    
    for w in xreadlines(sys.stdin):
        if w[-1] == '\n':
            w = w[:-1]                  # chomp

        # check chars are reasonable
        if not letters_re.match(w):
            raise ValueError()

        w = w.lower()
        l = len(w)

        # Put it into the right bucket for its length.  Make a new
        # one if needed.
        wl = by_len.get(l)
        if wl is None:
            wl = []
            by_len[l] = wl
        wl.append(w)

    # Now join up all the buckets so that we have one big list, sorted by
    # word length
    all_words = []
    lens = by_len.keys()
    lens.sort()
    for l in lens:
        all_words.extend(by_len[l])

    return all_words
Example #14
			raise ValueError, "HrefGetter: writer must be callable."
	def handle_starttag(self, tag, attrs):
		if tag == "a":
			for attr, value in attrs:
				if attr == "href":
					self.current_href = value
			self.state = "a"
	def handle_data(self, data):
		if self.state == "a":
			self.adata = data
	def handle_endtag(self, tag):
		if tag == "a" and self.state == "a":
			self.writer(self.current_href, self.adata)
			self.state = ""
			self.adata = ""

def default_writer(href, data):
	print href, data

if __name__ == "__main__":
	import sys, xreadlines
	if len(sys.argv) < 2:
		print "%s <htmlfile>" % (sys.argv[0])
		print "emits the hyperlink reference and associated text found in an HTML file."
		sys.exit(1)
	hget = HrefGetter(default_writer)
	fo = open(sys.argv[1], "r")
	for line in xreadlines.xreadlines(fo):
		hget.feed(line)
	fo.close()

Example #15
import sys
from xreadlines import xreadlines

for line in xreadlines(sys.stdin):
    if line.startswith("%%BoundingBox:"):
        parts = line.split()
        x0, y0, x1, y1 = int(parts[1]), int(parts[2]), int(parts[3]), int(parts[4])
        x1 += x0
        y1 += y0
        x0 = 0
        y0 = 0
        line = "%%%%BoundingBox: %d %d %d %d\n" % (x0, y0, x1, y1)
    sys.stdout.write(line)
Example #16
File: __init__.py  Project: SiloDS/RToolDS
 def xreadlines(self, sizehint=-1):
     """Emulate file(...).xreadlines(...)."""
     import xreadlines
     return xreadlines.xreadlines(self)
Example #17
File: nfs4lib.py  Project: fxia22/ASM_xf
 def xreadlines(self):
     if self.closed:
         raise ValueError("I/O operation on closed file")
     import xreadlines
     return xreadlines.xreadlines(self)
Example #18
def get_build_requires(spec, bconds_with, bconds_without):
    cond_rx = re.compile(r"%\{(\!\?|\?\!|\?)([a-zA-Z0-9_+]+)\s*:([^%\{\}]*)\}")

    def expand_conds(l):
        def expand_one(m):
            if m.group(1) == "?":
                if macros.has_key(m.group(2)):
                    return m.group(3)
            else:
                if not macros.has_key(m.group(2)):
                    return m.group(3)
            return ""

        for i in range(10):
            l = cond_rx.sub(expand_one, l)
            if len(l) > 1000: break

        return l

    macro_rx = re.compile(r"%\{([a-zA-Z0-9_+]+)\}")

    def expand_macros(l):
        def expand_one(m):
            if macros.has_key(m.group(1)):
                return string.strip(macros[m.group(1)])
            else:
                return m.group(0)  # don't change

        for i in range(10):
            l = macro_rx.sub(expand_one, l)
            if len(l) > 1000: break

        return expand_conds(l)

    simple_br_rx = re.compile(r"^BuildRequires\s*:\s*([^\s]+)", re.I)
    bcond_rx = re.compile(r"^%bcond_(with|without)\s+([^\s]+)")
    version_rx = re.compile(r"^Version\s*:\s*([^\s]+)", re.I)
    release_rx = re.compile(r"^Release\s*:\s*([^\s]+)", re.I)
    name_rx = re.compile(r"^Name\s*:\s*([^\s]+)", re.I)
    define_rx = re.compile(r"^\%define\s+([a-zA-Z0-9_+]+)\s+(.*)", re.I)
    any_br_rx = re.compile(r"BuildRequires", re.I)

    macros = {}
    for b in bconds_with:
        macros["_with_%s" % b] = 1
    for b in bconds_without:
        macros["_without_%s" % b] = 1

    macros["__perl"] = "/usr/bin/perl"
    macros["_bindir"] = "/usr/bin"
    macros["_sbindir"] = "/usr/sbin"
    macros["kgcc_package"] = "gcc"

    build_req = []

    f = open(spec)
    for l in xreadlines.xreadlines(f):
        l = string.strip(l)
        if l == "%changelog": break

        # %bcond_with..
        m = bcond_rx.search(l)
        if m:
            bcond = m.group(2)
            if m.group(1) == "with":
                if macros.has_key("_with_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            else:
                if not macros.has_key("_without_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            continue

        # name,version,release
        m = version_rx.search(l)
        if m: macros["version"] = m.group(1)
        m = release_rx.search(l)
        if m: macros["release"] = m.group(1)
        m = name_rx.search(l)
        if m: macros["name"] = m.group(1)

        # %define
        m = define_rx.search(l)
        if m: macros[m.group(1)] = m.group(2)

        # *BuildRequires*
        if any_br_rx.search(l):
            l = expand_macros(l)
            m = simple_br_rx.search(l)
            if m:
                build_req.append(m.group(1))
            else:
                if l <> "" and l[0] <> '#':
                    msg("spec error (%s): %s\n" % (spec, l))

    for x in build_req:
        print(x)
Example #19
 def xreadlines(self):
     return xreadlines(self.__sio)
Example #20
File: pytrainhmm.py  Project: PpKarOn/febrl
def trainhmm():
  """Main routine, open file, read lines, train HMM and save it to file.

  USAGE:
    trainhmm()

  ARGUMENTS:
    None

  DESCRIPTION:
    Main routine, see description of module above.
  """

  # Process command line arguments and check for correctness  - - - - - - - - -
  #
  if (len(config.options) < 3):
    print '***** Error: %s needs at least four arguments:'% (sys.argv[0])
    print '*****        - Name of the project module'
    print '*****        - Tagging mode: "name" or "locality"'
    print '*****        - Input training file name'
    print '*****        - HMM output file name'
    print '*****          plus options'
    raise Exception()

  if (config.options[1] == config.options[2]):
    print '*** Error: Input and output files must differ'
    print '***        Input training file name:', config.options[1]
    print '***        HMM output file name:    ', config.options[1]
    raise Exception()

  in_file_name  = config.options[1]
  hmm_file_name = config.options[2]

  # Get tagging mode/lookup-tables used - - - - - - - - - - - - - - - - - - - -
  #
  tag_mode = config.options[0]
  if (tag_mode in ['name','na','n']):
    tag_mode = 'name'
  elif (tag_mode in ['locality','lolty','loc','l']):
    tag_mode = 'loc'
  else:
    print '***** Error: Illegal tagging mode:', tag_mode
    print '*****        Must be either "name" or "locality"'
    raise Exception()

  # Check for optional arguments and process if any - - - - - - - - - - - - - -
  #
  config.verbose = 0     # Default: No verbose output
  config.logging = 0     # Default: No logging into a file
  smoothing      = None  # Default: No smoothing
  config.nowarn  = 0     # Deactivate no warning flag (print/log warning
                         # messages)

  if (len(config.options) > 3):
    options =  config.options[3:]
    while (options != []):  # Do a loop processing all options

      if (options[0] == '-nowarn'):
        config.nowarn = 1  # Activate no warning flag
        options = options[1:]  # Remove processed '-nowarn' option

      elif (options[0] == '-v1'):
        config.verbose = 1  # Set to verbose output level 1
        options = options[1:]  # Remove processed '-v1' option

      elif (options[0] == '-v2'):
        config.verbose = 2  # Set to verbose output level 2
        options = options[1:]  # Remove processed '-v2' option

      elif (options[0] == '-l'):
        config.logging = 1
        if (len(options) > 1):
          if (options[1][0] != '-'):  # Not another option, must be a file name
            config.log_file = options[1]  # Get name of log file
            options = options[1:]  # Remove file_name
        options = options[1:]  # Remove processed -'l' option only

        try:
          f_log = open(config.log_file,'a')  # Test if file is appendable
        except:
          print '***** Error ********************',
          print '***** Cannot write to log file: '+config.log_file
          raise IOError()

        # Write (append) header to log file
        #
        f_log.write(os.linesep)
        f_log.write('##################################################')
        f_log.write("############"+os.linesep)
        f_log.write("#"+os.linesep)
        f_log.write("# 'pyTrainHMM.py - Version 0.1' process started at: ")
        f_log.write(time.ctime(time.time())+os.linesep)
        f_log.write("#"+os.linesep)
        f_log.write("# Input file name: "+in_file_name+os.linesep)
        f_log.write("# HMM file name:   "+hmm_file_name+os.linesep)
        f_log.write(os.linesep)
        f_log.close()

      elif (options[0] == '-s'):
        smoothing = 1  # Set to do a HMM smoothing
        smoothing = options[1]
        if (smoothing in ['l','la','lap','laplac','laplace']):
          smoothing = 'laplace'
        elif (smoothing in ['a','ad','abs','absd','absdis','absdisc',\
               'absdiscount']):
          smoothing = 'absdiscount'
        else:  # Illegal value
          print "*** Error: Illegal value for 'smoothing' argument:", smoothing
          print "***        Possible are: 'laplace' or 'absdiscount'"
          raise Exception()

        options = options[2:]  # Remove processed option

      else:
        print '*** Error: Illegal option:', options[0]
        raise Exception()

  # Get HMM states and observations from configuration module - - - - - - - - -
  #
  if (tag_mode == 'name'): 
    state_list = config.name_hmm_states
    obser_list = config.name_hmm_obser

  else:
    state_list = config.geoloc_hmm_states
    obser_list = config.geoloc_hmm_obser

  # Open input file - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #
  try:
    f_in = open(in_file_name,'r')
  except:
    inout.log_message('Cannot open input file: '+in_file_name,'err')
    raise IOError()

  line_count = 0  # Counter for lines read
  rec_count  = 0  # Counter for training records read

  # Read lines, discard comment lines and process training data lines - - - - -
  #
  training_data = []  # List of training records

  train_list = []  # List of training sequences (dictionaries), extracted from
                   # training data

  for line in xreadlines.xreadlines(f_in):

    if (line[0] != '#') and (line.strip() != ''):
      # Line must contain a training record

      line = line.strip()  # Remove line separators
      config.curr_line = line  # Make a copy of the unprocessed current line

      line_list = line.split(',')  # Split into a list of elements
      line_data = []  # Training data list for one training record

      inout.log_message(['Record number: '+str(rec_count)],'v1')
      config.curr_line_no = line_count  # Store current line number

      for elem in line_list:
        [k,v] = elem.split(':')  # Split into key and value
        tag = k.strip()
        state = v.strip()
        line_data.append((state,tag))

        if (state not in state_list):
          msg = ['Illegal state name in training record: '+state, \
                 'Line: '+str(line_count)+', record: '+str(rec_count), \
                 'Possible values: '+str(state_list)]
          inout.log_message(msg,'err')
          raise Exception()

        if (tag not in obser_list):
          msg = ['Illegal observation (tag) name in training record: '+tag, \
                 'Line: '+str(line_count)+', record: '+str(rec_count), \
                 'Possible values: '+str(obser_list)]
          inout.log_message(msg,'err')
          raise Exception()

      inout.log_message('  Training record '+str(rec_count)+':'+ \
                        str(line_data),'v1')

      train_list.append(line_data)

      rec_count += 1
      inout.log_message('','v1')  # Print empty lines between records

    line_count += 1

  # Close input file  - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #
  f_in.close()

  inout.log_message('','v1')  # Print empty lines between records

  # Initalise HMM and train it with training data - - - - - - - - - - - - - - -
  #
  myhmm = simplehmm.hmm(state_list, obser_list)

  myhmm.train(train_list,smoothing)
  myhmm.print_hmm()

  # Save trained HMM  - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  #
  myhmm.save_hmm(hmm_file_name)  

  inout.log_message(['Read '+str(line_count)+' lines, processed '+ \
                    str(rec_count)+' training records', 'End.'],'v1')
Example #21
 def xreadlines(self):
     if self.closed:
         raise ValueError("I/O operation on closed file")
     import xreadlines
     return xreadlines.xreadlines(self)
Example #22
File: generate.py  Project: PpKarOn/febrl
for field_dict in field_list:
  field_name = field_dict['name']

  if (field_dict['type'] == 'freq'):  # Check for 'freq' field type

    file_name = field_dict['freq_file']  # Get the corresponding file name

    if (file_name != None):
      try:
        fin = open(file_name)  # Open file for reading
      except:
        print '  Error: Can not open frequency file %s' % (file_name)
        raise Exception
      value_list = []  # List with all values of the frequency file

      for line in xreadlines.xreadlines(fin):
        line = line.strip()
        line_list = line.split(',')
        if (len(line_list) != 2):
          print '  Error: Illegal format in  frequency file %s: %s' % \
                (file_name, line)
          raise Exception

        line_val =  line_list[0].strip()
        line_freq = int(line_list[1])

        # Append value as many times as given in frequency file
        #
        value_list += [line_val]* line_freq

      random.shuffle(value_list)  # Randomly shuffle the list of values
Example #23
def get_poldek_requires():
    # precompile regexps
    name_rx = re.compile(r"\d+\. ([^\s]+)-[^-]+-[^-]+\n")
    req_rx = re.compile(r" req .* --> (.*)\n")
    pkg_name_rx = re.compile(r"([^\s]+)-[^-]+-[^-]+")

    # todo: if a and b are sets, then use sets module
    # and intersection method on set object
    def intersect(a, b):
        r = []
        for x in a:
            if x in b: r.append(x)
        return r

    # add given req-list to cur_pkg_reqs
    def add_req(reqs):
        if len(reqs) == 1:
            if reqs[0] not in cur_pkg_reqs:
                cur_pkg_reqs.append(reqs[0])
        else:
            did = 0
            for x in cur_pkg_reqs:
                if type(x) is types.ListType:
                    i = intersect(x, reqs)
                    if len(i) == 0:
                        continue
                    did = 1
                    idx = cur_pkg_reqs.index(x)
                    if len(i) == 1:
                        if i[0] in cur_pkg_reqs:
                            del cur_pkg_reqs[idx]
                        else:
                            cur_pkg_reqs[idx] = i[0]
                    else:
                        cur_pkg_reqs[idx] = i
                else:
                    if x in reqs:
                        return
            if not did:
                cur_pkg_reqs.append(reqs)

    pkg_reqs = {}
    cur_pkg_reqs = None
    cur_pkg = None

    f = chr_popen("poldek -v -v --verify --unique-pkg-names")
    for l in xreadlines.xreadlines(f):
        m = name_rx.match(l)
        if m:
            if cur_pkg:
                pkg_reqs[cur_pkg] = cur_pkg_reqs
            cur_pkg = m.group(1)
            if pkg_reqs.has_key(cur_pkg):
                cur_pkg = None
                cur_pkg_reqs = None
            else:
                cur_pkg_reqs = []
            continue
        m = req_rx.match(l)
        if m:
            reqs = []
            for x in string.split(m.group(1)):
                if x in ["RPMLIB_CAP", "NOT", "FOUND", "UNMATCHED"]: continue
                m = pkg_name_rx.match(x)
                if m:
                    reqs.append(m.group(1))
                else:
                    msg("poldek_reqs: bad pkg name: %s\n" % x)
            if len(reqs) != 0: add_req(reqs)

    f.close()

    if cur_pkg:
        pkg_reqs[cur_pkg] = cur_pkg_reqs

    return pkg_reqs
Example #24
 def xreadlines(self, sizehint=-1):
     """Emulate file(...).xreadlines(...)."""
     import xreadlines
     return xreadlines.xreadlines(self)
Example #25
def trainhmm():
    """Main routine, open file, read lines, train HMM and save it to file.

  USAGE:
    trainhmm()

  ARGUMENTS:
    None

  DESCRIPTION:
    Main routine, see description of module above.
  """

    # Process command line arguments and check for correctness  - - - - - - - - -
    #
    if (len(config.options) < 3):
        print '***** Error: %s needs at least four arguments:' % (sys.argv[0])
        print '*****        - Name of the project module'
        print '*****        - Tagging mode: "name" or "locality"'
        print '*****        - Input training file name'
        print '*****        - HMM output file name'
        print '*****          plus options'
        raise Exception()

    if (config.options[1] == config.options[2]):
        print '*** Error: Input and output files must differ'
        print '***        Input training file name:', config.options[1]
        print '***        HMM output file name:    ', config.options[1]
        raise Exception()

    in_file_name = config.options[1]
    hmm_file_name = config.options[2]

    # Get tagging mode/lookup-tables used - - - - - - - - - - - - - - - - - - - -
    #
    tag_mode = config.options[0]
    if (tag_mode in ['name', 'na', 'n']):
        tag_mode = 'name'
    elif (tag_mode in ['locality', 'lolty', 'loc', 'l']):
        tag_mode = 'loc'
    else:
        print '***** Error: Illegal tagging mode:', tag_mode
        print '*****        Must be either "name" or "locality"'
        raise Exception()

    # Check for optional arguments and process if any - - - - - - - - - - - - - -
    #
    config.verbose = 0  # Default: No verbose output
    config.logging = 0  # Default: No logging into a file
    smoothing = None  # Default: No smoothing
    config.nowarn = 0  # Deactivate no warning flag (print/log warning
    # messages)

    if (len(config.options) > 3):
        options = config.options[3:]
        while (options != []):  # Do a loop processing all options

            if (options[0] == '-nowarn'):
                config.nowarn = 1  # Activate no warning flag
                options = options[1:]  # Remove processed '-nowarn' option

            elif (options[0] == '-v1'):
                config.verbose = 1  # Set to verbose output level 1
                options = options[1:]  # Remove processed '-v1' option

            elif (options[0] == '-v2'):
                config.verbose = 2  # Set to verbose output level 2
                options = options[1:]  # Remove processed '-v2' option

            elif (options[0] == '-l'):
                config.logging = 1
                if (len(options) > 1):
                    if (options[1][0] !=
                            '-'):  # Not another option, must be a file name
                        config.log_file = options[1]  # Get name of log file
                        options = options[1:]  # Remove file_name
                options = options[1:]  # Remove processed -'l' option only

                try:
                    f_log = open(config.log_file,
                                 'a')  # Test if file is appendable
                except:
                    print '***** Error ********************',
                    print '***** Cannot write to log file: ' + config.log_file
                    raise IOError()

                # Write (append) header to log file
                #
                f_log.write(os.linesep)
                f_log.write(
                    '##################################################')
                f_log.write("############" + os.linesep)
                f_log.write("#" + os.linesep)
                f_log.write(
                    "# 'pyTrainHMM.py - Version 0.1' process started at: ")
                f_log.write(time.ctime(time.time()) + os.linesep)
                f_log.write("#" + os.linesep)
                f_log.write("# Input file name: " + in_file_name + os.linesep)
                f_log.write("# HMM file name:   " + hmm_file_name + os.linesep)
                f_log.write(os.linesep)
                f_log.close()

            elif (options[0] == '-s'):
                smoothing = 1  # Set to do a HMM smoothing
                smoothing = options[1]
                if (smoothing in ['l', 'la', 'lap', 'laplac', 'laplace']):
                    smoothing = 'laplace'
                elif (smoothing in ['a','ad','abs','absd','absdis','absdisc',\
                       'absdiscount']):
                    smoothing = 'absdiscount'
                else:  # Illegal value
                    print "*** Error: Illegal value for 'smoothing' argument:", smoothing
                    print "***        Possible are: 'laplace' or 'absdiscount'"
                    raise Exception()

                options = options[2:]  # Remove processed option

            else:
                print '*** Error: Illegal option:', options[0]
                raise Exception()

    # Get HMM states and observations from configuration module - - - - - - - - -
    #
    if (tag_mode == 'name'):
        state_list = config.name_hmm_states
        obser_list = config.name_hmm_obser

    else:
        state_list = config.geoloc_hmm_states
        obser_list = config.geoloc_hmm_obser

    # Open input file - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    try:
        f_in = open(in_file_name, 'r')
    except:
        inout.log_message('Cannot open input file: ' + in_file_name, 'err')
        raise IOError()

    line_count = 0  # Counter for lines read
    rec_count = 0  # Counter for training records read

    # Read lines, discard comment lines and process training data lines - - - - -
    #
    training_data = []  # List of training records

    train_list = []  # List of training sequences (dictionaries), extracted
                     # from training data

    for line in xreadlines.xreadlines(f_in):

        if (line[0] != '#') and (line.strip() != ''):
            # Line must contain a training record

            line = line.strip()  # Remove line separators
            config.curr_line = line  # Make a copy of the unprocessed current line

            line_list = line.split(',')  # Split into a list of elements
            line_data = []  # Training data list for one training record

            inout.log_message(['Record number: ' + str(rec_count)], 'v1')
            config.curr_line_no = line_count  # Store current line number

            for elem in line_list:
                [k, v] = elem.split(':')  # Split into key and value
                tag = k.strip()
                state = v.strip()
                line_data.append((state, tag))

                if (state not in state_list):
                    msg = ['Illegal state name in training record: '+state, \
                           'Line: '+str(line_count)+', record: '+str(rec_count), \
                           'Possible values: '+str(state_list)]
                    inout.log_message(msg, 'err')
                    raise Exception()

                if (tag not in obser_list):
                    msg = ['Illegal observation (tag) name in training record: '+tag, \
                           'Line: '+str(line_count)+', record: '+str(rec_count), \
                           'Possible values: '+str(obser_list)]
                    inout.log_message(msg, 'err')
                    raise Exception()

            inout.log_message('  Training record '+str(rec_count)+':'+ \
                              str(line_data),'v1')

            train_list.append(line_data)

            rec_count += 1
            inout.log_message('', 'v1')  # Print empty lines between records

        line_count += 1

    # Close input file  - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    f_in.close()

    inout.log_message('', 'v1')  # Print empty lines between records

    # Initalise HMM and train it with training data - - - - - - - - - - - - - - -
    #
    myhmm = simplehmm.hmm(state_list, obser_list)

    myhmm.train(train_list, smoothing)
    myhmm.print_hmm()

    # Save trained HMM  - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    myhmm.save_hmm(hmm_file_name)

    inout.log_message(['Read '+str(line_count)+' lines, processed '+ \
                      str(rec_count)+' training records', 'End.'],'v1')
Example #26
 def xreadlines(self):
     return xreadlines(self.__sio)
Example #27
 def xreadlines(self):
     self._check_open()
     import xreadlines
     return xreadlines.xreadlines(self)
Example #28
import os
import sys
import string
import re
import xreadlines


# Determine what the source file is.
sourceFile = None
for arg in sys.argv[1:]:
    if re.search(".c$", arg):
        sourceFile = arg
if sourceFile == None:
    sys.stderr.write("Unable to find a source file in " + string.join(sys.argv[1:]))
    sys.stderr.write("\n")
    sys.exit(1)

sourcePath = os.path.dirname(sourceFile)

# Have gcc generate the dependency information.  We modify the line
# with the filename so that it also includes the .d file, and so that
# the complete path name is used.
child = os.popen("gcc -M " + string.join(sys.argv[1:]), "r")
for line in xreadlines.xreadlines(child):
    if re.search(":", line):
        (file, remainder) = re.split(":", line)
        print os.path.join(sourcePath, file),
        dependencyFile = re.split("\.", file)[0]
        print " %s.d: %s" % (os.path.join(sourcePath, dependencyFile), remainder),
    else:
        print line,
Example #29
class XReader:
    def __init__(self):
        self.count = 5

    def readlines(self, sizehint = None):
        self.count = self.count - 1
        return map(lambda x: "%d\n" % x, range(self.count))

class Null: pass

import xreadlines


lineno = 0

try:
    xreadlines.xreadlines(Null())[0]
except AttributeError, detail:
    print "AttributeError (expected)"
else:
    print "Did not throw attribute error"

try:
    xreadlines.xreadlines(XReader)[0]
except TypeError, detail:
    print "TypeError (expected)"
else:
    print "Did not throw type error"

try:
    xreadlines.xreadlines(XReader())[1]
except RuntimeError, detail:
    print "RuntimeError (expected):", detail
else:
    print "Did not throw runtime error"
Example #30
            for attr, value in attrs:
                if attr == "href":
                    self.current_href = value
            self.state = "a"

    def handle_data(self, data):
        if self.state == "a":
            self.adata = data

    def handle_endtag(self, tag):
        if tag == "a" and self.state == "a":
            self.writer(self.current_href, self.adata)
            self.state = ""
            self.adata = ""


def default_writer(href, data):
    print href, data


if __name__ == "__main__":
    import sys, xreadlines
    if len(sys.argv) < 2:
        print "%s <htmlfile>" % (sys.argv[0])
        print "emits the hyperlink reference and associated text found in an HTML file."
        sys.exit(1)
    hget = HrefGetter(default_writer)
    fo = open(sys.argv[1], "r")
    for line in xreadlines.xreadlines(fo):
        hget.feed(line)
    fo.close()
Example #31
from test_support import verbose
class XReader:
    def __init__(self):
        self.count = 5
    def readlines(self, sizehint = None):
        self.count = self.count - 1
        return map(lambda x: "%d\n" % x, range(self.count))
class Null: pass
import xreadlines

lineno = 0
try:
    xreadlines.xreadlines(Null())[0]
except AttributeError, detail:
    print "AttributeError (expected)"
else:
    print "Did not throw attribute error"
try:
    xreadlines.xreadlines(XReader)[0]
except TypeError, detail:
    print "TypeError (expected)"
else:
    print "Did not throw type error"
try:
    xreadlines.xreadlines(XReader())[1]
except RuntimeError, detail:
    print "RuntimeError (expected):", detail
else:
    print "Did not throw runtime error"
xresult = ['0\n', '1\n', '2\n', '3\n', '0\n', '1\n', '2\n', '0\n', '1\n', '0\n']
Example #32
infile = file(sys.argv[1], 'r')
ofile = file(sys.argv[2], 'w')

reStackUsed = re.compile(r'^\#if YY_STACK_USED$')
reProto = re.compile(r'^static int yy_get_next_buffer YY_PROTO\(\( void \)\)')
reGetNextDeclaration = re.compile(r'static int yy_get_next_buffer\(\)')
reGetNext = re.compile(r'yy_get_next_buffer\(\)')
reyyinput = re.compile(r'^static int yyinput\(\)$')
reReturnYyinput = re.compile(r'return yyinput\(\);$')
reunistd = re.compile('^\#include \<unistd\.h\>$')
reAlwaysInteractive = re.compile(r'\#if YY_ALWAYS_INTERACTIVE')
reNeverInteractive = re.compile(r'\#if YY_NEVER_INTERACTIVE')
reyymain = re.compile(r'\#if YY_MAIN')

for li in xreadlines.xreadlines(infile):
    li = misutils.stripcrlf(li)
    li = reStackUsed.sub(r'#ifdef YY_STACK_USED', li)
    li = reProto.sub(r'', li)
    li = reGetNextDeclaration.sub(
        r'static int yy_get_next_buffer( SqlHandle *dbsql )', li)
    li = reGetNext.sub(r'yy_get_next_buffer( dbsql )', li)
    li = reyyinput.sub(r'static int yyinput( SqlHandle *dbsql )', li)
    li = reReturnYyinput.sub(r'return yyinput( dbsql );', li)
    li = reunistd.sub(
        r'#ifndef _MSC_VER' + '\n' + r'#include <unistd.h>' + '\n' + r'#endif',
        li)
    li = reAlwaysInteractive.sub(r'#ifdef YY_ALWAYS_INTERACTIVE', li)
    li = reNeverInteractive.sub(r'#ifdef YY_NEVER_INTERACTIVE', li)
    li = reyymain.sub(r'#ifdef YY_MAIN', li)
Example #33
        def readmail(self, mailfile):
            elements = []
            for line in xreadlines.xreadlines(mailfile):
                elements.append(string.split(line, None, 2))

            return elements
Example #34
def get_build_requires(spec, bconds_with, bconds_without):
    cond_rx = re.compile(r"%\{(\!\?|\?\!|\?)([a-zA-Z0-9_+]+)\s*:([^%\{\}]*)\}")

    def expand_conds(l):
        def expand_one(m):
            if m.group(1) == "?":
                if macros.has_key(m.group(2)):
                    return m.group(3)
            else:
                if not macros.has_key(m.group(2)):
                    return m.group(3)
            return ""

        for i in range(10):
            l = cond_rx.sub(expand_one, l)
            if len(l) > 1000: break

        return l

    macro_rx = re.compile(r"%\{([a-zA-Z0-9_+]+)\}")
    def expand_macros(l):
        def expand_one(m):
            if macros.has_key(m.group(1)):
                return string.strip(macros[m.group(1)])
            else:
                return m.group(0) # don't change

        for i in range(10):
            l = macro_rx.sub(expand_one, l)
            if len(l) > 1000: break

        return expand_conds(l)

    simple_br_rx = re.compile(r"^BuildRequires\s*:\s*([^\s]+)", re.I)
    bcond_rx = re.compile(r"^%bcond_(with|without)\s+([^\s]+)")
    version_rx = re.compile(r"^Version\s*:\s*([^\s]+)", re.I)
    release_rx = re.compile(r"^Release\s*:\s*([^\s]+)", re.I)
    name_rx = re.compile(r"^Name\s*:\s*([^\s]+)", re.I)
    define_rx = re.compile(r"^\%define\s+([a-zA-Z0-9_+]+)\s+(.*)", re.I)
    any_br_rx = re.compile(r"BuildRequires", re.I)

    macros = {}
    for b in bconds_with:
        macros["_with_%s" % b] = 1
    for b in bconds_without:
        macros["_without_%s" % b] = 1

    macros["__perl"] = "/usr/bin/perl"
    macros["_bindir"] = "/usr/bin"
    macros["_sbindir"] = "/usr/sbin"
    macros["kgcc_package"] = "gcc"

    build_req = []

    f = open(spec)
    for l in xreadlines.xreadlines(f):
        l = string.strip(l)
        if l == "%changelog": break

        # %bcond_with..
        m = bcond_rx.search(l)
        if m:
            bcond = m.group(2)
            if m.group(1) == "with":
                if macros.has_key("_with_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            else:
                if not macros.has_key("_without_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            continue

        # name,version,release
        m = version_rx.search(l)
        if m: macros["version"] = m.group(1)
        m = release_rx.search(l)
        if m: macros["release"] = m.group(1)
        m = name_rx.search(l)
        if m: macros["name"] = m.group(1)

        # %define
        m = define_rx.search(l)
        if m: macros[m.group(1)] = m.group(2)

        # *BuildRequires*
        if any_br_rx.search(l):
            l = expand_macros(l)
            m = simple_br_rx.search(l)
            if m:
                build_req.append(m.group(1))
            else:
                if l <> "" and l[0] <> '#':
                    msg("spec error (%s): %s\n" % (spec, l))

    for x in build_req:
        print x
Example #35
import sys
from xreadlines import xreadlines

for line in xreadlines(sys.stdin):
    if line.startswith("%%BoundingBox:"):
        parts = line.split()
        x0, y0, x1, y1 = int(parts[1]), int(parts[2]), int(parts[3]), int(
            parts[4])
        x1 += x0
        y1 += y0
        x0 = 0
        y0 = 0
        line = "%%%%BoundingBox: %d %d %d %d\n" % (x0, y0, x1, y1)
    sys.stdout.write(line)
Example #36
File: alignGFF2db.py  Project: kpalin/EEL
    def _parseGFFfile(self,fhandle,fileName):
        """Parses the pairwise alignment GFF file.

        This is used to clear up the addGFFfile() function."""

        currMod=None
        currModId=""
        cisModRows=0

        # Book-keeping for columns
        seqsHaveCol={}

        
        for line in xreadlines.xreadlines(fhandle):
            line=line.strip()
            if len(line)==0 or line[0]=='#':  ## Skip empty and comment lines.
                continue
            ## Parse a GFF line
            try:
                seq,src,feat,start,stop,score,strand,frame,attribs=line.split('#')[0].split("\t",8)
            except ValueError:
                print line
                raise
            attribs=self._parseAttribs(attribs)
            # Start of a new cis module
            if feat=='CisModule':

                try:
                    seq2=attribs["Target"].strip('"')
                except TypeError:
                    seq2=attribs["Target"].replace('"','')
                start2=int(attribs["Start"])
                stop2=int(attribs["End"])

                
                if cisModRows<1:

                    regionMap=self.addCisModule(float(score),seq,int(start),int(stop),\
                                                seq2,start2,stop2)
                    cisModRows+=1

                    self.addSlice(self.currentCisID,regionMap,fileName)
                
                # Must create the first column.
                seqsHaveCol[seq]=1
            else:
                cisModRows=0

                if seqsHaveCol.has_key(seq):
                    # New column
                    seqsHaveCol={}
                    colId=self.makeNewColumn(feat,int(stop)-int(start),0.0)
                seqsHaveCol[seq]=1

                
                cur=self.db.cursor()

                # To get the strands correctly:

                start,stop,strand=self.mapWithinRegion(regionMap[seq][1],start,stop,strand)
                assert(start<stop)

                ins="INSERT INTO sites (pos,regID,colID,strand) VALUES (%d,%d,%d,'%s')"%(start,regionMap[seq][0],colId,strand)
                cur.execute(ins)
                #assert(lineModId==currModId)
                #siteId=Site(feat,float(score),strand,int(stop)-int(start))
                #currMod.appendSite({seq:int(start)},siteId)
        cur.close()
        print "Done"
Example #37
 def evalSynth(self, synthname):
     tmpname = tempfile.mktemp(".sc")
     outfile = open(tmpname, "w")
     outfile.write('SynthDef("' + synthname + '",{')
     for line in xreadlines.xreadlines(open(synthname + ".sc", "r")):
         outfile.write(line)
     tmpname2 = tempfile.mktemp("")
     outfile.write('}).writeDefFile("' + tmpname2 + '");\n')
     outfile.close()
     os.system("sclang " + tmpname)
     tmpname2 += synthname + ".scsyndef"
     self.loadSynthDef(tmpname2)
     os.system("rm " + tmpname + " " + tmpname2)