def word_histogram(source):
    """Create a histogram of normalized words (no punctuation or digits).

    `source` may be a string-like object or a file-like object; returns a
    dict mapping lowercased, stripped words to their occurrence counts.
    Raises TypeError for any other source type.
    """
    hist = {}
    trans = maketrans('', '')

    def _count_words(text):
        # Shared normalization/tally step (was duplicated three times):
        # strip punctuation and digits, lowercase, skip empty results.
        for word in split(text):
            word = translate(word, trans, punctuation + digits).lower()
            if word:
                hist[word] = hist.get(word, 0) + 1

    if type(source) in (StringType, UnicodeType):   # String-like src
        _count_words(source)
    elif hasattr(source, 'read'):                   # File-like src
        try:
            from xreadlines import xreadlines       # Check for module
            for line in xreadlines(source):
                _count_words(line)
        except ImportError:                         # Older Python ver
            line = source.readline()                # Slow but mem-friendly
            while line:
                _count_words(line)
                line = source.readline()
    else:
        raise TypeError("source must be a string-like or file-like object")
    return hist
def scanIds(package):
    """Scans all our python source files for _svn_id lines of the form seen in
    this file above, returning a dictionary of Id strings for each filename.

    This doesn't import the file, since we want to scan its version even if
    it won't import (for example, it needs a module we don't have)
    """
    import os, re
    import PGBuild
    import PGBuild.Errors
    from xreadlines import xreadlines
    packagePath = package.__path__[0]
    moduleDict = {}
    # Match lines that begin (after optional whitespace) with _svn_id.
    svnlineRe = re.compile("^\s*_svn_id")

    # First, search for source files and fill moduleDict with
    # module name - module path pairs, without importing anything.
    def visit(arg, dirname, names):
        for name in names:
            # Ignore editor backups and such
            if name.endswith(".py") and not (name.startswith(".") or name.startswith("#")):
                modulePath = os.path.join(dirname, name)
                modPathList = modulePath[len(packagePath):].split(os.sep)
                # Strip off the .py extension, strip off __init__ modules from packages
                modPathList[-1] = modPathList[-1][:-3]
                if modPathList[-1] == "__init__":
                    del modPathList[-1]
                moduleName = package.__name__ + ".".join(modPathList)
                moduleDict[moduleName] = modulePath

    os.path.walk(packagePath, visit, None)

    # Open each module and search for the _svn_id line. The line must start
    # with _svn_id, but we do allow a little flexibility since we exec the
    # line to get its value.
    for module in moduleDict:
        path = moduleDict[module]
        file = open(path)
        id = None
        for line in xreadlines(file):
            if svnlineRe.search(line):
                env = {'_svn_id': None}
                try:
                    # NOTE(review): exec of file contents -- acceptable only
                    # because these are our own package's source files.
                    exec line in env
                except:
                    raise PGBuild.Errors.InternalError(
                        "Malformed _svn_id found in module %s" % module)
                id = env['_svn_id']
                break
        file.close()
        if not id:
            raise PGBuild.Errors.InternalError("Module %s has no _svn_id" % module)
        # Replace the stored path with the extracted Id string.
        moduleDict[module] = id
    return moduleDict
def readboard(boardname):
    """Return the entries of the on-disk board database for `boardname`.

    Each line is split into at most two fields; a missing database file
    yields an empty list.
    """
    path = boarddb_dir + boardname
    if not os.access(path, os.F_OK):
        return []
    handle = file(path)
    rows = [string.split(entry, None, 1)
            for entry in xreadlines.xreadlines(handle)]
    handle.close()
    return rows
def evalSynth(self, synthname):
    """Wrap <synthname>.sc in a SynthDef and send it to the server via sclang.

    Builds a temporary .sc script, runs sclang on it, then removes it.
    """
    # NOTE(review): tempfile.mktemp is race-prone; mkstemp would be safer,
    # but the bare name must be usable by the external sclang process.
    tmpname = tempfile.mktemp(".sc")
    outfile = open(tmpname, "w")
    try:
        outfile.write('SynthDef("' + synthname + '",{')
        infile = open(synthname + ".sc", "r")
        try:
            for line in xreadlines.xreadlines(infile):
                outfile.write(line)
        finally:
            infile.close()  # BUGFIX: input file handle was never closed
        outfile.write('}).send(Server.new(\localhost,NetAddr("' + self.ip + '",' + str(self.port) + ')););\n')
    finally:
        outfile.close()
    os.system("sclang " + tmpname)
    os.remove(tmpname)  # unlink directly instead of shelling out to rm
def evalSynth(self, synthname):
    """Wrap <synthname>.sc in a SynthDef and send it to the server via sclang.

    Builds a temporary .sc script, runs sclang on it, then removes it.
    """
    # NOTE(review): tempfile.mktemp is race-prone; mkstemp would be safer,
    # but the bare name must be usable by the external sclang process.
    tmpname = tempfile.mktemp(".sc")
    outfile = open(tmpname, "w")
    try:
        outfile.write('SynthDef("' + synthname + '",{')
        infile = open(synthname + ".sc", "r")
        try:
            for line in xreadlines.xreadlines(infile):
                outfile.write(line)
        finally:
            infile.close()  # BUGFIX: input file handle was never closed
        outfile.write('}).send(Server.new(\localhost,NetAddr("' + self.ip + '",' + str(self.port) + ')););\n')
    finally:
        outfile.close()
    os.system("sclang " + tmpname)
    os.remove(tmpname)  # unlink directly instead of shelling out to rm
def readlog(self, name):
    """Read the log database for `name` into a {key: data} dictionary.

    A missing log file yields an empty dict; each value is stripped of
    surrounding whitespace.
    """
    path = self.logdb_dir + name + self.logdb_file
    entries = {}
    if os.access(path, os.F_OK):
        handle = file(path)
        for record in xreadlines.xreadlines(handle):
            field, rest = string.split(record, None, 1)
            entries[field] = string.strip(rest)
        handle.close()
    return entries
def __init__(self, fileobj, continued=None):
    """Wrap `fileobj` as a sequence of logical (continuation-joined) lines.

    `continued`, if callable, maps a physical line to an (is_continued,
    text) pair; otherwise a default backslash-continuation rule is used.
    """
    import xreadlines
    # self.seq: the underlying line-sequence
    # self.phys_num / self.logi_num: physical and logical line indices
    try:
        self.seq = fileobj.xreadlines()
    except AttributeError:
        # fileobj lacks an xreadlines method; use the module-level helper.
        self.seq = xreadlines.xreadlines(fileobj)
    self.phys_num = 0
    self.logi_num = 0
    # allow for optional passing of continued-function
    if callable(continued):
        self.continued = continued
    else:
        def default_continued(line):
            # A trailing backslash-newline marks a continued line.
            if line.endswith('\\\n'):
                return 1, line[:-2]
            return 0, line
        self.continued = default_continued
def __init__(self, fileobj, separator=None):
    """Wrap `fileobj` as a sequence of paragraphs.

    `separator` must be a callable returning true for lines that separate
    paragraphs; None selects the default blank-line rule.
    """
    import xreadlines
    # self.seq: the underlying line-sequence
    # self.line_num / self.para_num: current line and paragraph indices
    try:
        self.seq = fileobj.xreadlines()
    except AttributeError:
        # fileobj lacks an xreadlines method; use the module-level helper.
        self.seq = xreadlines.xreadlines(fileobj)
    self.line_num = 0
    self.para_num = 0
    # allow for optional passing of separator-function
    if separator is None:
        # Default rule: a bare newline separates paragraphs.
        def separator(line):
            return line == '\n'
    elif not callable(separator):
        raise TypeError("separator argument must be callable")
    self.separator = separator
def __init__(self, fileobj, continued=None):
    """Initialize logical-line iteration state over `fileobj`."""
    import xreadlines
    # self.seq: the underlying line-sequence (the file object itself)
    # self.phys_num / self.logi_num: physical and logical line indices
    try:
        self.seq = fileobj
    except AttributeError:
        # Fallback kept from the original; normally unreachable since a
        # plain assignment does not raise AttributeError.
        self.seq = xreadlines.xreadlines(fileobj)
    self.phys_num = 0
    self.logi_num = 0
    # allow for optional passing of continued-function
    if callable(continued):
        self.continued = continued
    else:
        def default_continued(line):
            # A trailing backslash-newline marks a continued line.
            if line.endswith('\\\n'):
                return 1, line[:-2]
            return 0, line
        self.continued = default_continued
def __init__(self, fileobj, separator=None):
    """Initialize paragraph iteration state over `fileobj`."""
    import xreadlines
    # self.seq: the underlying line-sequence (the file object itself)
    # self.line_num / self.para_num: current line and paragraph indices
    try:
        self.seq = fileobj
    except AttributeError:
        # Fallback kept from the original; normally unreachable since a
        # plain assignment does not raise AttributeError.
        self.seq = xreadlines.xreadlines(fileobj)
    self.line_num = 0
    self.para_num = 0
    # allow for optional passing of separator-function
    if separator is None:
        # Default rule: a bare newline separates paragraphs.
        def separator(line):
            return line == '\n'
    elif not callable(separator):
        raise TypeError("separator argument must be callable")
    self.separator = separator
def main(): import sys if len(sys.argv) < 3: print __doc__ if not os.path.isfile(sys.argv[1]): print __doc__ settings = sys.argv[1] folder = sys.argv[2] if not os.path.isdir(folder): os.makedirs(folder) nicklist = map(extract, xreadlines(open(settings))) for nick in nicklist: writePerson(folder, nick)
def read_words():
    """Read words from stdin and return them as one list sorted by length."""
    # Buckets: word length -> list of words of that length.
    buckets = {}
    # XXX: You'll get a deprecation warning here for Python 2.3. I just use
    # xreadlines for the benefit of old machines.
    for raw in xreadlines(sys.stdin):
        word = raw
        if word[-1] == '\n':
            word = word[:-1]  # chomp trailing newline
        # check chars are reasonable
        if not letters_re.match(word):
            raise ValueError()
        word = word.lower()
        # Drop the word into the bucket for its length, creating it on demand.
        buckets.setdefault(len(word), []).append(word)
    # Join up all the buckets so we get one big list ordered by word length.
    all_words = []
    lengths = buckets.keys()
    lengths.sort()
    for length in lengths:
        all_words.extend(buckets[length])
    return all_words
raise ValueError, "HrefGetter: writer must be callable." def handle_starttag(self, tag, attrs): if tag == "a": for attr, value in attrs: if attr == "href": self.current_href = value self.state = "a" def handle_data(self, data): if self.state == "a": self.adata = data def handle_endtag(self, tag): if tag == "a" and self.state == "a": self.writer(self.current_href, self.adata) self.state = "" self.adata = "" def default_writer(href, data): print href, data if __name__ == "__main__": import sys, xreadlines if len(sys.argv) < 2: print "%s <htmlfile>" % (sys.argv[0]) print "emits the hyperlink reference and associated text found in an HTML file." hget = HrefGetter(default_writer) fo = open(sys.argv[1], "r") for line in xreadlines.xreadlines(fo): hget.feed(line) fo.close()
# Filter a PostScript stream on stdin, rewriting any %%BoundingBox line so
# that the box is anchored at the origin (treating the old lower-left corner
# as an offset added to the upper-right corner); every other line is copied
# through unchanged.
import sys
from xreadlines import xreadlines

for line in xreadlines(sys.stdin):
    if line.startswith("%%BoundingBox:"):
        fields = line.split()
        llx = int(fields[1])
        lly = int(fields[2])
        urx = int(fields[3]) + llx
        ury = int(fields[4]) + lly
        line = "%%%%BoundingBox: %d %d %d %d\n" % (0, 0, urx, ury)
    sys.stdout.write(line)
def xreadlines(self, sizehint=-1):
    """Emulate file(...).xreadlines(...).

    Returns a lazy line iterator over this file-like object via the legacy
    xreadlines module; `sizehint` is accepted for interface compatibility
    only and is ignored.
    """
    import xreadlines
    return xreadlines.xreadlines(self)
def xreadlines(self):
    """Return a lazy line iterator over this file (legacy xreadlines API).

    Raises ValueError if the file has been closed, matching built-in
    file-object behavior.
    """
    if self.closed:
        raise ValueError("I/O operation on closed file")
    import xreadlines
    return xreadlines.xreadlines(self)
def get_build_requires(spec, bconds_with, bconds_without):
    """Parse an RPM spec file and print its BuildRequires, honouring bconds.

    `bconds_with` / `bconds_without` are lists of bcond names that seed the
    corresponding _with_X / _without_X macros used during macro expansion.
    """
    # %{?macro:text} / %{!?macro:text} conditional macro construct.
    cond_rx = re.compile(r"%\{(\!\?|\?\!|\?)([a-zA-Z0-9_+]+)\s*:([^%\{\}]*)\}")

    def expand_conds(l):
        # Expand conditionals: '?' emits text when the macro is defined,
        # '!?' / '?!' when it is not.
        def expand_one(m):
            if m.group(1) == "?":
                if macros.has_key(m.group(2)):
                    return m.group(3)
            else:
                if not macros.has_key(m.group(2)):
                    return m.group(3)
            return ""
        # Iterate to handle nested constructs; bail out if expansion blows up.
        for i in range(10):
            l = cond_rx.sub(expand_one, l)
            if len(l) > 1000:
                break
        return l

    macro_rx = re.compile(r"%\{([a-zA-Z0-9_+]+)\}")

    def expand_macros(l):
        # Substitute %{name} macros from the table, then expand conditionals.
        def expand_one(m):
            if macros.has_key(m.group(1)):
                return string.strip(macros[m.group(1)])
            else:
                return m.group(0)  # don't change
        for i in range(10):
            l = macro_rx.sub(expand_one, l)
            if len(l) > 1000:
                break
        return expand_conds(l)

    # Recognized spec-file directives.
    simple_br_rx = re.compile(r"^BuildRequires\s*:\s*([^\s]+)", re.I)
    bcond_rx = re.compile(r"^%bcond_(with|without)\s+([^\s]+)")
    version_rx = re.compile(r"^Version\s*:\s*([^\s]+)", re.I)
    release_rx = re.compile(r"^Release\s*:\s*([^\s]+)", re.I)
    name_rx = re.compile(r"^Name\s*:\s*([^\s]+)", re.I)
    define_rx = re.compile(r"^\%define\s+([a-zA-Z0-9_+]+)\s+(.*)", re.I)
    any_br_rx = re.compile(r"BuildRequires", re.I)

    # Seed the macro table with the requested bconds plus common builtins.
    macros = {}
    for b in bconds_with:
        macros["_with_%s" % b] = 1
    for b in bconds_without:
        macros["_without_%s" % b] = 1
    macros["__perl"] = "/usr/bin/perl"
    macros["_bindir"] = "/usr/bin"
    macros["_sbindir"] = "/usr/sbin"
    macros["kgcc_package"] = "gcc"

    build_req = []
    f = open(spec)
    for l in xreadlines.xreadlines(f):
        l = string.strip(l)
        if l == "%changelog":
            break  # nothing of interest past the changelog
        # %bcond_with..
        m = bcond_rx.search(l)
        if m:
            bcond = m.group(2)
            if m.group(1) == "with":
                if macros.has_key("_with_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            else:
                if not macros.has_key("_without_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            continue
        # name,version,release
        m = version_rx.search(l)
        if m:
            macros["version"] = m.group(1)
        m = release_rx.search(l)
        if m:
            macros["release"] = m.group(1)
        m = name_rx.search(l)
        if m:
            macros["name"] = m.group(1)
        # %define
        m = define_rx.search(l)
        if m:
            macros[m.group(1)] = m.group(2)
        # *BuildRequires*
        if any_br_rx.search(l):
            l = expand_macros(l)
            m = simple_br_rx.search(l)
            if m:
                build_req.append(m.group(1))
            else:
                # Not a simple one-package BuildRequires line; report it
                # unless it is empty or a comment after expansion.
                if l <> "" and l[0] <> '#':
                    msg("spec error (%s): %s\n" % (spec, l))
    for x in build_req:
        print(x)
def xreadlines(self):
    """Return a lazy line iterator over the wrapped stream (self.__sio)."""
    return xreadlines(self.__sio)
def trainhmm():
    """Main routine, open file, read lines, train HMM and save it to file.

    USAGE:
      trainhmm()

    ARGUMENTS:
      None

    DESCRIPTION:
      Main routine, see description of module above.
    """

    # Process command line arguments and check for correctness - - - - - - - - -
    #
    if (len(config.options) < 3):
        print '***** Error: %s needs at least four arguments:' % (sys.argv[0])
        print '***** - Name of the project module'
        print '***** - Tagging mode: "name" or "locality"'
        print '***** - Input training file name'
        print '***** - HMM output file name'
        print '***** plus options'
        raise Exception()

    if (config.options[1] == config.options[2]):
        print '*** Error: Input and output files must differ'
        print '*** Input training file name:', config.options[1]
        print '*** HMM output file name: ', config.options[1]
        raise Exception()

    in_file_name = config.options[1]
    hmm_file_name = config.options[2]

    # Get tagging mode/lookup-tables used - - - - - - - - - - - - - - - - - - - -
    #
    tag_mode = config.options[0]
    if (tag_mode in ['name','na','n']):
        tag_mode = 'name'
    elif (tag_mode in ['locality','lolty','loc','l']):
        tag_mode = 'loc'
    else:
        print '***** Error: Illegal tagging mode:', tag_mode
        print '***** Must be either "name" or "locality"'
        raise Exception()

    # Check for optional arguments and process if any - - - - - - - - - - - - - -
    #
    config.verbose = 0  # Default: No verbose output
    config.logging = 0  # Default: No logging into a file
    smoothing = None    # Default: No smoothing
    config.nowarn = 0   # Deactivate no warning flag (print/log warning
                        # messages)

    if (len(config.options) > 3):
        options = config.options[3:]
        while (options != []):  # Do a loop processing all options
            if (options[0] == '-nowarn'):
                config.nowarn = 1      # Activate no warning flag
                options = options[1:]  # Remove processed '-nowarn' option
            elif (options[0] == '-v1'):
                config.verbose = 1     # Set to verbose output level 1
                options = options[1:]  # Remove processed '-v1' option
            elif (options[0] == '-v2'):
                config.verbose = 2     # Set to verbose output level 2
                options = options[1:]  # Remove processed '-v2' option
            elif (options[0] == '-l'):
                config.logging = 1
                if (len(options) > 1):
                    if (options[1][0] != '-'):  # Not another option, must be a file name
                        config.log_file = options[1]  # Get name of log file
                        options = options[1:]  # Remove file_name
                options = options[1:]  # Remove processed -'l' option only
                try:
                    f_log = open(config.log_file,'a')  # Test if file is appendable
                except:
                    print '***** Error ********************',
                    print '***** Cannot write to log file: '+config.log_file
                    raise IOError()
                # Write (append) header to log file
                #
                f_log.write(os.linesep)
                f_log.write('##################################################')
                f_log.write("############"+os.linesep)
                f_log.write("#"+os.linesep)
                f_log.write("# 'pyTrainHMM.py - Version 0.1' process started at: ")
                f_log.write(time.ctime(time.time())+os.linesep)
                f_log.write("#"+os.linesep)
                f_log.write("# Input file name: "+in_file_name+os.linesep)
                f_log.write("# HMM file name: "+hmm_file_name+os.linesep)
                f_log.write(os.linesep)
                f_log.close()
            elif (options[0] == '-s'):
                smoothing = 1  # Set to do a HMM smoothing
                smoothing = options[1]
                if (smoothing in ['l','la','lap','laplac','laplace']):
                    smoothing = 'laplace'
                elif (smoothing in ['a','ad','abs','absd','absdis','absdisc',\
                                    'absdiscount']):
                    smoothing = 'absdiscount'
                else:  # Illegal value
                    print "*** Error: Illegal value for 'smoothing' argument:", smoothing
                    print "*** Possible are: 'laplace' or 'absdiscount'"
                    raise Exception()
                options = options[2:]  # Remove processed option
            else:
                print '*** Error: Illegal option:', options[0]
                raise Exception()

    # Get HMM states and observations from configuration module - - - - - - - - -
    #
    if (tag_mode == 'name'):
        state_list = config.name_hmm_states
        obser_list = config.name_hmm_obser
    else:
        state_list = config.geoloc_hmm_states
        obser_list = config.geoloc_hmm_obser

    # Open input file - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    try:
        f_in = open(in_file_name,'r')
    except:
        inout.log_message('Cannot open input file: '+in_file_name,'err')
        raise IOError()

    line_count = 0  # Counter for lines read
    rec_count = 0   # Counter for training records read

    # Read lines, discard comment lines and process training data lines - - - - -
    #
    training_data = []  # List of training records

    train_list = []  # List of training sequences (dictionaries), extracted from
                     # training data

    for line in xreadlines.xreadlines(f_in):
        if (line[0] != '#') and (line.strip() != ''):
            # Line must contain a training record
            line = line.strip()  # Remove line separators
            config.curr_line = line  # Make a copy of the unprocessed current line

            line_list = line.split(',')  # Split into a list of elements
            line_data = []  # Training data list for one training record

            inout.log_message(['Record number: '+str(rec_count)],'v1')
            config.curr_line_no = line_count  # Store current line number

            for elem in line_list:
                [k,v] = elem.split(':')  # Split into key and value
                tag = k.strip()
                state = v.strip()
                line_data.append((state,tag))

                if (state not in state_list):
                    msg = ['Illegal state name in training record: '+state, \
                           'Line: '+str(line_count)+', record: '+str(rec_count), \
                           'Possible values: '+str(state_list)]
                    inout.log_message(msg,'err')
                    raise Exception()

                if (tag not in obser_list):
                    msg = ['Illegal observation (tag) name in training record: '+tag, \
                           'Line: '+str(line_count)+', record: '+str(rec_count), \
                           'Possible values: '+str(obser_list)]
                    inout.log_message(msg,'err')
                    raise Exception()

            inout.log_message(' Training record '+str(rec_count)+':'+ \
                              str(line_data),'v1')

            train_list.append(line_data)
            rec_count += 1
            inout.log_message('','v1')  # Print empty lines between records

        line_count += 1

    # Close input file - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    f_in.close()

    inout.log_message('','v1')  # Print empty lines between records

    # Initalise HMM and train it with training data - - - - - - - - - - - - - - -
    #
    myhmm = simplehmm.hmm(state_list, obser_list)
    myhmm.train(train_list,smoothing)
    myhmm.print_hmm()

    # Save trained HMM - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    myhmm.save_hmm(hmm_file_name)

    inout.log_message(['Read '+str(line_count)+' lines, processed '+ \
                       str(rec_count)+' training records', 'End.'],'v1')
# Load the frequency file of every 'freq'-typed field and build a shuffled
# list of values, each value repeated according to its listed frequency.
# NOTE(review): indentation partially reconstructed; `field_list`, `random`
# and `xreadlines` come from surrounding (unseen) context.
for field_dict in field_list:
    field_name = field_dict['name']

    if (field_dict['type'] == 'freq'):  # Check for 'freq' field type
        file_name = field_dict['freq_file']  # Get the corresponding file name

        if (file_name != None):
            try:
                fin = open(file_name)  # Open file for reading
            except:
                print '  Error: Can not open frequency file %s' % (file_name)
                raise Exception
            value_list = []  # List with all values of the frequency file

            for line in xreadlines.xreadlines(fin):
                line = line.strip()
                line_list = line.split(',')
                if (len(line_list) != 2):
                    print '  Error: Illegal format in frequency file %s: %s' % \
                          (file_name, line)
                    raise Exception
                line_val = line_list[0].strip()
                line_freq = int(line_list[1])

                # Append value as many times as given in frequency file
                #
                value_list += [line_val] * line_freq

            random.shuffle(value_list)  # Randomly shuffle the list of values
def get_poldek_requires():
    """Run `poldek --verify` and return {package_name: [requirement, ...]}.

    Each requirement entry is either a single package name, or a list of
    alternative package names when several packages can satisfy the same
    dependency.  A package already present in the result is not re-scanned.
    """
    # precompile regexps
    name_rx = re.compile(r"\d+\. ([^\s]+)-[^-]+-[^-]+\n")
    req_rx = re.compile(r" req .* --> (.*)\n")
    pkg_name_rx = re.compile(r"([^\s]+)-[^-]+-[^-]+")

    # todo: if a and b are sets, then use sets module
    # and intersection method on set object
    def intersect(a, b):
        # Elements common to a and b, preserving a's order.
        r = []
        for x in a:
            if x in b:
                r.append(x)
        return r

    # add given req-list to cur_pkg_reqs
    def add_req(reqs):
        if len(reqs) == 1:
            if reqs[0] not in cur_pkg_reqs:
                cur_pkg_reqs.append(reqs[0])
        else:
            # Multi-alternative requirement: try to narrow existing
            # alternative lists by intersection before appending.
            did = 0
            for x in cur_pkg_reqs:
                if type(x) is types.ListType:
                    i = intersect(x, reqs)
                    if len(i) == 0:
                        continue
                    did = 1
                    idx = cur_pkg_reqs.index(x)
                    if len(i) == 1:
                        if i[0] in cur_pkg_reqs:
                            del cur_pkg_reqs[idx]
                        else:
                            cur_pkg_reqs[idx] = i[0]
                    else:
                        cur_pkg_reqs[idx] = i
                else:
                    if x in reqs:
                        return
            if not did:
                cur_pkg_reqs.append(reqs)

    pkg_reqs = {}
    cur_pkg_reqs = None
    cur_pkg = None
    f = chr_popen("poldek -v -v --verify --unique-pkg-names")
    for l in xreadlines.xreadlines(f):
        m = name_rx.match(l)
        if m:
            # New package section: flush the previous package's requirements.
            if cur_pkg:
                pkg_reqs[cur_pkg] = cur_pkg_reqs
            # BUGFIX: was m.groups(1), which yields a 1-tuple and made every
            # pkg_reqs key a tuple instead of the package-name string.
            cur_pkg = m.group(1)
            if pkg_reqs.has_key(cur_pkg):
                cur_pkg = None
                cur_pkg_reqs = None
            else:
                cur_pkg_reqs = []
            continue
        m = req_rx.match(l)
        if m:
            reqs = []
            for x in string.split(m.group(1)):
                if x in ["RPMLIB_CAP", "NOT", "FOUND", "UNMATCHED"]:
                    continue
                m = pkg_name_rx.match(x)
                if m:
                    reqs.append(m.group(1))
                else:
                    msg("poldek_reqs: bad pkg name: %s\n" % x)
            if len(reqs) != 0:
                add_req(reqs)
    f.close()
    if cur_pkg:
        pkg_reqs[cur_pkg] = cur_pkg_reqs
    return pkg_reqs
def trainhmm():
    """Main routine, open file, read lines, train HMM and save it to file.

    USAGE:
      trainhmm()

    ARGUMENTS:
      None

    DESCRIPTION:
      Main routine, see description of module above.
    """

    # Process command line arguments and check for correctness - - - - - - - - -
    #
    if (len(config.options) < 3):
        print '***** Error: %s needs at least four arguments:' % (sys.argv[0])
        print '***** - Name of the project module'
        print '***** - Tagging mode: "name" or "locality"'
        print '***** - Input training file name'
        print '***** - HMM output file name'
        print '***** plus options'
        raise Exception()

    if (config.options[1] == config.options[2]):
        print '*** Error: Input and output files must differ'
        print '*** Input training file name:', config.options[1]
        print '*** HMM output file name: ', config.options[1]
        raise Exception()

    in_file_name = config.options[1]
    hmm_file_name = config.options[2]

    # Get tagging mode/lookup-tables used - - - - - - - - - - - - - - - - - - - -
    #
    tag_mode = config.options[0]
    if (tag_mode in ['name', 'na', 'n']):
        tag_mode = 'name'
    elif (tag_mode in ['locality', 'lolty', 'loc', 'l']):
        tag_mode = 'loc'
    else:
        print '***** Error: Illegal tagging mode:', tag_mode
        print '***** Must be either "name" or "locality"'
        raise Exception()

    # Check for optional arguments and process if any - - - - - - - - - - - - - -
    #
    config.verbose = 0  # Default: No verbose output
    config.logging = 0  # Default: No logging into a file
    smoothing = None    # Default: No smoothing
    config.nowarn = 0   # Deactivate no warning flag (print/log warning
                        # messages)

    if (len(config.options) > 3):
        options = config.options[3:]
        while (options != []):  # Do a loop processing all options
            if (options[0] == '-nowarn'):
                config.nowarn = 1      # Activate no warning flag
                options = options[1:]  # Remove processed '-nowarn' option
            elif (options[0] == '-v1'):
                config.verbose = 1     # Set to verbose output level 1
                options = options[1:]  # Remove processed '-v1' option
            elif (options[0] == '-v2'):
                config.verbose = 2     # Set to verbose output level 2
                options = options[1:]  # Remove processed '-v2' option
            elif (options[0] == '-l'):
                config.logging = 1
                if (len(options) > 1):
                    if (options[1][0] != '-'):  # Not another option, must be a file name
                        config.log_file = options[1]  # Get name of log file
                        options = options[1:]  # Remove file_name
                options = options[1:]  # Remove processed -'l' option only
                try:
                    f_log = open(config.log_file, 'a')  # Test if file is appendable
                except:
                    print '***** Error ********************',
                    print '***** Cannot write to log file: ' + config.log_file
                    raise IOError()
                # Write (append) header to log file
                #
                f_log.write(os.linesep)
                f_log.write(
                    '##################################################')
                f_log.write("############" + os.linesep)
                f_log.write("#" + os.linesep)
                f_log.write(
                    "# 'pyTrainHMM.py - Version 0.1' process started at: ")
                f_log.write(time.ctime(time.time()) + os.linesep)
                f_log.write("#" + os.linesep)
                f_log.write("# Input file name: " + in_file_name + os.linesep)
                f_log.write("# HMM file name: " + hmm_file_name + os.linesep)
                f_log.write(os.linesep)
                f_log.close()
            elif (options[0] == '-s'):
                smoothing = 1  # Set to do a HMM smoothing
                smoothing = options[1]
                if (smoothing in ['l', 'la', 'lap', 'laplac', 'laplace']):
                    smoothing = 'laplace'
                elif (smoothing in ['a','ad','abs','absd','absdis','absdisc',\
                                    'absdiscount']):
                    smoothing = 'absdiscount'
                else:  # Illegal value
                    print "*** Error: Illegal value for 'smoothing' argument:", smoothing
                    print "*** Possible are: 'laplace' or 'absdiscount'"
                    raise Exception()
                options = options[2:]  # Remove processed option
            else:
                print '*** Error: Illegal option:', options[0]
                raise Exception()

    # Get HMM states and observations from configuration module - - - - - - - - -
    #
    if (tag_mode == 'name'):
        state_list = config.name_hmm_states
        obser_list = config.name_hmm_obser
    else:
        state_list = config.geoloc_hmm_states
        obser_list = config.geoloc_hmm_obser

    # Open input file - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    try:
        f_in = open(in_file_name, 'r')
    except:
        inout.log_message('Cannot open input file: ' + in_file_name, 'err')
        raise IOError()

    line_count = 0  # Counter for lines read
    rec_count = 0   # Counter for training records read

    # Read lines, discard comment lines and process training data lines - - - - -
    #
    training_data = []  # List of training records

    train_list = [
    ]  # List of training sequences (dictionaries), extracted from
    # training data

    for line in xreadlines.xreadlines(f_in):
        if (line[0] != '#') and (line.strip() != ''):
            # Line must contain a training record
            line = line.strip()  # Remove line separators
            config.curr_line = line  # Make a copy of the unprocessed current line

            line_list = line.split(',')  # Split into a list of elements
            line_data = []  # Training data list for one training record

            inout.log_message(['Record number: ' + str(rec_count)], 'v1')
            config.curr_line_no = line_count  # Store current line number

            for elem in line_list:
                [k, v] = elem.split(':')  # Split into key and value
                tag = k.strip()
                state = v.strip()
                line_data.append((state, tag))

                if (state not in state_list):
                    msg = ['Illegal state name in training record: '+state, \
                           'Line: '+str(line_count)+', record: '+str(rec_count), \
                           'Possible values: '+str(state_list)]
                    inout.log_message(msg, 'err')
                    raise Exception()

                if (tag not in obser_list):
                    msg = ['Illegal observation (tag) name in training record: '+tag, \
                           'Line: '+str(line_count)+', record: '+str(rec_count), \
                           'Possible values: '+str(obser_list)]
                    inout.log_message(msg, 'err')
                    raise Exception()

            inout.log_message(' Training record '+str(rec_count)+':'+ \
                              str(line_data),'v1')

            train_list.append(line_data)
            rec_count += 1
            inout.log_message('', 'v1')  # Print empty lines between records

        line_count += 1

    # Close input file - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    f_in.close()

    inout.log_message('', 'v1')  # Print empty lines between records

    # Initalise HMM and train it with training data - - - - - - - - - - - - - - -
    #
    myhmm = simplehmm.hmm(state_list, obser_list)
    myhmm.train(train_list, smoothing)
    myhmm.print_hmm()

    # Save trained HMM - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    #
    myhmm.save_hmm(hmm_file_name)

    inout.log_message(['Read '+str(line_count)+' lines, processed '+ \
                       str(rec_count)+' training records', 'End.'],'v1')
def xreadlines(self):
    """Return a lazy line iterator over this file-like object.

    Delegates to the legacy xreadlines module; _check_open() guards
    against use after the file has been closed.
    """
    self._check_open()
    import xreadlines
    return xreadlines.xreadlines(self)
import os import sys import string import xreadlines # Determine what the source file is. sourceFile = None for arg in sys.argv[1:]: if re.search(".c$", arg): sourceFile = arg if sourceFile == None: sys.stderr.write("Unable to find a source file in " + string.join(sys.argv[1:])) sys.stderr.write("\n") sys.exit(1) sourcePath = os.path.dirname(sourceFile) # Have gcc generate the depenency information. We modify the line # with the filename so that it also includes the .d file, and so that # the complete path name is used. child = os.popen("gcc -M " + string.join(sys.argv[1:]), "r") for line in xreadlines.xreadlines(child): if re.search(":", line): (file, remainder) = re.split(":", line) print os.path.join(sourcePath, file), dependencyFile = re.split("\.", file)[0] print " %s.d: %s" % (os.path.join(sourcePath, dependencyFile), remainder), else: print line,
def __init__(self): self.count = 5 def readlines(self, sizehint = None): self.count = self.count - 1 return map(lambda x: "%d\n" % x, range(self.count)) class Null: pass import xreadlines lineno = 0 try: xreadlines.xreadlines(Null())[0] except AttributeError, detail: print "AttributeError (expected)" else: print "Did not throw attribute error" try: xreadlines.xreadlines(XReader)[0] except TypeError, detail: print "TypeError (expected)" else: print "Did not throw type error" try: xreadlines.xreadlines(XReader())[1] except RuntimeError, detail:
for attr, value in attrs: if attr == "href": self.current_href = value self.state = "a" def handle_data(self, data): if self.state == "a": self.adata = data def handle_endtag(self, tag): if tag == "a" and self.state == "a": self.writer(self.current_href, self.adata) self.state = "" self.adata = "" def default_writer(href, data): print href, data if __name__ == "__main__": import sys, xreadlines if len(sys.argv) < 2: print "%s <htmlfile>" % (sys.argv[0]) print "emits the hyperlink reference and associated text found in an HTML file." hget = HrefGetter(default_writer) fo = open(sys.argv[1], "r") for line in xreadlines.xreadlines(fo): hget.feed(line) fo.close()
from test_support import verbose


class XReader:
    # Fake file object: each readlines() call returns one fewer numbered
    # line than the previous call, driving xreadlines through several
    # shrinking refill batches.
    def __init__(self):
        self.count = 5

    def readlines(self, sizehint = None):
        self.count = self.count - 1
        return map(lambda x: "%d\n" % x, range(self.count))


class Null:
    # Object with no readlines() method at all.
    pass


import xreadlines

lineno = 0

# An object without readlines() must raise AttributeError immediately.
try:
    xreadlines.xreadlines(Null())[0]
except AttributeError, detail:
    print "AttributeError (expected)"
else:
    print "Did not throw attribute error"

# Passing the class itself (not an instance) must raise TypeError.
try:
    xreadlines.xreadlines(XReader)[0]
except TypeError, detail:
    print "TypeError (expected)"
else:
    print "Did not throw type error"

# xreadlines objects only allow sequential access; asking for index 1
# first must raise RuntimeError.
try:
    xreadlines.xreadlines(XReader())[1]
except RuntimeError, detail:
    print "RuntimeError (expected):", detail
else:
    print "Did not throw runtime error"

# Expected concatenation of the successive shrinking readlines() batches.
xresult = ['0\n', '1\n', '2\n', '3\n', '0\n', '1\n', '2\n', '0\n', '1\n', '0\n']
# Rewrite a flex-generated C scanner so it threads a SqlHandle through the
# helper functions and compiles under MSVC.
# NOTE(review): this chunk appears truncated -- `ofile` is opened but never
# written in the visible code; presumably each transformed line is written
# to it further down.
infile = file(sys.argv[1], 'r')
ofile = file(sys.argv[2], 'w')

# Patterns for the generated-scanner constructs that get rewritten below.
reStackUsed = re.compile(r'^\#if YY_STACK_USED$')
reProto = re.compile(r'^static int yy_get_next_buffer YY_PROTO\(\( void \)\)')
reGetNextDeclaration = re.compile(r'static int yy_get_next_buffer\(\)')
reGetNext = re.compile(r'yy_get_next_buffer\(\)')
reyyinput = re.compile(r'^static int yyinput\(\)$')
reReturnYyinput = re.compile(r'return yyinput\(\);$')
reunistd = re.compile('^\#include \<unistd\.h\>$')
reAlwaysInteractive = re.compile(r'\#if YY_ALWAYS_INTERACTIVE')
reNeverInteractive = re.compile(r'\#if YY_NEVER_INTERACTIVE')
reyymain = re.compile(r'\#if YY_MAIN')

for li in xreadlines.xreadlines(infile):
    li = misutils.stripcrlf(li)
    # Turn plain #if tests on possibly-undefined flex macros into #ifdef.
    li = reStackUsed.sub(r'#ifdef YY_STACK_USED', li)
    li = reProto.sub(r'', li)
    # Thread a SqlHandle argument through the scanner helper functions.
    li = reGetNextDeclaration.sub(
        r'static int yy_get_next_buffer( SqlHandle *dbsql )', li)
    li = reGetNext.sub(r'yy_get_next_buffer( dbsql )', li)
    li = reyyinput.sub(r'static int yyinput( SqlHandle *dbsql )', li)
    li = reReturnYyinput.sub(r'return yyinput( dbsql );', li)
    # <unistd.h> does not exist under MSVC; guard the include.
    li = reunistd.sub(
        r'#ifndef _MSC_VER' + '\n' + r'#include <unistd.h>' + '\n' + r'#endif',
        li)
    li = reAlwaysInteractive.sub(r'#ifdef YY_ALWAYS_INTERACTIVE', li)
    li = reNeverInteractive.sub(r'#ifdef YY_NEVER_INTERACTIVE', li)
    li = reyymain.sub(r'#ifdef YY_MAIN', li)
def readmail(self, mailfile):
    """Split every line of `mailfile` into at most three fields.

    Returns a list with one split-field list per input line.
    """
    return [string.split(entry, None, 2)
            for entry in xreadlines.xreadlines(mailfile)]
def get_build_requires(spec, bconds_with, bconds_without):
    """Parse an RPM spec file and print its BuildRequires, honouring bconds.

    `bconds_with` / `bconds_without` are lists of bcond names that seed the
    corresponding _with_X / _without_X macros used during macro expansion.
    """
    # %{?macro:text} / %{!?macro:text} conditional macro construct.
    cond_rx = re.compile(r"%\{(\!\?|\?\!|\?)([a-zA-Z0-9_+]+)\s*:([^%\{\}]*)\}")

    def expand_conds(l):
        # Expand conditionals: '?' emits text when the macro is defined,
        # '!?' / '?!' when it is not.
        def expand_one(m):
            if m.group(1) == "?":
                if macros.has_key(m.group(2)):
                    return m.group(3)
            else:
                if not macros.has_key(m.group(2)):
                    return m.group(3)
            return ""
        # Iterate to handle nested constructs; bail out if expansion blows up.
        for i in range(10):
            l = cond_rx.sub(expand_one, l)
            if len(l) > 1000:
                break
        return l

    macro_rx = re.compile(r"%\{([a-zA-Z0-9_+]+)\}")

    def expand_macros(l):
        # Substitute %{name} macros from the table, then expand conditionals.
        def expand_one(m):
            if macros.has_key(m.group(1)):
                return string.strip(macros[m.group(1)])
            else:
                return m.group(0)  # don't change
        for i in range(10):
            l = macro_rx.sub(expand_one, l)
            if len(l) > 1000:
                break
        return expand_conds(l)

    # Recognized spec-file directives.
    simple_br_rx = re.compile(r"^BuildRequires\s*:\s*([^\s]+)", re.I)
    bcond_rx = re.compile(r"^%bcond_(with|without)\s+([^\s]+)")
    version_rx = re.compile(r"^Version\s*:\s*([^\s]+)", re.I)
    release_rx = re.compile(r"^Release\s*:\s*([^\s]+)", re.I)
    name_rx = re.compile(r"^Name\s*:\s*([^\s]+)", re.I)
    define_rx = re.compile(r"^\%define\s+([a-zA-Z0-9_+]+)\s+(.*)", re.I)
    any_br_rx = re.compile(r"BuildRequires", re.I)

    # Seed the macro table with the requested bconds plus common builtins.
    macros = {}
    for b in bconds_with:
        macros["_with_%s" % b] = 1
    for b in bconds_without:
        macros["_without_%s" % b] = 1
    macros["__perl"] = "/usr/bin/perl"
    macros["_bindir"] = "/usr/bin"
    macros["_sbindir"] = "/usr/sbin"
    macros["kgcc_package"] = "gcc"

    build_req = []
    f = open(spec)
    for l in xreadlines.xreadlines(f):
        l = string.strip(l)
        if l == "%changelog":
            break  # nothing of interest past the changelog
        # %bcond_with..
        m = bcond_rx.search(l)
        if m:
            bcond = m.group(2)
            if m.group(1) == "with":
                if macros.has_key("_with_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            else:
                if not macros.has_key("_without_%s" % bcond):
                    macros["with_%s" % bcond] = 1
            continue
        # name,version,release
        m = version_rx.search(l)
        if m:
            macros["version"] = m.group(1)
        m = release_rx.search(l)
        if m:
            macros["release"] = m.group(1)
        m = name_rx.search(l)
        if m:
            macros["name"] = m.group(1)
        # %define
        m = define_rx.search(l)
        if m:
            macros[m.group(1)] = m.group(2)
        # *BuildRequires*
        if any_br_rx.search(l):
            l = expand_macros(l)
            m = simple_br_rx.search(l)
            if m:
                build_req.append(m.group(1))
            else:
                # Not a simple one-package BuildRequires line; report it
                # unless it is empty or a comment after expansion.
                if l <> "" and l[0] <> '#':
                    msg("spec error (%s): %s\n" % (spec, l))
    for x in build_req:
        print x
# Copy a PostScript stream from stdin to stdout, rewriting %%BoundingBox
# lines so that the box is anchored at the origin: the old lower-left corner
# is folded into the upper-right corner as an offset.
import sys
from xreadlines import xreadlines

for line in xreadlines(sys.stdin):
    if line.startswith("%%BoundingBox:"):
        tokens = line.split()
        left = int(tokens[1])
        bottom = int(tokens[2])
        right = int(tokens[3]) + left
        top = int(tokens[4]) + bottom
        line = "%%%%BoundingBox: %d %d %d %d\n" % (0, 0, right, top)
    sys.stdout.write(line)
def _parseGFFfile(self,fhandle,fileName):
    """Parses the pairwise alignment GFF file. This is used to clear up the
    addGFFfile() function.

    Reads GFF lines from `fhandle`: 'CisModule' features open a new cis
    module/slice, other features become site rows inserted into the
    database.  NOTE(review): indentation partially reconstructed from a
    whitespace-mangled source -- verify against the original file.
    """
    currMod=None
    currModId=""
    cisModRows=0
    # Book-keeping for columns
    seqsHaveCol={}
    for line in xreadlines.xreadlines(fhandle):
        line=line.strip()
        if len(line)==0 or line[0]=='#':
            ## Skip empty and comment lines.
            continue
        ## Parse a GFF line (9 tab-separated fields, '#' comments stripped)
        try:
            seq,src,feat,start,stop,score,strand,frame,attribs=line.split('#')[0].split("\t",8)
        except ValueError:
            print line
            raise
        attribs=self._parseAttribs(attribs)
        # Start of a new cis module
        if feat=='CisModule':
            # Target value may be str (strip) or another type (replace).
            try:
                seq2=attribs["Target"].strip('"')
            except TypeError:
                seq2=attribs["Target"].replace('"','')
            start2=int(attribs["Start"])
            stop2=int(attribs["End"])
            if cisModRows<1:
                # First row of this module: register the region mapping.
                regionMap=self.addCisModule(float(score),seq,int(start),int(stop),\
                                            seq2,start2,stop2)
            cisModRows+=1
            self.addSlice(self.currentCisID,regionMap,fileName)
            # Must create the first column.
            seqsHaveCol[seq]=1
        else:
            cisModRows=0
            if seqsHaveCol.has_key(seq):
                # New column
                seqsHaveCol={}
                colId=self.makeNewColumn(feat,int(stop)-int(start),0.0)
            seqsHaveCol[seq]=1
            cur=self.db.cursor()
            # To get the strands correctly:
            start,stop,strand=self.mapWithinRegion(regionMap[seq][1],start,stop,strand)
            assert(start<stop)
            ins="INSERT INTO sites (pos,regID,colID,strand) VALUES (%d,%d,%d,'%s')"%(start,regionMap[seq][0],colId,strand)
            cur.execute(ins)
            #assert(lineModId==currModId)
            #siteId=Site(feat,float(score),strand,int(stop)-int(start))
            #currMod.appendSite({seq:int(start)},siteId)
            cur.close()
    print "Done"
def evalSynth(self, synthname):
    """Compile <synthname>.sc into a synth definition file and load it.

    Wraps the source in a SynthDef, has sclang write the compiled def to a
    temporary path, loads it via self.loadSynthDef, then removes both
    temporary files.
    """
    # NOTE(review): tempfile.mktemp is race-prone; mkstemp would be safer,
    # but the bare names must be usable by the external sclang process.
    tmpname = tempfile.mktemp(".sc")
    outfile = open(tmpname, "w")
    try:
        outfile.write('SynthDef("' + synthname + '",{')
        infile = open(synthname + ".sc", "r")
        try:
            for line in xreadlines.xreadlines(infile):
                outfile.write(line)
        finally:
            infile.close()  # BUGFIX: input file handle was never closed
        tmpname2 = tempfile.mktemp("")
        outfile.write('}).writeDefFile("' + tmpname2 + '");\n')
    finally:
        outfile.close()
    os.system("sclang " + tmpname)
    # The compiled def is expected at tmpname2 + synthname + ".scsyndef"
    # (see the writeDefFile argument above) -- confirm against sclang docs.
    tmpname2 += synthname + ".scsyndef"
    self.loadSynthDef(tmpname2)
    os.system("rm " + tmpname + " " + tmpname2)