def get_rar_file_index(subject, last_index):
    # place *.vol###+##.par2 at the end
    rar_match = sre.search("\".+?\.vol\d+\+\d+\.par2\"", subject)
    if rar_match != None:
        last_index -= 1
        return last_index
    # place par2 header first
    rar_match = sre.search("\".+?\.par2\"", subject)
    if rar_match != None:
        return 0
    # place *.part##.rar files simply in order of the ##
    rar_match = sre.search("\".+?\.part(\d+).rar\"", subject)
    if rar_match != None:
        return int(rar_match.group(1)) + 1
    # place *.rar files before *.r##
    rar_match = sre.search("\".+?\.rar\"", subject)
    if rar_match != None:
        return 1
    # place *.r## files simply in order of the ##
    rar_match = sre.search("\".+?\.r(\d+)\"", subject)
    if rar_match != None:
        return int(rar_match.group(1)) + 2
    # place anything else at the end
    last_index -= 1
    return last_index
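# A minimal usage sketch (not from the original source): sorting a made-up
# list of NZB subject lines by the index get_rar_file_index assigns. A
# last_index larger than any part number stands in for the real caller's
# bookkeeping; ties (e.g. several vol+recovery files) keep their relative
# order because list.sort is stable.
import sre

if __name__ == "__main__":
    subjects = [
        '"movie.vol000+01.par2" yEnc (1/10)',
        '"movie.part002.rar" yEnc (1/50)',
        '"movie.par2" yEnc (1/1)',
        '"movie.part001.rar" yEnc (1/50)',
    ]
    subjects.sort(key=lambda s: get_rar_file_index(s, 100))
    for s in subjects:
        print s  # movie.par2, part001, part002, then the vol par2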
def FindSuffix(self):
    """Identify any known suffixes, mark off as syllables and possible stresses.

    Syllables are stored in a class-wide compiled RE. We identify them and
    list them backwards so as to "cut off" the last first. We consult a
    global-to-module list of those that force stress on previous syllable.
    """
    self.numSuffixes = 0
    self.forceStress = 0
    resultslist = []
    for f in self.suffixes.finditer(self.wd):
        resultslist.append((f.group(), f.start()))
    if not resultslist:
        return
    # make sure *end* of word is in list! otherwise, 'DESP erate'
    if resultslist[-1][1] + len(resultslist[-1][0]) < len(self.wd):
        return
    resultslist.reverse()
    for res in resultslist:
        # if no vowel left before, false suffix ('singing')
        # n.b.: will choke on 'quest' etc! put in dictionary, I guess
        if not sre.search('[aeiouy]', self.wd[:res[1]]):
            break
        if res[0] == 'ing' and self.wd[res[1]-1] == self.wd[res[1]-2]:
            self.sylBounds.append(res[1] - 1)  # freq special case
        else:
            self.sylBounds.append(res[1])  # sorted later
        self.wd = self.wd[:res[1]]
        self.numSuffixes += 1
        if res[0] in STRESSSUFFIX:
            self.forceStress = 0 - len(self.sylBounds)
        if res[0] in MULTISUFFIX:
            # tricky bit! it *happens* that secondary division in all these
            # comes after its first character; NOT inevitable!
            # also does not allow for 3-syl: 'ically' (which are reliable!)
            self.sylBounds.append(res[1] + 1)
            self.numSuffixes += 1
def DivideCV(self):
    """Divide the word among C and V groups to fill the sylBounds list.

    Here, and here alone, we need to catch e-with-grave-accent to count it
    as not only a vowel but syllabic ('an aged man' vs. 'aged beef'). Other
    special characters might be useful to recognize, but won't make the
    same syllabic difference.
    """
    unicodeVowels = u"[ae\N{LATIN SMALL LETTER E WITH GRAVE}iouy]+"
    uniConsonants = u"[^ae\N{LATIN SMALL LETTER E WITH GRAVE}iouy]+"
    firstvowel = sre.search(unicodeVowels, self.wd).start()
    for v in sre.finditer(unicodeVowels, self.wd):
        lastvowel = v.end()  # replaced for each group, last sticks
        disyllabicvowels = self.sylvowels.search(v.group())
        if disyllabicvowels:
            self.sylBounds.append(v.start() + disyllabicvowels.start() + 1)
    for cc in sre.finditer(uniConsonants, self.wd):
        if cc.start() < firstvowel or cc.end() >= lastvowel:
            continue
        numcons = len(cc.group())
        if numcons < 3:
            pos = cc.end() - 1  # before single C or betw. 2
        elif numcons > 3:
            pos = cc.end() - 2  # before penult C
        else:  # 3 consonants, divide 1/2 or 2/1?
            cg = cc.group()  # our CCC cluster
            if cg[-3] == cg[-2] or self.splitLeftPairs.search(cg):
                pos = cc.end() - 2  # divide 1/2
            else:
                pos = cc.end() - 1  # divide 2/1
        if not self.wd[pos-1].isalpha() and not self.wd[pos].isalpha():
            self.sylBounds.append(pos - 1)
        else:
            self.sylBounds.append(pos)
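# Hedged illustration (not part of the original class): DivideCV scans the
# word as alternating vowel/consonant runs. This standalone helper shows that
# carving with the same kind of character classes, minus the e-with-grave
# special case handled above.
import sre

def _cv_groups(word):
    """Return [('C', run), ('V', run), ...] for a lowercase word."""
    out = []
    for m in sre.finditer(u"([aeiouy]+)|([^aeiouy]+)", word):
        if m.group(1):
            out.append(('V', m.group(1)))
        else:
            out.append(('C', m.group(2)))
    return out

# _cv_groups(u"syllable") == [('C', u's'), ('V', u'y'), ('C', u'll'),
#                             ('V', u'a'), ('C', u'bl'), ('V', u'e')]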
def mv_nl(fname):
    m = sre.search("(.+)\sNomad", fname)
    if m:
        print ">%s< ==> [%s]**************" % (fname, m.group(1))
        print os.rename(fname, m.group(1))
    else:
        print "NOM>%s<" % fname
def Preliminaries(self):
    apostrophe = self.wd.find("\'", -2)  # just at end of word ('twas)
    if apostrophe != -1:  # poss.; check if syllabic and remove
        if (self.wd[-1] != '\'' and self.wd[-1] in 'se'
                and self.wd[-2] in SIBILANTS):
            self.sylBounds.append(apostrophe)
        self.wd = self.wd[:apostrophe]  # cut off ' or 's until last stage
    # cut final s/d from plurals/pasts if not syllabic
    self.isPast = self.isPlural = False  # defaults used also for suffixes
    if sre.search(r"[^s]s\b", self.wd):
        self.isPlural = True  # terminal single s (DUMB!)
    if sre.search(r"ed\b", self.wd):
        self.isPast = True  # terminal 'ed'
    if self.isPast or self.isPlural:
        self.wd = self.wd[:-1]
    # final-syl test turns out to do better work *after* suffixes cut off
    self.FindSuffix()
    # if final syllable is l/r+e, reverse letters for processing as syllable
    if len(self.wd) > 3 and self.liquidterm.search(self.wd):
        self.wd = self.wd[:-2] + self.wd[-1] + self.wd[-2]
def do_screens(fobj, emitter, regexp=DP_SCREEN_LS_REGEXP):
    while True:
        line = fobj.readline()
        if len(line) == 0:
            break
        # use the regexp argument; the original always searched with the
        # module default, silently ignoring the parameter
        m = sre.search(regexp, line)
        emitter(m, line)
def _verify_with_par2(self, par2_file):
    try:
        # if not os.path.exists(self.status_filepath):
        cmd = '"' + os.path.join(self.par2exe_directory, 'par2.exe') + '"'
        args = ' v -q "' + par2_file + '"'
        print cmd, args, self.status_filepath
        cmd = '"' + cmd + args + ' > "' + self.status_filepath + '""'
        cmd.encode('utf8')
        os.system(cmd)
        #print par2exe.readlines()
        #from subprocess import *
        #par2exe = Popen(["c:/temp/par2.exe"], stdout=PIPE, stderr=STDOUT, stdin=PIPE)
        #par2exe.wait()
        status_file = open(self.status_filepath)
        lines = status_file.readlines()
        status_file.close()
        par2_targets = dict()
        for line in lines:
            # Target: "foo.rar" - found.
            # Target: "bar.rar" - missing.
            # Target: "baz.rar" - damaged.
            par2_target_match = sre.search("Target: \"(.+?)\" - (\S+)\.", line)
            if par2_target_match:
                par2_targets[par2_target_match.group(1).lower()] = \
                    par2_target_match.group(2)
        return par2_targets
    except:
        traceback.print_exc()
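# Hedged demo (not in the original): what the Target-line regex above
# extracts from one made-up line of par2 output.
#   >>> sre.search("Target: \"(.+?)\" - (\S+)\.",
#   ...            'Target: "foo.rar" - found.').groups()
#   ('foo.rar', 'found')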
def getattrval(body, attr):
    body = sre.sub("([^\>]*)\>([^\000]*)", "\\1", body)
    if sre.search(attr + "=(\"|'|)([^\\1\ \>]*)\\1", body) != None:
        delim = sre.sub("[^\>]* " + attr + "=(\"|'|)([^\\1\ \>]*)\\1([^\>]*)",
                        "\\1", body)
        exp = "[^\>]* " + attr + "=(\\" + delim + ")([^"
        if delim == "":
            exp += "\ "
        else:
            exp += delim
        exp += "\>]*)\\" + delim + "([^\>]*)"
        return sre.sub(exp, "\\2", body)
    else:
        return ""
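# Hedged usage sketch (made-up markup): for a quoted attribute in a single
# tag, getattrval pulls out the value. The unquoted-attribute path builds a
# fragile expression, so only the quoted case is shown here.
#   >>> getattrval('<a href="http://example.com/">link</a>', "href")
#   'http://example.com/'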
def sanitize(self):
    # sanitize the query to guard against non-selects
    if not sre.search('^\s*select[^;]*$', self['query'], sre.I):
        usage()
        print "ERROR: Queries must be select statements without ';'s"
        sys.exit(3)
    if not sre.search('^[a-zA-Z][\w_]*$', self['table'], sre.I):
        usage()
        print "ERROR: TABLE must be 1 word consisting of alphanumeric + '_'"
        sys.exit(3)
    if not sre.search('^([a-zA-Z][\w_]*|)$', self['procname'], sre.I):
        usage()
        print "ERROR: proc-name must be 1 word consisting of alphanumeric + '_'"
        sys.exit(3)
    if not sre.search(
            'order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)\s*,\s*([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)',
            self['query'], sre.I):
        usage()
        print "ERROR: Queries must have an order by tail_num, measurement_time"
        sys.exit(3)
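# Hedged example (made-up query): the order-by check above accepts either a
# bare column or a table-qualified one, case-insensitively.
#   >>> q = "select * from flights order by t.tail_num, t.measurement_time"
#   >>> sre.search('order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)'
#   ...            '\s*,\s*([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)',
#   ...            q, sre.I) is not None
#   True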
def SpecialCodes(self):
    """Encode character-combinations so as to trick DivideCV.

    The combinations are contained in regexes compiled in the class's
    __init__. Encoding (*not* to be confused with Unicode functions!) is
    done by small functions outside of (and preceding) the class.

    The combinations in Paul Holzer's original code have been supplemented
    and tweaked in various ways. For example, the original test for [iy]V
    is poor; 'avionics' defeats it; so we leave that to a new
    disyllabic-vowel test. The messy encoding-and-sometimes-decoding of
    nonsyllabic final 'e' after a C seems the best that can be done, though
    I hope not.
    """
    if sre.search(r"[^aeiouy]e\b", self.wd):  # nonsyllabic final e after C
        if ((not self.isPlural or self.wd[-2] not in SIBILANTS)
                and (not self.isPast or self.wd[-2] not in "dt")):
            self.wd = self.wd[:-1] + encode(self.wd[-1])
        if not sre.search(r"[aeiouy]", self.wd):  # any vowel left??
            self.wd = self.wd[:-1] + "e"  # undo the encoding
    self.wd = self.CiVcomb.sub(handleCiV, self.wd)
    self.wd = self.CCpair.sub(handleCC, self.wd)
    self.wd = self.VyVcomb.sub(handleVyV, self.wd)
def get_next_image_dir():
    dnbase = "./imgs"
    lst = glob.glob(dnbase + '-[0-9][0-9][0-9][0-9]')
    if lst:
        lst.sort()
        last = lst[len(lst) - 1]
        m = sre.search(sre.compile(dnbase + "-(\d+)$"), last)
        nlast = int(m.group(1))
        fno = nlast + 1
    else:
        fno = 1
    dirname = (dnbase + "-%04d") % (fno)
    # printf("dirname=%s\n", dirname);
    os.mkdir(dirname)
    return dirname
def SSLSpoofCheck(self, host):
    spoof = None
    if self.spoof_ssl_config.has_key(self.path):
        spoof = self.spoof_ssl_config[self.path]
    else:
        for i in self.spoof_ssl_config:
            if sre.search(i, self.path):
                self.log.debug('SSLSpoofCheck %s matched %s' % (i, host))
                spoof = self.spoof_ssl_config[i]
                break
    if spoof == None:
        self.log.debug('SSLSpoofCheck no matches, using DEFAULT')
        spoof = self.spoof_ssl_config['DEFAULT']
    self.log.debug('SSLSpoofCheck for %s: %s' % (host, spoof))
    return spoof
def _ProcessRedirects(self):
    self.log.debug('Entering _ProcessRedirects')
    newurl = None
    for target in self.http_redirect_table:
        match = sre.search(target, self.url)
        if match:
            self.log.debug('Matched %s on %s' % (target, self.url))
            newurl = match.expand(self.http_redirect_table[target])
            self.log.debug('  expanded %s to %s'
                           % (self.http_redirect_table[target], newurl))
            break
    if not newurl:
        self.log.debug('No matches on %s' % self.url)
        self.spoof_url = self.url
    else:
        self.spoof_url = newurl
def html_format(src):
    out = []
    while src != "":
        m = sre.search(pat_token, src)
        if m is None:
            break
        out.append(src[:m.start()])
        token = src[m.start():m.end()]
        src = src[m.end():]
        if sre.match(pat_paragraph, token):
            out.append(P())
        elif sre.match(pat_uri, token):
            out.append(link(token, token))
        elif sre.match(pat_tabulate, token):
            qtriples, src = parse_tabulate(src)
            tabulate(out, qtriples)
        elif sre.match(pat_slink, token):
            contents = token[1:-1].split()
            if 0 == len(contents):
                pass  # XXX error message?
            else:
                # XXX security screen target and caption
                # (caption should not look like a URL itself)
                # XXX url encoding
                # XXX nofollow
                if contents[0].startswith("http:"):
                    target = contents[0]
                    if 1 == len(contents):
                        caption = contents[0]
                    else:
                        caption = " ".join(contents[1:])
                else:
                    caption = " ".join(contents)
                    target = "/page/" + caption
                out.append(link(target, caption))
        elif sre.match(pat_escape, token):
            out.append(token[1])
        else:
            raise "Bug"
    out.append(src)
    return out
def getpage(url, dheaders=1, redir=0, realpage=0, poststring="", exceptions=0):
    # function to recurse and try getpage() again with new values
    def recurse(exceptions):
        sock.close()
        exceptions += 1
        if exceptions <= 6:
            return getpage(url, dheaders, redir, realpage, poststring,
                           exceptions)
        else:
            print "Too many recursions, skipping..."
            return
    global usecookies, urllogfile, debug, ignorefileext
    if not checkserver(servername(url)):
        return
    if url.find("#") != -1:
        url = url[:url.find("#")]
    # file extensions that need to be ignored
    fileext = sre.sub(".*(http\://[^/]*/).*", "\\1", url)
    if url == fileext:
        fileext = "None"
    else:
        fileext = sre.sub("^.*\/[^/]*\.([^\&\#\?\/]*)[^/]*$", "\\1", url)
    if ignorefileext.count("," + fileext + ",") != 0:
        return
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.connect((servername(url, False), 80))
        workurl = pagename(url)
        theurl = url
        if redir != 1:
            theurl = workurl
        qrytype = "GET"
        if poststring != "":
            qrytype = "POST"
        out = (qrytype + " " + theurl + " HTTP/1.1\n"
               "Host: " + servername(url, False) + "\n"
               "Connection: close\n")
        if usecookies:
            global cookies
            out += "Cookie: " + cookies + "\n"
        if poststring != "":
            out += "Content-Type: application/x-www-form-urlencoded\n"
            out += "Content-Length: " + str(len(poststring))
            out += "\n\n" + poststring + "\n"
        out += "\r\n\r\n"
        sock.send(out)
        # get response type and log the page
        response = sock.recv(12)[-3:]
        fp = open("logs/" + urllogfile, "a")
        fp.write(url + ": " + response + " " + str(realpage) + "\n")
        if poststring != "":
            fp.write(indent + "POST: " + poststring + "\n")
        fp.close()
        # at 404 response, close connection and fail
        if response == "404" or response == "500":
            sock.close()
            return
        # at 30[1237] response types, recurse new page
        if sre.search("30[1237]", response):
            while 1:
                chunk = ""
                byte = sock.recv(1)
                while byte != "\r":
                    chunk += byte
                    byte = sock.recv(1)
                sock.recv(1)
                if chunk.lower()[:9] == "location:":
                    location = chunk.lower()[9:].strip()
                    if location == "http://" + servername(url, False) + url:
                        location = "/"
                    locpage = fullpath(location, url)
                    sock.close()
                    # if url[len(url)-2:]=="" and locpage[len(locpage)-4:]=="": break
                    redir = 1
                    if locpage != url:
                        redir = 0
                    if pagename(sre.sub("\\\\(\"|\')", "\\1", locpage)) == pagename(url):
                        print "QUOTE REDIR"
                        return
                    print "OLD:", url
                    print "NEW:", chunk.lower()
                    print "REDIR:", locpage
                    return getpage(locpage, redir=redir, realpage=realpage)
            if realpage == 1:
                sock.close()
                return
        elif realpage == 1:
            sock.close()
            return url
        # get headers, ignoring certain HTTP headers
        headers = ""
        type = 0
        while 1:
            chunk = ""
            byte = sock.recv(1)
            if byte == "\r":
                sock.recv(1)
                break
            while byte != "\r":
                chunk += byte
                byte = sock.recv(1)
            sock.recv(1)
            if (chunk.lower()[:11] != "set-cookie:"
                    and chunk.lower()[:5] != "date:"
                    and chunk.lower()[:15] != "content-length:"
                    and chunk.lower()[:11] != "keep-alive:"
                    and chunk.lower()[:18] != "transfer-encoding:"
                    and chunk.lower()[:11] != "connection:"):
                headers += chunk
            # if chunk.lower()[:15]=="content-length:":
            #     type=1
            #     conlen=int(chunk[16:])
            if chunk.lower()[:26] == "transfer-encoding: chunked":
                type = 2
        # no special type specified, just get the page
        if type == 0:
            body = ""
            while 1:
                chunk = sock.recv(200)
                body += chunk
                if chunk == "":
                    break
        # set it up if it does have a type
        # else:
        #     byte=sock.recv(1)
        #     if byte=="\r": sock.recv(1)
        #     else:
        #         while 1:
        #             i=-1
        #             while byte!="\r":
        #                 i+=1
        #                 byte=sock.recv(1)
        #             nbytes=sock.recv(3)
        #             if nbytes=="\n\r\n": break
        # content-length
        # if type==1:
        #     body=""
        #     for i in range(conlen):
        #         chunk=sock.recv(1)
        #         body+=chunk
        # transfer-encoding: chunked
        if type == 2:
            body = ""
            chunksize = ""
            while chunksize != 0:
                byte = ""
                chunk = ""
                while byte != "\r":
                    chunk += byte
                    byte = sock.recv(1)
                sock.recv(1)
                chunksize = int(chunk, 16)
                wchunksz = chunksize
                while wchunksz >= 1:
                    subchunk = sock.recv(wchunksz)
                    body += subchunk
                    wchunksz -= len(subchunk)
                sock.recv(2)
        # clean up and return
        sock.close()
        if dheaders != 1:
            headers = ""
        return [headers, body, urlfix(url)]
    # catch socket errors, such as "connection reset by peer" - tries again
    # until it gives up and goes on to the next page
    except socket.error:
        print "Socket Error, Recursing..."
        return recurse(exceptions)
def main(argv):
    # Main program. Takes string containing arguments a la unix command line
    p = Params()
    p.define_defaults()
    p.read_params(argv)
    p.read_config()
    p.parse_param_opts()
    p.sanitize()
    # connect
    my = MySQLdb.connect(**p['con_opts'])
    # create cursor to contain data about tail_nums to parse
    mycur = my.cursor()
    # create procedure name or get it from user
    if p['procname'] == "":
        rn = random.randint(0, 1000000)
        procname = 'P%s' % rn
    else:
        procname = p['procname']
    # string to store all cmds issued to DB
    all_cmds = ""
    # if dropping procedure
    if p['drop_proc']:
        cmd = 'DROP PROCEDURE IF EXISTS %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)
    # start working on procedure string
    ps = procedure_string
    # if add drop table
    if p['drop']:
        ps = ps.replace('<<DROP>>', 'DROP TABLE IF EXISTS <<TABLE>>;')
    else:
        ps = ps.replace('<<DROP>>', '')
    # sub table and first select
    ps = ps.replace('<<TABLE>>', p['table'])
    ps = ps.replace('<<SELECTASC>>', p['query'])
    # turn ASC to DESC
    mtch = sre.search(
        'order\s+by\s+([a-zA-Z][\w_]*[.]|)(?P<tn>tail_num)\s*,\s*([a-zA-Z][\w_]*[.]|)(?P<mt>measurement_time)',
        p['query'], sre.I)
    tn_pos = mtch.span('tn')
    mt_pos = mtch.span('mt')
    qry = (p['query'][:tn_pos[0]] + 'tail_num DESC'
           + p['query'][tn_pos[1]:mt_pos[0]] + 'measurement_time DESC')
    # sub second select
    ps = ps.replace('<<SELECTDESC>>', qry)
    # finally substitute procname
    ps = ps.replace('<<PROCNAME>>', procname)
    # if not debug execute, otherwise just print
    if not p['debug']:
        mycur.execute(ps)
    # add to all cmds string
    all_cmds += ps
    # call the procedure unless no_call is set
    if not p['no_call']:
        cmd = 'call %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)
    # if leave_proc
    if not (p['leave_proc'] or p['no_call']):
        cmd = 'DROP PROCEDURE %s;\n' % procname
        all_cmds += cmd
        if not p['debug']:
            mycur.execute(cmd)
    if p['debug']:
        print all_cmds
def fromString(source):
    import sre
    result = {}
    insertionorder = []
    fail = False
    originalsource = source  # preserve original in case of broken header
    headervalueRE_sX = "^([^: ]+[^:]*):( ?)((.|\n)+)"  # TODO: This could be optimised
    lines = source.split("\r\n")
    I = 0
    headerLines = []
    valid = False
    for I in xrange(len(lines)):
        if lines[I] != "":
            headerLines.append(lines[I])
        else:
            # The divider cannot be the last line
            valid = not (I == len(lines) - 1)
            break
    if not valid:
        body = originalsource
        fail = True
    else:
        bodyLines = lines[I + 1:]
        body = "\r\n".join(bodyLines)
        key = None
        for line in headerLines:
            match = sre.search(headervalueRE_sX, line)
            if match:
                (key, spaces, value, X) = match.groups()
                if value == " " and not spaces:
                    value = ""
                try:
                    result[key].append(value)
                except KeyError:
                    result[key] = value
                except AttributeError:
                    result[key] = [result[key], value]
                insertionorder.append(key)
            else:
                if key:
                    # value = line.strip()  # Strictly speaking, surely we
                    # should be doing this??? (Breaks tests though if we do...)
                    value = line
                    if isinstance(result[key], list):
                        # Append to last item in the list
                        result[key][len(result[key]) - 1] += "\r\n" + value
                    else:
                        result[key] += "\r\n" + value
                else:
                    # print "NOMATCH!NOMATCH!NOMATCH!NOMATCH!NOMATCH!NOMATCH!"
                    fail = True
                    break
    if not fail:
        result["__BODY__"] = body
    else:
        result["__BODY__"] = originalsource
    md = MimeDict(**result)
    md.insertionorder = insertionorder
    md.invalidSource = fail
    return md
def treepages(url, level):
    global treeglob, urlfields, postfields, treedurls, levels, server, vulnlogfile, scanlimit, ignorefileext
    print ">>>>>>>>", level, "<<<<<<<<"
    print " ---> " + url
    pageinfo = getpage(url)
    if listempty(pageinfo):
        return
    body = pageinfo[1].lower()
    print "AA"
    # select/option, textarea
    # check for forms
    bodyarr = sre.split("<form", body)
    for i in range(len(bodyarr)):
        frmsect = bodyarr[i][:bodyarr[i].find(">")]
        frmbody = bodyarr[i][bodyarr[i].find(">"):][:bodyarr[i].find("</form>")]
        actionurl = getattrval(frmsect, "action")
        if actionurl == "" or actionurl == frmsect or actionurl == "\"\"":
            actionurl = pageinfo[2]
        if actionurl.count(";") > 0:
            actionurl = actionurl[actionurl.find(";") + 1:]
        if actionurl[:11].lower() == "javascript:":
            continue
        actionurl = fullpath(actionurl, pageinfo[2])
        print "ACTION:", actionurl
        # get the input variables
        poststring = ""
        inputarr = sre.sub("(.*?)\<input([^\>]*)\>(.*?)", "\\2|ZZaaXXaaZZ|",
                           frmbody).split("|ZZaaXXaaZZ|")
        for j in range(len(inputarr)):
            name = getattrval(inputarr[j], "name")
            if name == inputarr[j] or name == "" or name == "\"\"":
                continue
            value = getattrval(inputarr[j], "value")
            if value == inputarr[j] or value == "" or value == "\"\"":
                value = ""
            if poststring != "":
                poststring += "&"
            poststring += name + "=" + value
        # get select/option tags
        selectarr = sre.sub("(.*?)\<select([^\>]*)\>(.*?)", "\\2|ZZaaXXaaZZ|",
                            frmbody).split("|ZZaaXXaaZZ|")
        for j in range(len(selectarr)):
            name = getattrval(selectarr[j], "name")
            if name == selectarr[j] or name == "" or name == "\"\"":
                continue
            value = sre.sub(
                "(.*?)\<option([^\>]*)value=(\"|'|)([^\\3\ ]*)\\3([^\>]*)\>(.*?)",
                "\\2", selectarr[j])
            if value == selectarr[j] or value == "" or value == "\"\"":
                value = ""
            if poststring != "":
                poststring += "&"
            poststring += name + "=" + value
            print "sel/opt: " + name + "=" + value
        if poststring == "":
            continue
        if sre.search("method=([\'\"]|)post([\'\"]|)",
                      frmsect[:frmsect.find(">")].lower()) == None:
            if actionurl.find("?") != -1:
                actionurl += "&"
            else:
                actionurl += "?"
            actionurl += poststring
            body += '<a href="' + actionurl + '">'
            print 'GETT <a href="' + actionurl + '">'
            continue
        # determine if it needs to be scanned, and if so, scan it
        postscan = 0
        postvars = poststring.split("&")
        if postfields.has_key(actionurl):
            for j in range(len(postvars)):
                postvars[j] = postvars[j][:postvars[j].find("=")]
                if postfields[actionurl].count(postvars[j]) == 0:
                    postfields[actionurl].append(postvars[j])
                    postscan = 1
        else:
            for j in range(len(postvars)):
                postvars[j] = postvars[j][:postvars[j].find("=")]
            postfields[actionurl] = postvars
            postscan = 1
        if postscan == 1:
            vulns = checkvars(actionurl, poststring)
            if not listempty(vulns):
                dispvulns(vulns, actionurl)
    print "BB"
    # check for urls in "href" tags
    # ? # part of 3? (src|href|location|window.open)= and http://
    urlreg = ("(\'|\")(?!javascript:)(([^\>]+?)(?!\.("
              + ignorefileext.replace(",", "|") + "))(.{3,8}?)(|\?([^\>]+?)))")
    urlarr = sre.sub(
        "(?s)(?i)(.+?)((src|href)=|location([\ ]*)=([\ ]*)|window\.open\()"
        + urlreg + "\\6", "\\7|ZZaaXXaaZZ|", body).split("|ZZaaXXaaZZ|")
    del urlarr[len(urlarr) - 1]
    urlarr.append(sre.sub("(?s)(?i)(.+?)(src|href)=" + urlreg + "\\3",
                          "\\4|ZZaaXXaaZZ|", body).split("|ZZaaXXaaZZ|"))
    del urlarr[len(urlarr) - 1]
    for i in range(len(urlarr)):
        theurl = fullpath(urlarr[i], pageinfo[2])
        if not checkserver(servername(theurl)):
            continue
        # determine if it needs scanned and/or treed; if so, scan and/or tree it
        getscan = 0
        if theurl.count("?") != 0:
            nqurl = theurl[:theurl.find("?")]
            query = theurl[theurl.find("?") + 1:]
            query = sre.sub("\&\;", "\&", query)
            qryvars = query.split("&")
            if urlfields.has_key(nqurl):
                for j in range(len(qryvars)):
                    qryvars[j] = qryvars[j][:qryvars[j].find("=")]
                    if urlfields[nqurl].count(qryvars[j]) == 0:
                        urlfields[nqurl].append(qryvars[j])
                        getscan = 1
            else:
                for j in range(len(qryvars)):
                    qryvars[j] = qryvars[j][:qryvars[j].find("=")]
                urlfields[nqurl] = qryvars
                getscan = 1
        else:
            if urlfields.has_key(theurl) == False:
                urlfields[theurl] = []
            nqurl = theurl
        if getscan == 1:
            vulns = checkvars(theurl)
            if not listempty(vulns):
                dispvulns(vulns, theurl)
        tree = treeglob
        if treedurls.has_key(nqurl):
            if treedurls[nqurl].count(theurl) == 0 and len(treedurls[nqurl]) <= scanlimit:
                treedurls[nqurl].append(theurl)
            else:
                tree = 0
        else:
            treedurls[nqurl] = [theurl]
        if tree == 1 and level < levels:
            realurl = getpage(theurl, realpage=1)
            if theurl != realurl and realurl != None:
                body += ' href="' + realurl + '" '
            print "treeee"
            try:
                treepages(theurl, level + 1)
            except KeyboardInterrupt:
                treeglob = 0
                print "TREEGLOB CHANGED TO ZERO"
                treepages(theurl, level + 1)
os.chdir("..") os.chdir("..") sys.stderr.write("Must check %d directories\n" % (total_dirs)) for chip in chip_list: #pyraf.iraf.cd(chip) if chip not in cand_list: sys.stderr.write("No candidate on %s\n" % (chip)) conintue for field in field_list: if field not in cand_list[chip]: sys.stderr.write("%s/%s failed to complete.\n" % (chip, field)) continue if cand_list[chip][field] == "no_candidates": continue if sre.search('checked', cand_list[chip][field]): continue else: #print cand_list[chip][field] sys.stderr.write("Checking candidates in %s %s\n" % (field, chip)) pyraf.iraf.cd(chip + "/" + field) result = discands(read_cands(cand_list[chip][field])) if result > -1: sys.stderr.write("%d objects marked as real\n" % (result)) os.rename(cand_list[chip][field], cand_list[chip][field] + ".checked") pyraf.iraf.cd("../..") if result == -2: sys.stderr.write("Removing lock file and exiting.\n") os.unlink('MOPconf.lock') sys.exit()
# FIXME: this is basically test_re.py, with a few minor changes
import sys
sys.path = ['.'] + sys.path
from test_support import verbose, TestFailed
import sre
import sys, os, string, traceback

# Misc tests from Tim Peters' re.doc

if verbose:
    print 'Running tests on sre.search and sre.match'

try:
    assert sre.search('x*', 'axx').span(0) == (0, 0)
    assert sre.search('x*', 'axx').span() == (0, 0)
    assert sre.search('x+', 'axx').span(0) == (1, 3)
    assert sre.search('x+', 'axx').span() == (1, 3)
    assert sre.search('x', 'aaa') == None
except:
    raise TestFailed, "sre.search"

try:
    assert sre.match('a*', 'xxx').span(0) == (0, 0)
    assert sre.match('a*', 'xxx').span() == (0, 0)
    assert sre.match('x*', 'xxxa').span(0) == (0, 3)
    assert sre.match('x*', 'xxxa').span() == (0, 3)
    assert sre.match('a+', 'xxx') == None
except:
    raise TestFailed, "sre.match"
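# A few extra checks in the same style (not from the original file), added
# as a hedged illustration of span() and groups() on sre results:
try:
    assert sre.search('(ab)+', 'xababy').span() == (1, 5)
    assert sre.match('(a)(b)', 'ab').groups() == ('a', 'b')
except:
    raise TestFailed, "sre.search (added checks)"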
def __init__(self, server, port, username, password, num_threads, nzb_source,
             nzb_directory, par2exe_directory, common_prefix="",
             progress_update=None):
    self.server = server
    self.port = port
    self.username = username
    self.password = password
    self.num_threads = num_threads
    self.par2exe_directory = par2exe_directory
    self.progress_update = progress_update
    self.common_prefix = common_prefix
    self.rar_filepath = None
    self.sorted_filenames = list()
    self.downloaded_files = dict()
    self.download_queue = Queue.Queue(0)
    self.decode_queue = Queue.Queue(0)
    self.cancelled = False
    self.finished_files = 0
    self.threads = list()
    # note on calls to _update_progress: a call is made before the task
    # begins and after the task completes; this allows the consumer to
    # cancel during the task
    nzb_string = ""
    nzb_files = None
    nzb_filepath = ""
    try:
        title = self.common_prefix
        if title == "":
            if nzb_source[:7] == "file://" and os.path.exists(urllib.url2pathname(nzb_source)):
                nzb_filepath = urllib.url2pathname(nzb_source)
            title = "NZB"
        else:
            parts = title.split('.')
            if len(parts) > 1:
                ext = parts[-1].lower()
                if ext == "par2" or ext == "nzb" or ext == "nfo":
                    title = '.'.join(parts[:-1])
        if nzb_filepath == "":
            nzb_filepath = os.path.join(nzb_directory, title) + ".nzb"
        print "NZB filepath: " + nzb_filepath
        if nzb_source.startswith("<?xml"):
            nzb_string = nzb_source
        elif os.path.exists(nzb_filepath) and os.path.isfile(nzb_filepath):
            nzb_file = open(nzb_filepath, "r")
            nzb_string = string.join(nzb_file.readlines(), "")
            nzb_file.close()
            #nzb_filepath = possible_nzb_filepath
        else:
            nzb_url = nzb_source
            if self._update_progress("Downloading " + title,
                                     STATUS_INITIALIZING,
                                     os.path.basename(nzb_url)):
                return
            urllib.urlopen(nzb_url)
            nzb_string = string.join(urllib.urlopen(nzb_url).readlines(), "")
            if self._update_progress("Downloading " + title,
                                     STATUS_INITIALIZING,
                                     os.path.basename(nzb_url)):
                return
        if self._update_progress("Parsing " + title, STATUS_INITIALIZING, title):
            return
        nzb_files = nzb_parser.parse(nzb_string)
        sort_nzb_rar_files(nzb_files)
        for nzb_file in nzb_files:
            filename = sre.search("\"(.*?)\"", nzb_file.subject).group(1)
            filename = filename.encode('utf8').lower()
            self.sorted_filenames.append(filename)
        # a common prefix from the file list is preferred
        better_common_prefix = os.path.commonprefix(self.sorted_filenames).rstrip(". ")
        if better_common_prefix != "":
            self.common_prefix = better_common_prefix
        if self.common_prefix == "":
            self.common_prefix = self.sorted_filenames[0]
            parts = self.common_prefix.split('.')
            print parts
            if len(parts) > 1:
                ext = parts[-1].lower()
                if ext == "par2" or ext == "nzb" or ext == "nfo":
                    self.common_prefix = '.'.join(parts[:-1])
        print "Common prefix: " + self.common_prefix
        self.download_directory = os.path.join(nzb_directory, self.common_prefix)
        self.status_filepath = os.path.join(self.download_directory,
                                            self.common_prefix + ".status")
        if self._update_progress("Parsing " + title, STATUS_INITIALIZING, title):
            return
        # make sure the download directory exists
        try:
            os.makedirs(self.download_directory)
        except:
            pass
        nzb_filepath = os.path.join(nzb_directory, self.common_prefix + ".nzb")
        nzb_filepath = nzb_filepath.encode('utf8')
        #print nzb_filepath
        #if os.path.exists(nzb_filepath) and os.path.isdir(nzb_filepath):
        #    # remove the directory containing the nzb; it is rewritten below
        #    shutil.rmtree(nzb_filepath)
        if not os.path.exists(nzb_filepath) or os.path.getsize(nzb_filepath) != len(nzb_string):
            nzb = open(nzb_filepath, "w+b")
            nzb.write(nzb_string)
            nzb.close()
        # run par2 if we already have the .par2 file
        par2_file = os.path.join(self.download_directory,
                                 self.common_prefix + ".par2")
        print "PAR2 file: " + par2_file
        par2_targets = None
        if os.path.exists(par2_file):
            if self._update_progress("Verifying with PAR2",
                                     STATUS_INITIALIZING,
                                     os.path.basename(par2_file)):
                return
            par2_targets = self._verify_with_par2(par2_file)
            if self._update_progress("Verifying with PAR2",
                                     STATUS_INITIALIZING,
                                     os.path.basename(par2_file)):
                return
            #for target in par2_targets:
            #    print "\t" + target + ": " + par2_targets[target]
        nested_nzbs = list()
        for nzb_file in nzb_files:
            nzb_file.filename = sre.search("\"(.*?)\"",
                                           nzb_file.subject.lower()).group(1)
            nzb_file.filename = nzb_file.filename.encode('utf8')
            nzb_file.filepath = os.path.join(self.download_directory,
                                             nzb_file.filename)
            nzb_file.filepath = nzb_file.filepath.encode('utf8')
            #print filepath
            # create an empty file if it doesn't exist
            if not os.path.exists(nzb_file.filepath):
                open(nzb_file.filepath, "w+b").close()
            if not self.rar_filepath:
                rar_match = sre.search("(.+?)\.part(\d+).rar", nzb_file.filename)
                if rar_match and int(rar_match.group(2)) == 1:
                    self.rar_filepath = nzb_file.filepath
                    self.rar_filename = os.path.basename(nzb_file.filepath)
                else:
                    rar_match = sre.search("(.+?)\.rar", nzb_file.filename)
                    if rar_match:
                        self.rar_filepath = nzb_file.filepath
                        self.rar_filename = os.path.basename(nzb_file.filepath)
                if self.rar_filepath:
                    print "First RAR file is " + self.rar_filename
            if os.path.splitext(nzb_file.filepath)[1] == ".nzb":
                nested_nzbs.append(nzb_file.filepath)
            self.downloaded_files[nzb_file.filename] = multipart_file(
                nzb_file.filename, nzb_file.filepath, nzb_file)
            nzb_file.finished = False
            # skip non-PAR2 files if par2 validated it
            if (par2_targets and par2_targets.has_key(nzb_file.filename)
                    and par2_targets[nzb_file.filename] == "found"):
                print "PAR2 verified " + nzb_file.filename + ": skipping"
                self.finished_files += 1
                self.downloaded_files[nzb_file.filename].finished = True
                nzb_file.finished = True
                continue
            # sort segments in ascending order by article number
            nzb_file.segments.sort(key=lambda obj: obj.number)
        # if no RAR file and no nested NZBs, abort
        if not self.rar_filepath:
            if len(nested_nzbs) == 0:
                raise Exception("nothing to do: NZB did not have a RAR file or any nested NZBs")
            self.rar_filepath = self.rar_filename = ""
        # check if first RAR is already finished
        if (par2_targets and par2_targets.has_key(self.rar_filename)
                and par2_targets[self.rar_filename] == "found"):
            if self._update_progress("First RAR is ready.", STATUS_READY,
                                     self.rar_filepath):
                return
        if self._update_progress("Starting " + `self.num_threads` + " download threads",
                                 STATUS_INITIALIZING):
            return
        # queue first segment of each file to get each file's total size
        #for nzb_file in nzb_files:
        #    # skip non-PAR2 files if par2 validated it
        #    if nzb_file.finished: continue
        #    self.download_queue.put([nzb_file.filename, nzb_file, nzb_file.segments[0]], timeout=1)
        # queue the rest of the segments in order
        for nzb_file in nzb_files:
            # skip non-PAR2 files if par2 validated it
            if nzb_file.finished:
                continue
            if self._update_progress("Queueing file", STATUS_INITIALIZING,
                                     nzb_file.filename):
                return
            for nzb_segment in nzb_file.segments[0:]:
                self.download_queue.put([nzb_file.filename, nzb_file, nzb_segment],
                                        timeout=1)
        # start download threads
        for i in range(self.num_threads):
            thread = threading.Thread(name=`i`, target=self._download_thread)
            thread.start()
            self.threads.append(thread)
        if self._update_progress("Starting " + `self.num_threads` + " download threads",
                                 STATUS_INITIALIZING):
            return
        # decode parts as they are downloaded;
        # begins streaming when the first RAR is finished
        self._decode_loop()
        # if no RAR file was found, try the nested NZBs that were downloaded
        if self.rar_filepath == "":
            if self._update_progress("No RAR files found.", STATUS_INITIALIZING):
                return
            for nested_nzb_filepath in nested_nzbs:
                if self._update_progress("Trying nested NZB: " + os.path.basename(nested_nzb_filepath),
                                         STATUS_INITIALIZING):
                    return
                nzb_stream(self.server, self.port, self.username,
                           self.password, self.num_threads,
                           urllib.pathname2url(nested_nzb_filepath),
                           nzb_directory, par2exe_directory, "",
                           self.progress_update)
    except:
        traceback.print_exc()
        self._update_progress("Error parsing NZB", STATUS_FAILED,
                              self.common_prefix)
        # cancel all threads before returning
        self.cancelled = True
        for thread in self.threads:
            if thread.isAlive():
                print "Cancelled thread " + thread.getName()
                thread.join()
import sys, sre

if len(sys.argv) != 2:
    print 'Usage: depfilter.py NODE'
    sys.exit(1)

top = sys.argv[1]

# Read in dot file
lines = sys.stdin.readlines()

graph = {}
for arc in lines[1:-1]:
    match = sre.search('"(.*)" -> "(.*)"', arc)
    n1, n2 = match.group(1), match.group(2)
    if not graph.has_key(n1):
        graph[n1] = []
    graph[n1].append(n2)

# Create subset of 'graph' rooted at 'top'
subgraph = {}

def add_deps(node):
    if graph.has_key(node) and not subgraph.has_key(node):
        subgraph[node] = graph[node]
        for n in graph[node]:
            add_deps(n)
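# Hedged continuation (the snippet above ends at the definition): presumably
# the walk is kicked off at the root node and the filtered graph re-emitted
# in dot syntax, along these lines:
#   add_deps(top)
#   print 'digraph deps {'
#   for n1 in subgraph:
#       for n2 in subgraph[n1]:
#           print '"%s" -> "%s"' % (n1, n2)
#   print '}'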
def fromString(source):
    import sre
    result = {}
    insertionorder = []
    fail = False
    originalsource = source  # preserve original in case of broken header
    # The leading space in the headervalue RE prevents a continuation line
    # being treated like a key: value line.
    headervalueRE_s = "^([^: ]+[^:]*):( ?)([^\r]+)\r\n"  # TODO: This could be optimised
    continuationHeaderRE_s = "^( +[^\r\n]*)\r\n"
    match = sre.search(headervalueRE_s, source)
    # Search for header lines
    inHeader = True
    key = None
    while True:  # We break out of this loop when matching fails for any reason
        if match:
            (key, spaces, value) = match.groups()
            if value == " " and not spaces:  # Empty header
                value = ""
            try:
                result[key].append(value)
            except KeyError:
                result[key] = value
            except AttributeError:
                result[key] = [result[key], value]
            insertionorder.append(key)
        if not match and key:
            # We have already matched a line. This may be a continuation.
            match = sre.search(continuationHeaderRE_s, source)
            if not match:
                break
            (value, ) = match.groups()
            if isinstance(result[key], list):
                # Append to last item in the list
                result[key][len(result[key]) - 1] += "\r\n" + value
            else:
                result[key] += "\r\n" + value
        if not match:
            break
        source = source[match.end():]
        match = sre.search(headervalueRE_s, source)
    # End of header lines. Start of source should be "\r\n"
    #
    # If it isn't, the header is invalid, and the entire original
    # source becomes the __BODY__, and all keys aside from that removed.
    #
    if source[:2] == "\r\n":
        source = source[2:]
    else:
        source = originalsource
        result = {}
        insertionorder = []
        fail = True
    result["__BODY__"] = source
    md = MimeDict(**result)
    md.insertionorder = insertionorder
    md.invalidSource = fail
    return md
interval_bytes[Apos] += sbytes
interval_bytes[Bpos] += ebytes
interval_pkts[Apos] += spkts
interval_pkts[Bpos] += epkts
for i in xrange(Apos + 1, Bpos):
    active_sessions[i] += interval
    interval_bytes[i] += ibytes
    interval_pkts[i] += ipkts
if debug > 1:
    print "->span fracstart<", frac_start, ">fracend<", frac_end, ">"

infile = file(args[0])
mobj = sre.search(sre.compile("\.bz2$"), args[0])
if mobj:
    infile.close()
    infile = bz2.BZ2File(args[0])
if not infile:
    print "error opening ", args[0]
    sys.exit(0)

buffer = infile.readline()
if not buffer:
    print "error reading from ", args[0]
    usage(sys.argv[0])

mobj = sre.match(sre.compile('first-record ([\d\.]+)'), buffer)
os.chdir("..") os.chdir("..") sys.stderr.write("Must check %d directories\n" % ( total_dirs)) for chip in chip_list: #pyraf.iraf.cd(chip) if chip not in cand_list: sys.stderr.write("No candidate on %s\n" % ( chip)) conintue for field in field_list: if field not in cand_list[chip]: sys.stderr.write("%s/%s failed to complete.\n" % ( chip,field) ) continue if cand_list[chip][field]=="no_candidates": continue if sre.search('checked',cand_list[chip][field]): continue else: #print cand_list[chip][field] sys.stderr.write("Checking candidates in %s %s\n" % ( field , chip)) pyraf.iraf.cd(chip+"/"+field) result=discands(read_cands(cand_list[chip][field])) if result > -1: sys.stderr.write("%d objects marked as real\n" % ( result)) os.rename(cand_list[chip][field],cand_list[chip][field]+".checked") pyraf.iraf.cd("../..") if result==-2: sys.stderr.write("Removing lock file and exiting.\n") os.unlink('MOPconf.lock') sys.exit()
def yenc_decode(encoded_lines):
    # check for start tag
    first_line = 0
    for line in encoded_lines:
        if line[:7] == "=ybegin":
            break
        first_line += 1
    if first_line == len(encoded_lines):
        raise Exception("ybegin line not found")
    file_size = None
    # =ybegin part=2 total=66 line=128 size=50000000
    ybegin_match = sre.search("size=(\d+)", encoded_lines[first_line][7:])
    if ybegin_match == None:
        raise Exception("ybegin line is malformed")
    else:
        file_size = int(ybegin_match.group(1))
    decoded_buffer = ""
    part_number = None
    part_begin = None
    part_end = None
    part_size = None
    for line in encoded_lines[first_line + 1:]:
        if line[:6] == "=ypart":
            ypart_match = sre.search("begin=(\d+) end=(\d+)", line[6:])
            if ypart_match == None:
                raise Exception("ypart line is malformed")
            else:
                part_begin = int(ypart_match.group(1))
                part_end = int(ypart_match.group(2))
            continue
        elif line[:5] == "=yend":
            yend_match = sre.search("size=(\d+) part=(\d+) pcrc32=([0-9a-zA-Z]{8})",
                                    line[5:])
            if yend_match == None:
                raise Exception("yend line is malformed")
            else:
                part_size = int(yend_match.group(1))
                part_number = int(yend_match.group(2))
                pcrc32 = int(yend_match.group(3), 16)
                if (crc32(decoded_buffer) & 0xffffffff) != pcrc32:
                    raise Exception("CRC32 checksum failed",
                                    crc32(decoded_buffer) & 0xffffffff, pcrc32)
            break
        i = 0
        end = len(line)
        while i < end:
            byte = line[i]
            # end of line
            if byte in "\r\n":
                break
            # escape byte
            if byte == '=':
                i += 1
                decoded_buffer += decode_escape_table[ord(line[i])]
            # normal byte
            else:
                decoded_buffer += decode_table[ord(byte)]
            i += 1
    if part_size != None and part_size != len(decoded_buffer):
        print "Warning: yend size attribute does not equal buffer length"
    return decoded_buffer, part_number, part_begin, part_end, file_size
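# decode_table and decode_escape_table are referenced above but not defined
# in this snippet. A minimal sketch of the standard yEnc rules they would
# encode: plain bytes subtract 42 mod 256; escaped bytes (the character
# following '=') subtract a further 64.
decode_table = [chr((i - 42) % 256) for i in range(256)]
decode_escape_table = [chr((i - 64 - 42) % 256) for i in range(256)]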