def analyseurl(urls):
    """Fetch one page, collect its absolute links and archive it on a keyword hit.

    The page at ``urls`` is downloaded and the target of every
    ``<a href="http...">`` tag is collected with a regular expression.  If the
    page announces a charset in a ``content="text/html; charset=..."``
    attribute, the body is decoded with it.  When the module-level
    ``keyinsys`` pattern matches the body, a ``(urls, keyinsys, body)`` row is
    written to the ``keyofhtml`` table of the SQLite file ``options.dbfile``
    (the table is created on first use).

    The code targets Python 2 (``urllib2``) but only uses syntax that is also
    valid on Python 3.

    Args:
        urls: Address of the single page to analyse.

    Returns:
        list: The ``http...`` link targets found in the page, in document
        order.

    Raises:
        Any error raised by ``urllib2.urlopen``/``read`` or by SQLite other
        than the handled insert failure is propagated to the caller.
    """
    returns = []
    html = urllib2.urlopen(urls, timeout=50)
    try:
        data = html.read()
    finally:
        # The response object was never closed before.
        html.close()

    link_re = re.compile(r'''\<a\W*?href\="(http.*?)".*?\>''')
    returns.extend(link_re.findall(data))
    logging2.debug('analysing ' + urls)

    # The capture group accepts '-' so that "utf-8" / "iso-8859-1" are
    # recognised; a plain \w group can never match those names.
    charset_re = re.compile(
        r"""content\W*?\=\W*?["|']\W*?text\/html\W*?\;\W*?charset\W*?\=\W*?([\w-]*?)\W*?["|']"""
    )
    m = charset_re.search(data)
    if m:
        try:
            data = data.decode(m.group(1))
        except (UnicodeDecodeError, LookupError):
            # LookupError: the page named an empty or unknown codec.
            logging2.error('decode from charset error')

    conn = sqlite3.connect(options.dbfile)
    try:
        cor = conn.cursor()
        cor.execute(
            'create table if not exists keyofhtml( id integer primary key,urls text,key text,htmls text)'
        )
        if re.compile(keyinsys).search(data):
            # Bound parameters instead of string interpolation: page content
            # is untrusted, and quoting no longer requires rewriting ' to ".
            sqls = "insert into keyofhtml(urls,key,htmls) values(?,?,?)"
            try:
                cor.execute(sqls, (urls, keyinsys, data))
            except (UnicodeDecodeError, sqlite3.ProgrammingError):
                # Python 2 sqlite3 rejects undecoded 8-bit byte strings.
                cor.execute(sqls, (urls, keyinsys, 'decode error'))
                logging2.error('reading ' + urls + ' decode error')
            conn.commit()
    finally:
        conn.close()
    return returns
def readSymbols(symbol):
    """Substitution callback: replace a matched symbol name by its address.

    Args:
        symbol: regex match object; the whole match is used as the name.

    Returns:
        str: the address reported by ``procWrap.programinfo.getAddrOf`` as a
        string, or the matched text unchanged when no process wrapper is set
        or the lookup raises ``ValueError``.
    """
    name = symbol.group(0)
    if procWrap is not None:
        try:
            return str(procWrap.programinfo.getAddrOf(name))
        except ValueError as err:
            # Unknown symbol: log it and leave the text untouched.
            debug(err)
    return name
def analyseurl(urls):
    """Fetch one page, archive it on a keyword hit and list its absolute links.

    The page at ``urls`` is downloaded.  On a best-effort basis it is decoded
    with the charset announced in its ``content="text/html;charset=..."``
    attribute and, when the module-level ``keyinsys`` pattern matches, stored
    as a ``(urls, keyinsys, body)`` row in the ``keyofhtml`` table of the
    SQLite file ``options.dbfile``.  Failures in that step are logged and do
    not prevent link extraction.

    The code targets Python 2 (``urllib2``) but only uses syntax that is also
    valid on Python 3.

    Args:
        urls: Address of the single page to analyse.

    Returns:
        list: One ``{'name': link text or 'NoName', 'urls': href}`` dict per
        ``<a>`` tag whose ``href`` starts with ``http``.  Empty when the page
        body could not be read.

    Raises:
        Errors raised by ``urllib2.urlopen`` itself are propagated.
    """
    returns = []
    print(urls)
    html = urllib2.urlopen(urls, timeout=50)
    try:
        data = html.read()
    except Exception:
        # Without a body there is nothing to parse; previously this path
        # ended in a NameError on the unbound ``data``.
        print('error')
        logging2.error('error ong reading ' + urls)
        return returns
    finally:
        html.close()

    try:
        conn = sqlite3.connect(options.dbfile)
        try:
            cor = conn.cursor()
            cor.execute(
                'create table if not exists keyofhtml( id integer primary key,urls text,key text,htmls text)'
            )
            # Optional whitespace after ';' and '-' in the name are accepted
            # so that the common "text/html; charset=utf-8" is recognised.
            rr = re.compile(
                r"""content\=["|']text\/html\;\s*charset\=([\w-]*?)["|']""")
            m = rr.search(data)
            if m and m.group(1):
                data = data.decode(m.group(1))
            if re.compile(keyinsys).search(data):
                # Bound parameters: page content is untrusted, and the body
                # no longer needs its single quotes rewritten before parsing.
                cor.execute(
                    "insert into keyofhtml(urls,key,htmls) values(?,?,?)",
                    (urls, keyinsys, data))
                conn.commit()
        finally:
            conn.close()
        logging2.debug('reading ' + urls)
    except Exception:
        # Archiving is best effort; KeyboardInterrupt/SystemExit now pass.
        print('error')
        logging2.error('error ong reading ' + urls)

    soup = BeautifulSoup.BeautifulSoup(data)
    logging2.debug('analysing ' + urls)
    for tag in soup.findAll('a', href=re.compile(r'http.*')):
        target = tag['href']
        if target.startswith('http'):
            # ``string`` is empty/None when the tag wraps other markup.
            returns.append({'name': tag.string or 'NoName', 'urls': target})
    return returns
def callFunction(self, funcname, *args, tillResult=False):
    """Redirect control flow to call the specified function with given arguments.

    Registers will be restored as soon as function returns.  If you dont see
    a result immediately, continue till you have stepped through all
    breakpoints/syscalls.

    Does nothing if the process just entered syscall.  If you want to do
    something right after this syscall, singlestep over it.  If its a
    read(stdin) syscall, you need to "trace write" or disable auto-continue
    for write in Constants.py

    use: call libc:memset $rbp 0x41 0x10

    Args:
        funcname: symbol name resolved via ``self.programinfo.getAddrOf``.
        *args: at most six values; they are loaded, in order, into
            rdi, rsi, rdx, rcx, r8, r9.
        tillResult: not read anywhere in this method.

    Returns:
        A status string when the call cannot be started (unknown function,
        process about to leave a syscall, or an injected call is already
        pending); otherwise the result of ``self.cont()``, or ``"none"`` when
        that result is falsy.

    Raises:
        ValueError: if more than six arguments are supplied.
    """
    """How does this work: call mmap to map a page where we can inject code. the injected code will call the specified function. After the specified function is called, it runs into an interrupt. The "continue" logic will check for each received trap if we have reached this certain interrupt. Once that is the case, _afterCallFunction will be called"""
    func_ad = self.programinfo.getAddrOf(funcname)
    if func_ad is None:
        return "function %s not found" % funcname

    proc = self.ptraceProcess
    if proc.syscall_state.next_event == "exit":
        return "about to call syscall, returning"
    if self.inserted_function_data:
        return "already in an inserted function, returning"

    # Snapshot taken before any register is modified; it is stored in
    # inserted_function_data below.
    oldregs = proc.getregs()
    inject_at = self.get_own_segment().functioncall

    argregs = ["rdi", "rsi", "rdx", "rcx", "r8",
               "r9"]  # set new args (depends on calling convention)
    if len(args) > len(argregs):
        raise ValueError("too many arguments supplied"
                         )  # TODO add push(var) functionality
    for (val, reg) in zip(args, argregs):
        proc.setreg(reg, val)

    ip = proc.getInstrPointer()
    # if ip == finish, cont() calls _afterCallFunction.
    # NOTE(review): 3 presumably equals
    # len(pwn.asm("call rax\nint3", arch="amd64")) -- confirm.
    finish = inject_at + 3
    debug(proc.readBytes(inject_at + 2, 1))
    info("inject_at= %x" % inject_at)

    # Jump to the injected stub, which calls whatever address is in rax.
    proc.setInstrPointer(inject_at)
    proc.setreg("rax", func_ad)
    self.inserted_function_data = (ip, finish, oldregs, funcname)

    res = self.cont(
    )  # if you want to debug the injected function, change this to cont(singlestep=True)
    return res if res else "none"
def analyseurl(urls):
    """Fetch one page, collect its absolute links and archive the page.

    The page at ``urls`` is downloaded and the target of every
    ``<a href="http...">`` tag is collected with a regular expression.  If the
    page announces a charset in a ``content="text/html; charset=..."``
    attribute, the body is decoded with it.  The archive filter is the
    match-all pattern ``'.*'``, so every fetched page is written as a
    ``(urls, keyinsys, body)`` row to the ``keyofhtml`` table of the SQLite
    file ``options.dbfile`` (the table is created on first use).

    The code targets Python 2 (``urllib2``) but only uses syntax that is also
    valid on Python 3.

    Args:
        urls: Address of the single page to analyse.

    Returns:
        list: One ``{'urls': href}`` dict per link found, in document order.

    Raises:
        Any error raised by ``urllib2.urlopen``/``read`` or by SQLite other
        than the handled insert failure is propagated to the caller.
    """
    returns = []
    html = urllib2.urlopen(urls, timeout=50)
    try:
        data = html.read()
    finally:
        # The response object was never closed before.
        html.close()

    link_re = re.compile(r'''\<a\W*?href\="(http.*?)".*?\>''')
    returns.extend({'urls': href} for href in link_re.findall(data))
    logging2.debug('analysing ' + urls)

    # The capture group accepts '-' so that "utf-8" / "iso-8859-1" are
    # recognised; a plain \w group can never match those names.
    charset_re = re.compile(
        r"""content\W*?\=\W*?["|']\W*?text\/html\W*?\;\W*?charset\W*?\=\W*?([\w-]*?)\W*?["|']"""
    )
    m = charset_re.search(data)
    if m:
        try:
            data = data.decode(m.group(1))
        except (UnicodeDecodeError, LookupError):
            # LookupError: the page named an empty or unknown codec.
            logging2.error('decode from charset error')

    conn = sqlite3.connect(options.dbfile)
    try:
        cor = conn.cursor()
        cor.execute(
            'create table if not exists keyofhtml( id integer primary key,urls text,key text,htmls text)'
        )
        # '.*' matches any input, including the empty string.
        if re.compile('.*').search(data):
            # Bound parameters instead of string interpolation: page content
            # is untrusted, and quoting no longer requires rewriting ' to ".
            sqls = "insert into keyofhtml(urls,key,htmls) values(?,?,?)"
            try:
                cor.execute(sqls, (urls, keyinsys, data))
            except (UnicodeDecodeError, sqlite3.ProgrammingError):
                # Python 2 sqlite3 rejects undecoded 8-bit byte strings.
                cor.execute(sqls, (urls, keyinsys, 'decode error'))
                logging2.error('reading ' + urls + ' decode error')
            conn.commit()
    finally:
        conn.close()
    return returns
def analyseurl(urls):
    """Fetch one page, archive it on a keyword hit and list its absolute links.

    The page at ``urls`` is downloaded.  On a best-effort basis it is decoded
    with the charset announced in its ``content="text/html;charset=..."``
    attribute and, when the module-level ``keyinsys`` pattern matches, stored
    as a ``(urls, keyinsys, body)`` row in the ``keyofhtml`` table of the
    SQLite file ``options.dbfile``.  Failures in that step are logged and do
    not prevent link extraction.

    The code targets Python 2 (``urllib2``) but only uses syntax that is also
    valid on Python 3.

    Args:
        urls: Address of the single page to analyse.

    Returns:
        list: One ``{'name': link text or 'NoName', 'urls': href}`` dict per
        ``<a>`` tag whose ``href`` starts with ``http``.  Empty when the page
        body could not be read.

    Raises:
        Errors raised by ``urllib2.urlopen`` itself are propagated.
    """
    returns = []
    print(urls)
    html = urllib2.urlopen(urls, timeout=50)
    try:
        data = html.read()
    except Exception:
        # Without a body there is nothing to parse; previously this path
        # ended in a NameError on the unbound ``data``.
        print('error')
        logging2.error('error ong reading ' + urls)
        return returns
    finally:
        html.close()

    try:
        conn = sqlite3.connect(options.dbfile)
        try:
            cor = conn.cursor()
            cor.execute(
                'create table if not exists keyofhtml( id integer primary key,urls text,key text,htmls text)'
            )
            # Optional whitespace after ';' and '-' in the name are accepted
            # so that the common "text/html; charset=utf-8" is recognised.
            rr = re.compile(
                r"""content\=["|']text\/html\;\s*charset\=([\w-]*?)["|']""")
            m = rr.search(data)
            if m and m.group(1):
                data = data.decode(m.group(1))
            if re.compile(keyinsys).search(data):
                # Bound parameters: page content is untrusted, and the body
                # no longer needs its single quotes rewritten before parsing.
                cor.execute(
                    "insert into keyofhtml(urls,key,htmls) values(?,?,?)",
                    (urls, keyinsys, data))
                conn.commit()
        finally:
            conn.close()
        logging2.debug('reading ' + urls)
    except Exception:
        # Archiving is best effort; KeyboardInterrupt/SystemExit now pass.
        print('error')
        logging2.error('error ong reading ' + urls)

    soup = BeautifulSoup.BeautifulSoup(data)
    logging2.debug('analysing ' + urls)
    for tag in soup.findAll('a', href=re.compile(r'http.*')):
        target = tag['href']
        if target.startswith('http'):
            # ``string`` is empty/None when the tag wraps other markup.
            returns.append({'name': tag.string or 'NoName', 'urls': target})
    return returns
def callFunction(self, cmd: str):
    """Parse a ``<command> <function> [args...]`` line and call the function.

    The first word of ``cmd`` is discarded, the second is the function name
    and the remaining whitespace-separated words are converted with
    ``parseInteger`` against the current process.

    Returns:
        Whatever the current process' ``callFunction`` returns, or ``None``
        when a ``ProcessEvent`` was raised and passed to
        ``self._handle_ProcessEvent``.
    """
    remainder = cmd.partition(" ")[2]
    funcname, _, argstr = remainder.partition(" ")
    debug("%s(%s)" % (funcname, argstr))

    target = self.getCurrentProcess()
    args = []
    for token in argstr.split():
        args.append(parseInteger(token, target))
    debug("trying function %s with args %s" % (funcname, args))

    try:
        result = self.getCurrentProcess().callFunction(funcname, *args)
    except ProcessEvent as event:
        self._handle_ProcessEvent(event)
        return None
    return result
def sendNewHeap(self, oldstart, oldstop):
    """Send the current heap contents over ``self.hyxsock`` as an insert update.

    Only the case where the heap kept its start address and did not shrink
    is supported.

    Args:
        oldstart: start address of the heap as previously sent.
        oldstop: stop address of the heap as previously sent.

    Raises:
        NotImplementedError: if the heap start moved or the heap shrank.
    """
    heap = self.heap
    if heap.start != oldstart or heap.stop < oldstop:
        raise NotImplementedError

    # replace old heap with new heap: command marker, 8-byte little-endian
    # length, then the raw bytes
    sock = self.hyxsock
    sock.send(UPD_FROMPAULA_INSERT)
    sock.send(pack("<Q", heap.stop - heap.start))
    payload = heap.heapbytes
    sent = sock.send(payload)
    debug("sent %#x bytes" % sent)
    debug("heapbytes len= %x" % len(payload))
def cont(self, signum=0, singlestep=False):
    """Continue execution of the process.

    Stops at:
      - a traced syscall (?trace)
      - an inserted function
      - specified breakpoints

    Args:
        signum: signal number handed to ``_getNextEvent``.
        singlestep: handed to ``_getNextEvent``; also changes how a SIGTRAP
            is interpreted here.

    Returns:
        A status string, the result of ``_afterCallFunction()`` or the
        result of ``where()``, depending on why execution stopped.

    Raises:
        ProcessEvent: re-raised when the event is a ``ProcessEvent``.
        NotImplementedError: for a SIGTRAP or event type that is not handled.
    """
    proc = self.ptraceProcess
    event = self._getNextEvent(signum, singlestep)

    # happens if an interesting syscall is hit
    if isinstance(event, str):
        return event

    if not isinstance(event, ProcessSignal):
        debug("encountered %s" % event)
        if isinstance(event, ProcessEvent):
            raise event
        raise NotImplementedError

    if event.signum != SIGTRAP:
        # Any other signal: drop it if it is on the ignore list, otherwise
        # hand it back to the process, then keep going.
        if event.signum in SIGNALS_IGNORE.values():
            event.signum = 0
        else:
            warning("got %s, sending it back and continuing" % event)
        return self.cont(event.signum, singlestep)

    # normal trap, maybe breakpoint?
    ip = proc.getInstrPointer()
    pending_call = self.inserted_function_data
    if pending_call and pending_call[1] == ip:
        return self._afterCallFunction()

    # did we hit a breakpoint?
    if ip - 1 in proc.breakpoints.keys() and not singlestep:
        debug("cont calls reinsertBreakpoint")
        self.reinstertBreakpoint()
        return "hit breakpoint at %#x" % (ip - 1)

    if singlestep:
        return self.where()

    print(self.where(), event)
    raise NotImplementedError
def where(self, ip: int):
    """Find the symbol for the respective virtual address.

    Some segments are mapped without a name, such as the data segment of a
    library.  When cycling through the found mappings, the pathname is taken
    from the named mapping that ends exactly where the unnamed one starts,
    if there is one.

    Example: libc:free_hook

    Args:
        ip: virtual address inside the traced process (``self.pid``).

    Returns:
        tuple: ``(symbol name, absolute address)`` of the symbol with the
        highest address that is not above ``ip``.

    Raises:
        ValueError: if ``ip`` lies in no mapping, or if the ELF has no symbol
            at or below ``ip``.
    """
    found = None
    above = None  # this is used in case the found mapping itself has no name
    for mapping in getMappings(self.pid):
        if mapping.start <= ip <= mapping.end:
            found = mapping
        if (mapping.end <= ip and mapping.pathname is not None
                and (above is None or mapping.end > above.end)):
            above = mapping
    debug("found = %s, above = %s" % (found, above))

    if found is None:
        raise ValueError("address is not in virtual adress space")

    # ``above`` stays None when no named mapping ends at or below ip; the
    # unguarded attribute access used to raise AttributeError in that case.
    if (found.pathname is None and above is not None
            and above.end == found.start):
        found = above

    # find symbols at or below ip
    elf = self._getElf(found.pathname)
    base = elf.base
    candidates = [(name, offset + base)
                  for name, offset in elf.symbols.items()
                  if offset + base <= ip]
    if not candidates:
        raise ValueError("no symbol at or below %#x in %s" %
                         (ip, found.pathname))

    # biggest matching is the one we need
    return max(candidates, key=itemgetter(1))
def writeToBuf(self, text: str):
    r"""Write to the processes stdin.

    Use:
        w AA
        w b"\x41\x41\n"
        w b'AA'          (no newline added)
        w pack(421)      (equal to pack("<Q",421)
        w pack(<I, 421)

    If you write a normal string, a newline is added at the end.

    Note that it isnt directly written to its stdin, but instead written to
    an internal buffer.  Upon a read syscall (trying to consume from stdin),
    the buffers contents are written to the actual stdin.  This means that if
    you write to stdin and fork before consumption, both processes will get
    to consume what you have previously written.

    Args:
        text: the full command line, parsed with ``write_arg_regex``.

    Returns:
        None.  Nothing is buffered when the command cannot be parsed or the
        value does not fit the requested pack format; a message is printed
        instead.
    """
    match = write_arg_regex.match(text)
    if match is None:
        # Previously ended in AttributeError on ``None.group``.
        print("could not parse write argument: %r" % text)
        return

    if match.group(1):
        # pack(...) form: optional struct format, then the integer value.
        fmt = match.group(2) if match.group(2) else "<Q"  # remove , from fmt
        val = match.group(3)
        debug("%s %s" % (val, fmt))
        val = int(val, 16 if val.startswith("0x") else 10)
        try:
            text = pack(fmt, val)
        except struct_error as e:
            print(e)
            return
    elif match.group(6):
        # plain string: a newline is appended
        text = (match.group(6) + "\n").encode()
    else:
        # SECURITY NOTE(review): eval() executes whatever the user typed as
        # the bytes literal.  Acceptable for a local debugger prompt, but it
        # must never be fed input from an untrusted source.
        text = eval(match.group(5))

    assert isinstance(text, bytes), "%s" % type(text)
    print("writing ", text)
    self.stdin_buf += text
def analyseurl(urls):
    """Fetch one page and list its absolute links.

    The page at ``urls`` is downloaded, decoded with the charset announced in
    its ``content="text/html;charset=..."`` attribute when possible, and
    parsed with BeautifulSoup.

    The code targets Python 2 (``urllib2``) but only uses syntax that is also
    valid on Python 3.

    Args:
        urls: Address of the single page to analyse.

    Returns:
        list: One ``{'name': link text or 'NoName', 'urls': href}`` dict per
        ``<a>`` tag whose ``href`` starts with ``http``.  Empty when the page
        body could not be read.

    Raises:
        Errors raised by ``urllib2.urlopen`` itself are propagated.
    """
    returns = []
    html = urllib2.urlopen(urls, timeout=30)
    try:
        data = html.read()
    except Exception:
        # Without a body there is nothing to parse; previously this path
        # ended in a NameError on the unbound ``data``.
        logging2.error('error ong reading')
        return returns
    finally:
        html.close()

    # Optional whitespace after ';' and '-' in the name are accepted so that
    # the common "text/html; charset=utf-8" is recognised.
    rr = re.compile(
        r"""content\=["|']text\/html\;\s*charset\=([\w-]*?)["|']""")
    m = rr.search(data)
    try:
        if m and m.group(1):
            data = data.decode(m.group(1))
        logging2.debug('reading')
    except (UnicodeDecodeError, LookupError):
        # Wrong or unknown charset: fall back to the undecoded body.
        logging2.error('error ong reading')

    soup = BeautifulSoup.BeautifulSoup(data)
    logging2.debug('analysing')
    for tag in soup.findAll('a', href=re.compile(r'http.*')):
        target = tag['href']
        if target.startswith('http'):
            # ``string`` is empty/None when the tag wraps other markup.
            returns.append({'name': tag.string or 'NoName', 'urls': target})
    return returns
def analyseurl(urls):
    """Fetch one page and list its absolute links.

    The page at ``urls`` is downloaded, decoded with the charset announced in
    its ``content="text/html;charset=..."`` attribute when possible, and
    parsed with BeautifulSoup.

    The code targets Python 2 (``urllib2``) but only uses syntax that is also
    valid on Python 3.

    Args:
        urls: Address of the single page to analyse.

    Returns:
        list: One ``{'name': link text or 'NoName', 'urls': href}`` dict per
        ``<a>`` tag whose ``href`` starts with ``http``.  Empty when the page
        body could not be read.

    Raises:
        Errors raised by ``urllib2.urlopen`` itself are propagated.
    """
    returns = []
    html = urllib2.urlopen(urls, timeout=30)
    try:
        data = html.read()
    except Exception:
        # Without a body there is nothing to parse; previously this path
        # ended in a NameError on the unbound ``data``.
        logging2.error('error ong reading')
        return returns
    finally:
        html.close()

    # Optional whitespace after ';' and '-' in the name are accepted so that
    # the common "text/html; charset=utf-8" is recognised.
    rr = re.compile(
        r"""content\=["|']text\/html\;\s*charset\=([\w-]*?)["|']""")
    m = rr.search(data)
    try:
        if m and m.group(1):
            data = data.decode(m.group(1))
        logging2.debug('reading')
    except (UnicodeDecodeError, LookupError):
        # Wrong or unknown charset: fall back to the undecoded body.
        logging2.error('error ong reading')

    soup = BeautifulSoup.BeautifulSoup(data)
    logging2.debug('analysing')
    for tag in soup.findAll('a', href=re.compile(r'http.*')):
        target = tag['href']
        if target.startswith('http'):
            # ``string`` is empty/None when the tag wraps other markup.
            returns.append({'name': tag.string or 'NoName', 'urls': target})
    return returns
def _copyBreakpoints(self):
    """Create new breakpoint objects for a forked process.

    For every breakpoint of ``self.parent.ptraceProcess`` a breakpoint is
    created at the same address on ``self.ptraceProcess`` and the parent's
    saved ``old_bytes`` are copied onto it.  It could be optimized to create
    these Breakpoints without reading/writing again.
    """
    from ptrace.debugger.process import Breakpoint

    debug("cloning breakpoints")
    parent_breakpoints = self.parent.ptraceProcess.breakpoints
    debug(parent_breakpoints)

    for parent_bp in parent_breakpoints.values():
        debug("bp= %s" % parent_bp)
        assert isinstance(parent_bp, Breakpoint)
        clone = self.ptraceProcess.createBreakpoint(parent_bp.address)
        clone.old_bytes = parent_bp.old_bytes

    # cover edge case where we just ran into a breakpoint (bp has been
    # temporarily disabled) -- intentionally switched off
    if False:
        # this var stores the address of where the bp has to be inserted
        pending = self.parent.remember_insert_bp
        if pending:
            self.insertBreakpoint(pending)
def startup(self, file):
    """Put every line of the text file ``file`` onto ``self.stdinQ``.

    Lines are queued unchanged (including their line ending) and each queued
    line is reported through ``debug``.

    Args:
        file: path of the file to read.
    """
    with open(file, "r") as script:
        entries = script.readlines()

    for entry in entries:
        if not entry:
            continue
        self.stdinQ.put(entry)
        debug("put %s" % entry)
rekey = re.compile('.*') good = rekey.search(data) if good: #print 'good' data = data.replace("'",'"')#纠结的单引号怎么处理? sqls = "insert into keyofhtml(urls,key,htmls) values('%s','%s','%s')" try: cor.execute(sqls%(urls,keyinsys,data)) except UnicodeDecodeError,e: #print e cor.execute(sqls%(urls,keyinsys,'decode error')) logging2.error('reading '+urls+' decode error') conn.commit() #print 'donessss' conn.close() logging2.debug('reading '+urls) logging2.info('what should i write here') logging2.warning('a warning here') logging2.error('a error test here') logging2.critical('what is a critical??') #print 'reading' #except: #print 'error' #logging2.error('error ong reading '+urls) return returns def main(): i = 0 th = threading2.ThreadPool(workQueue,resultQueue,options.number)
def _getNextEvent(self, signum=0, singlestep=False):
    """Continue execution until an interesting syscall is entered/exited or
    some other event (hopefully a breakpoint SIGTRAP) happens.

    Args:
        signum: signal number passed to the step/continue call.
        singlestep: if true, advance with ``proc.singleStep`` instead of the
            continue function.

    Returns:
        str: a human-readable status when the stop concerns stdin or a
            traced syscall.
        otherwise: the raw event from ``proc.waitEvent()`` (or from
            ``self._reinstertBreakpoint()``) when it is not a syscall trap.
    """

    def isSysTrap(event):
        # NOTE(review): 0x80 | SIGTRAP looks like the syscall-stop marker of
        # PTRACE_O_TRACESYSGOOD -- confirm against the ptrace setup.
        return isinstance(event,
                          ProcessSignal) and event.signum == 0x80 | SIGTRAP

    def isTrap(event):
        return isinstance(event, ProcessSignal) and event.signum == SIGTRAP

    def feedStdin(syscall):
        """Called if process wants to read from stdin.

        Returns 0 when the buffer is empty (the request size is remembered in
        ``self.stdinRequested``), else the tuple
        ``(requested count, result of writeBufToPipe)``.
        """
        assert syscall.result is None  # make sure we are not returning from a syscall
        count = syscall.arguments[2].value  # how much is read
        if len(self.stdin_buf) == 0:
            print("process requests %d bytes from stdin" % (count))
            self.stdinRequested = count
            return 0
        else:
            return count, self.writeBufToPipe(count)

    # check if process is trying to read from stdin, if yes give him what we got
    if self.stdinRequested:
        if self.writeBufToPipe(self.stdinRequested) == 0:
            return "no data to stdin was provided"
        self.stdinRequested = 0

    # this is the actual start of the function
    proc = self.ptraceProcess
    assert isinstance(proc, PtraceProcess)

    # if we are continuing from a breakpoint, singlestep over the breakpoint and reinsert it.
    # if we are not singlestepping and did not hit a syscall / exceptional event, continue till next syscall
    cont_func = proc.syscall if DO_SYSCALL else proc.cont
    if self.remember_insert_bp:
        event = self._reinstertBreakpoint()
        if not singlestep and isTrap(event):
            cont_func(signum)
            event = proc.waitEvent()
    else:
        if singlestep:
            proc.singleStep(signum)
        else:
            cont_func(signum)
        event = proc.waitEvent()

    if not isSysTrap(event):
        debug(" getNextEvent returns %s" % event)
        return event

    # everything from hereon is just about dealing with syscalls
    state = proc.syscall_state
    syscall = state.event(self.syscall_options)

    # if process is about to read from stdin (fd 0), feed it what we have.
    # if nothing is available, notify user
    if syscall.name == "read" and state.next_event == "exit" and \
            syscall.arguments[0].value == 0:
        written = feedStdin(syscall)
        if written == 0:
            return "no data to stdin was provided"
        else:
            # ``written`` is the (requested, written) tuple here, which
            # fills both %d placeholders.
            print("process requests %d bytes from stdin (%d written)" %
                  written)

    # skip over boring syscalls
    if syscall.name not in self.syscalls_to_trace:
        if syscall.result is not None and PRINT_BORING_SYSCALLS:  # print results of boring syscalls
            print("syscall %s = %s" % (syscall.format(), syscall.result_text))
        # NOTE: the recursive call uses the defaults, so signum and
        # singlestep are not carried over.
        return self._getNextEvent()
    # we are tracing the specific syscall
    else:
        if syscall.result is not None:  # just returned
            return "%s = %s" % (syscall.name, syscall.result_text)
        else:  # about to call
            return "process is about to syscall %s" % syscall.format()
rekey = re.compile(keyinsys) #生成关键字匹配 good = rekey.search(data) if good: #print 'good' data = data.replace("'", '"') #纠结的单引号怎么处理? sqls = "insert into keyofhtml(urls,key,htmls) values('%s','%s','%s')" try: cor.execute(sqls % (urls, keyinsys, data)) except UnicodeDecodeError, e: #print e cor.execute(sqls % (urls, keyinsys, 'decode error')) logging2.error('reading ' + urls + ' decode error') conn.commit() #print 'donessss' conn.close() logging2.debug('reading ' + urls) logging2.info('what should i write here') logging2.warning('a warning here') logging2.error('a error test here') logging2.critical('what is a critical??') return returns def main(): """ 执行入口,层次判断,任务转移. >>> main() 时间 深度 当前完成 待完成 """
def get_own_segment(self, address=None):
    """Inject an mmap syscall so we get our own page for code.

    The first successful call maps one anonymous private page in the traced
    process, writes three helper snippets into it and caches the resulting
    ``InsertedGadgets`` in ``self.own_segment``; later calls return the
    cached object.

    Page layout (offsets from the page address):
      - 0x100: ``call rax`` followed by three ``int3``
      - 0x200: a two-byte jump to itself
      - 0x300: ``inject_syscall_instr``

    Args:
        address: requested page address.  Defaults to 0x2000 below the
            address returned by ``self.programinfo.getElfStart()``.

    Returns:
        The ``InsertedGadgets`` describing the page, or ``None`` (after
        printing a message) when the process is about to leave a syscall.
    """
    import errno

    if self.own_segment:
        return self.own_segment

    start = self.programinfo.getElfStart()
    address = address if address else start - 0x2000
    debug("getownsegment adress = %x" % address)

    proc = self.ptraceProcess
    if proc.syscall_state.next_event == "exit":
        print("about to call syscall, returning")
        return

    # save state
    ip = proc.getInstrPointer()
    old_regs = proc.getregs()
    old_code = proc.readBytes(ip, len(inject_syscall_instr))

    # prepare mmap syscall
    MAP_FIXED_NOREPLACE = 1048576
    prot = PROT_EXEC
    mapflags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED_NOREPLACE
    length = 0x1000
    fill_regs = ["rax", "rdi", "rsi", "rdx", "r10", "r8",
                 "r9"]  # calling convention for syscalls
    args = [9, address, length, prot, mapflags, -1,
            0]  # syscallcode, ..., filedescriptor, offset
    assert len(args) == len(fill_regs)
    for reg, arg in zip(fill_regs, args):
        proc.setreg(reg, arg)
    proc.writeBytes(ip, inject_syscall_instr)

    # step over the syscall (one stop on entry, one on exit)
    proc.syscall()
    proc.waitSyscall()
    proc.syscall()
    proc.waitSyscall()
    result = proc.getreg("rax")
    debug("result= %x" % result)

    # restore state -- this must happen before any retry, otherwise the
    # retry would snapshot the clobbered registers and the injected syscall
    # bytes as the "original" state and the real one would be lost.
    proc.writeBytes(ip, old_code)
    proc.setregs(old_regs)

    # rax is read as an unsigned 64-bit value; convert to signed to get -errno
    if result > 2**63 - 1:
        result -= 2**64
    if errno.EEXIST == -result:
        warning("mapping exists")
        return self.get_own_segment(address * 2)

    func_addr = address + 0x100
    inject_code = """
        call rax
        int3
        int3
        int3"""
    inject_code = pwn.asm(inject_code, arch="amd64")
    proc.writeBytes(func_addr, inject_code)

    nop_addr = address + 0x200
    # proc.writeBytes(nop_addr, pwn.asm("nop\nint3"))  # TODO jmp 0
    proc.writeBytes(nop_addr, b"\xeb\xfe")  # jumps to itself

    fork_addr = address + 0x300
    proc.writeBytes(fork_addr, inject_syscall_instr)

    self.own_segment = InsertedGadgets(address, func_addr, nop_addr,
                                       fork_addr)
    return self.own_segment