def test_skip_space_after_bin_control_word(self):
    """Parse the ``rtfobj/issue_185.rtf.zip`` sample and expect one object.

    The sample is loaded with ``testdata_reader.read_encrypted`` and fed to
    ``rtfobj.RtfObjParser``; the test passes when exactly one object is
    found after parsing.
    """
    data = testdata_reader.read_encrypted('rtfobj/issue_185.rtf.zip')
    rtfp = rtfobj.RtfObjParser(data)
    rtfp.parse()
    # assertEqual reports the actual object count on failure, whereas
    # assertTrue(len(...) == 1) only reports "False is not true".
    self.assertEqual(len(rtfp.objects), 1)
def test_bin_no_param(self):
    """Parse the ``rtfobj/issue_251.rtf`` sample and expect one object.

    The sample is loaded with ``testdata_reader.read`` and fed to
    ``rtfobj.RtfObjParser``; the test passes when exactly one object is
    found after parsing.
    """
    data = testdata_reader.read('rtfobj/issue_251.rtf')
    rtfp = rtfobj.RtfObjParser(data)
    rtfp.parse()
    # assertEqual reports the actual object count on failure, whereas
    # assertTrue(len(...) == 1) only reports "False is not true".
    self.assertEqual(len(rtfp.objects), 1)
def _run(self, scanObject, result, depth, args):
    """Extract the objects embedded in an RTF buffer as child objects.

    ``scanObject.buffer`` is parsed with ``rtfobj.RtfObjParser``. For every
    parsed object a metadata dict is attached to ``scanObject`` under the
    ``"Parsed_Objects_Metadata"`` key, and a ``ModuleObject`` carrying the
    object's data is appended to the returned list.

    Args:
        scanObject: object providing ``buffer`` (the RTF data) and
            ``addMetadata``.
        result: not used by this method.
        depth: not used by this method.
        args: not used by this method.

    Returns:
        A list with one ``ModuleObject`` per parsed RTF object, in parse
        order.
    """
    moduleResult = []
    rtfp = rtfobj.RtfObjParser(scanObject.buffer)
    rtfp.parse()

    # enumerate() supplies the position directly; the previous
    # rtfp.objects.index(...) call rescanned the list on every iteration.
    for index, rtfobject in enumerate(rtfp.objects):
        if rtfobject.is_package:
            metadata = {
                'Type': "OLEPackage",
                'Index': index,
                'Filename': rtfobject.filename,
                # NOTE(review): 'Source Patch' looks like a typo for
                # 'Source Path'; the key is kept unchanged because
                # downstream consumers may depend on it.
                'Source Patch': rtfobject.src_path,
                'Temp Path': rtfobject.temp_path,
            }
            payload = rtfobject.olepkgdata
            extension = 'olepackage'
        elif rtfobject.is_ole:
            metadata = {
                'Type': "OLE",
                'Index': index,
                'Format_id': rtfobject.format_id,
                'Class_name': rtfobject.class_name,
                'Size': rtfobject.oledata_size,
            }
            payload = rtfobject.oledata
            extension = 'ole'
        else:
            # Not a well-formed OLE object: keep the raw data.
            metadata = {'Type': "RAW", 'Index': index}
            payload = rtfobject.rawdata
            extension = 'raw'

        scanObject.addMetadata(self.module_name,
                               "Parsed_Objects_Metadata",
                               metadata)
        # The child filename embeds the object's start offset as 8 hex digits.
        child_name = 'e_rtf_object_%08X.%s' % (rtfobject.start, extension)
        moduleResult.append(
            ModuleObject(buffer=payload,
                         externalVars=ExternalVars(filename=child_name)))

    return moduleResult
def scan(self, data, file, options, expire_at):
    """Extract objects embedded in RTF data and upload them as new files.

    The data is parsed with ``rtfobj.RtfObjParser``. Each parsed object is
    wrapped in a ``strelka.File``, its bytes are uploaded chunk by chunk via
    ``self.upload_to_coordinator`` and the file is appended to
    ``self.files``. Counters are kept in ``self.event['total']``.

    Args:
        data: RTF content handed to the parser.
        file: not used by this method.
        options: mapping; ``options['limit']`` caps the number of extracted
            objects (default 1000).
        expire_at: forwarded unchanged to ``self.upload_to_coordinator``.
    """
    file_limit = options.get('limit', 1000)
    self.event['total'] = {'objects': 0, 'extracted': 0}

    rtf = rtfobj.RtfObjParser(data)
    rtf.parse()
    self.event['total']['objects'] = len(rtf.objects)

    # enumerate() yields the object's position directly. The previous code
    # looked it up through `rtf.server`, which is not the list being
    # iterated (`rtf.objects`) and would fail on the first object.
    for index, rtf_object in enumerate(rtf.objects):
        if self.event['total']['extracted'] >= file_limit:
            break

        if rtf_object.is_package:
            # Packages carry their own embedded filename.
            name = rtf_object.filename
            payload = rtf_object.olepkgdata
        elif rtf_object.is_ole:
            name = f'object_{index}'
            payload = rtf_object.oledata
        else:
            name = f'object_{index}'
            payload = rtf_object.rawdata

        extract_file = strelka.File(
            name=name,
            source=self.name,
        )
        for c in strelka.chunk_string(payload):
            self.upload_to_coordinator(
                extract_file.pointer,
                c,
                expire_at,
            )

        self.files.append(extract_file)
        self.event['total']['extracted'] += 1
async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
    """Extract the objects embedded in an RTF payload.

    ``payload.content`` is parsed with ``rtfobj.RtfObjParser``; every parsed
    object becomes an ``ExtractedPayload`` whose metadata records the
    object's index, plus the embedded filename for OLE packages.

    Args:
        payload: payload whose ``content`` holds the RTF data.
        request: not used by this method.

    Returns:
        A ``WorkerResponse`` whose ``extracted`` list holds one entry per
        parsed object, in parse order.
    """
    rtf = rtfobj.RtfObjParser(payload.content)
    rtf.parse()

    extracted: List[ExtractedPayload] = []
    for obj_idx, obj in enumerate(rtf.objects):
        extra_data = {'index': obj_idx}
        # The package check must come before the generic OLE check:
        # oletools flags a package as an OLE object too, so testing is_ole
        # first would make this branch unreachable and drop the filename.
        if obj.is_package:
            data = obj.olepkgdata
            extra_data['filename'] = obj.filename
        elif obj.is_ole:
            data = obj.oledata
        else:
            data = obj.rawdata
        meta = PayloadMeta(extra_data=extra_data)
        extracted.append(ExtractedPayload(data, meta))

    return WorkerResponse(extracted=extracted)
def extractOleData(rtfFilename):
    """Write the recognised OLE objects of an RTF file to disk.

    The file is parsed with ``rtfobj.RtfObjParser`` and each object is
    dispatched on its class name:

    * contains ``Acro``: data from the ``%PDF-1.`` signature onwards is
      written next to the RTF file with a ``.pdf`` extension;
    * equals ``Package``: the package data is written into the RTF file's
      directory under the package's embedded filename;
    * contains ``Word``: the OLE data is written with a ``.doc`` extension;
    * contains ``Excel``: written as ``.xlsx`` starting at the ZIP signature
      when one is present, otherwise as ``.xls``.

    Objects of any other class are reported with the ``A`` marker and
    skipped.

    Args:
        rtfFilename: path of the RTF file to read.

    Returns:
        The list of paths written, in the order they were written.
    """
    createdFiles = []
    with open(rtfFilename, 'rb') as rtfFile:
        rtfData = rtfFile.read()

    rtfp = rtfobj.RtfObjParser(rtfData)
    rtfp.parse()

    baseName = os.path.splitext(rtfFilename)[0]
    pdfSignature = b"%PDF-1."
    zipSignature = b"PK\x03\x04"

    for obj in rtfp.objects:
        className = obj.class_name
        if className is None:
            # An object without a class name matches none of the handlers.
            className = ''
        elif isinstance(className, bytes) and not isinstance(className, str):
            # NOTE(review): under Python 3 the class name presumably arrives
            # as bytes; decode so the text comparisons below work - confirm.
            className = className.decode('latin-1')

        if 'Acro' in className:  # PDF
            outfileName = baseName + ".pdf"
            start = obj.oledata.find(pdfSignature)
            # find() returns -1 when the signature is absent; slicing from
            # -1 would keep only the last byte, so keep all the data then.
            outfileData = obj.oledata[start:] if start >= 0 else obj.oledata
        elif 'Package' == className:
            # The embedded filename comes from the document itself: keep
            # only its final component so it cannot escape the output
            # directory through separators or an absolute path.
            packageName = os.path.basename(obj.filename.replace('\\', '/'))
            outfileName = os.path.join(os.path.dirname(rtfFilename),
                                       packageName)
            outfileData = obj.olepkgdata
        elif 'Word' in className:
            # Unlike the Excel branch, no ZIP signature check is done here:
            # Word objects are always saved as .doc.
            outfileName = baseName + ".doc"
            outfileData = obj.oledata
        elif 'Excel' in className:
            start = obj.oledata.find(zipSignature)
            if start >= 0:
                outfileData = obj.oledata[start:]
                outfileName = baseName + ".xlsx"
            else:
                outfileData = obj.oledata
                outfileName = baseName + ".xls"
        else:
            # Unsupported class: nothing to write. Skipping avoids reusing
            # the previous iteration's name/data (or failing on the first
            # object, where neither is defined yet).
            print("A")
            continue

        with open(outfileName, "wb") as outfile:
            outfile.write(outfileData)
        createdFiles.append(outfileName)

    return createdFiles
def scan(self, file_object, options):
    """Extract objects embedded in RTF data as child files.

    ``file_object.data`` is parsed with ``rtfobj.RtfObjParser``. Each parsed
    object becomes an ``objects.StrelkaFile`` appended to ``self.children``.
    Counters are kept in ``self.metadata["total"]``.

    Args:
        file_object: file being scanned; supplies ``data`` plus the
            ``depth``, ``uid``, ``root_uid``, ``hash`` and ``root_hash``
            values copied onto each child.
        options: mapping; ``options["limit"]`` caps the number of extracted
            objects (default 1000).
    """
    file_limit = options.get("limit", 1000)
    self.metadata["total"] = {"objects": 0, "extracted": 0}

    rtf = rtfobj.RtfObjParser(file_object.data)
    rtf.parse()
    self.metadata["total"]["objects"] = len(rtf.objects)

    # enumerate() supplies the position directly instead of rescanning the
    # list with .index() for every object.
    for index, rtf_object in enumerate(rtf.objects):
        if self.metadata["total"]["extracted"] >= file_limit:
            break

        if rtf_object.is_package:
            child_file = rtf_object.olepkgdata
            child_filename = f"{self.scanner_name}::{rtf_object.filename}"
        elif rtf_object.is_ole:
            child_file = rtf_object.oledata
            child_filename = f"{self.scanner_name}::object_{index}"
        else:
            child_file = rtf_object.rawdata
            # Uses the same "::" separator as the two branches above; the
            # raw branch previously used a single ":".
            child_filename = f"{self.scanner_name}::object_{index}"

        child_fo = objects.StrelkaFile(data=child_file,
                                       filename=child_filename,
                                       depth=file_object.depth + 1,
                                       parent_uid=file_object.uid,
                                       root_uid=file_object.root_uid,
                                       parent_hash=file_object.hash,
                                       root_hash=file_object.root_hash,
                                       source=self.scanner_name)
        self.children.append(child_fo)
        self.metadata["total"]["extracted"] += 1