def __analyze(self):
    """Parse the dyld shared-cache header: check the magic, derive the
    architecture and endianness, then load the mappings and image table."""
    header = peekFixedLengthString(self.file, 15, position=0)
    (magic, archName) = header.split()
    if magic != 'dyld_v1':
        raise MachOError('Invalid magic "{0}"'.format(header))

    self.arch = Arch(archName)
    if self.endian is None:
        self.endian = self.arch.endian
        if self.endian is None:
            raise MachOError('Cannot guess endian from architecture "{0}"'.format(archName))

    headerStruct = Struct(self.endian + '4LQ')
    (mappingOff, mappingCnt, imagesOff, imagesCnt,
     self.dyldBaseAddress) = peekStruct(self.file, headerStruct, position=16)

    mappings = MappingSet(self.__analyzeMappings(mappingOff, mappingCnt))
    mappings.freeze()
    self.mappings = mappings

    images = DataTable('!address', '!name', '!path')
    for image in self.__analyzeImages(imagesOff, imagesCnt):
        path = image.path
        # Strip every extension from the basename to obtain the bare image name.
        name = basename(path)
        (stem, ext) = splitext(name)
        while ext:
            name = stem
            (stem, ext) = splitext(name)
        images.append(image, address=image.address, name=name, path=path)
    self.images = images
def _loadSections(self, machO):
    """Read this segment's load command from *machO* and build the table of
    its section headers."""
    segStruct = machO.makeStruct('16s4^2i2L')
    sectStruct = machO.makeStruct(Section.STRUCT_FORMAT)

    (rawSegname, self.vmaddr, self._vmsize, self._fileoff, self._filesize,
     self.maxprot, self.initprot, sectionCount, _) = readStruct(machO.file, segStruct)
    self.segname = fromStringz(rawSegname)

    fileOrigin = machO._fileOrigin
    # Read every section header and convert it into a Section object.
    headerTuples = peekStructs(machO.file, sectStruct, count=sectionCount)

    sections = DataTable('className', 'sectname', 'ftype')
    for header in headerTuples:
        section = Section.createSection(header)
        # Rebase offsets that fall below the file origin.
        if section.offset < fileOrigin:
            section.offset += fileOrigin
        sections.append(section, className=type(section).__name__,
                        sectname=section.sectname, ftype=section.ftype)
    self.sections = sections

    self._hasAnalyzedSections = False
    self._shouldImportMappings = machO.mappings.mutable
def analyzeClassList(machO, addressesAndClassTuples, protocols):
    """Analyze a list of classes, and return a :class:`~data_table.DataTable`
    of :class:`~objc.class_.Class`\\s with the following column names:

    * ``'name'`` (unique, string, the name of the class)
    * ``'addr'`` (unique, integer, the VM address to the class)

    The parameter *addressesAndClassTuples* should be an iteratable of
    2-tuples, which include the VM address of the class, and a 12-tuple
    representing an ``old_class`` struct.
    """
    classes = DataTable('!name', '!addr')
    supers = []
    for vmaddr, classTuple in addressesAndClassTuples:
        (cls, superPtr) = analyzeClass(machO, classTuple, protocols)
        supers.append(superPtr)
        classes.append(cls, name=cls.name, addr=vmaddr)
    # Resolve superclass pointers only after all classes have been read, so
    # references to classes appearing later in the list still resolve.
    for cls, superPtr in zip(classes, supers):
        if superPtr:
            # BUGFIX: classAt was previously called twice with identical
            # arguments and the first result was discarded into an unused
            # local; look up the superclass exactly once.
            cls.superClass = classAt(machO, superPtr, classes)
    return classes
def _GetPatch(self) -> Patch:
    """Build the ROM patch for this seed.

    Seeds the RNG from ``self.seed`` (so a given seed is deterministic),
    generates levels, randomizes items until the validator accepts the
    seed, then layers a set of fixed byte patches on top of the generated
    data before appending any configured extras.
    """
    random.seed(self.seed)  # make the whole run reproducible per seed
    data_table = DataTable()
    data_table.ResetToVanilla()
    level_generator = LevelGenerator(data_table)
    level_generator.Generate()
    item_randomizer = ItemRandomizer(data_table, self.settings)
    item_randomizer.Randomize()
    validator = Validator(data_table, self.settings)
    # Keep re-shuffling items until the validator accepts the layout.
    while not validator.IsSeedValid():
        item_randomizer.Randomize()
    patch = data_table.GetPatch()
    # NOTE(review): the blocks below are hand-assembled code/data written at
    # fixed ROM offsets — verify against the project's disassembly notes
    # before changing any byte.
    patch.AddData(0x16fd8, [
        0xFF, 0xA5, 0xEC, 0x30, 0x0B, 0x49, 0x80, 0xCD, 0xA1, 0x6B, 0xD0,
        0x09, 0xA4, 0x10, 0xF0, 0x05, 0x85, 0xEC, 0x4C, 0x47, 0xB5, 0x4C,
        0x59, 0xB5, 0xAC, 0xBB, 0x6B, 0xB9, 0xF1, 0xAF, 0x85, 0x98, 0xB9,
        0xF6, 0xAF, 0x85, 0x70, 0xB9, 0xFB, 0xAF, 0x60, 0x00, 0x04, 0x08,
        0x01, 0x02, 0x78, 0x78, 0x78, 0x00, 0xF0, 0x8D, 0x3D, 0xDD, 0x8D,
        0x8D
    ])
    patch.AddData(0x17058, [0xA9, 0x78, 0x85, 0x70, 0x20, 0xE0, 0xAF, 0x85])
    patch.AddData(0x17550, [0x20, 0xC0, 0xB8, 0x4C, 0xC9, 0xAF, 0x12, 0x20])
    patch.AddData(0x178D0, [0xAD, 0x22, 0x05, 0xC9, 0x01, 0xF0, 0x03, 0x4C,
                            0x2F, 0x75, 0x60])
    patch.AddData(0x1934D, [0x00])
    # Fix for ring/tunic colors
    patch.AddData(0x6BFB, [0x20, 0xE4, 0xFF])
    patch.AddData(0x1FFF4, [0x8E, 0x02, 0x06, 0x8E, 0x72, 0x06, 0xEE, 0x4F,
                            0x03, 0x60])
    self._AddExtras(patch)
    return patch
def __analyze(self):
    """Validate the shared-cache magic, work out the architecture and byte
    order, then populate the mapping set and the image table."""
    raw = peekFixedLengthString(self.file, 15, position=0)
    (magic, archname) = raw.split()
    if magic != 'dyld_v1':
        raise MachOError('Invalid magic "{0}"'.format(raw))

    self.arch = Arch(archname)
    if self.endian is None:
        self.endian = self.arch.endian
        if self.endian is None:
            message = 'Cannot guess endian from architecture "{0}"'.format(archname)
            raise MachOError(message)

    fields = peekStruct(self.file, Struct(self.endian + '4LQ'), position=16)
    (mappingOffset, mappingCount, imagesOffset, imagesCount,
     self.dyldBaseAddress) = fields

    mappingSet = MappingSet(self.__analyzeMappings(mappingOffset, mappingCount))
    mappingSet.freeze()
    self.mappings = mappingSet

    table = DataTable('!address', '!name', '!path')
    for img in self.__analyzeImages(imagesOffset, imagesCount):
        # Repeatedly drop extensions until none remain, leaving the bare name.
        shortName = basename(img.path)
        while True:
            (stem, ext) = splitext(shortName)
            if not ext:
                break
            shortName = stem
        table.append(img, address=img.address, name=shortName, path=img.path)
    self.images = table
def readCategoryList(machO, addresses, classes, protoRefsMap):
    """Read categories from an iterable of *addresses*, and return a
    :class:`~data_table.DataTable` of :class:`~objc.category.Category`\\s
    with the following column names:

    * ``'name'`` (string, the name of the category)
    * ``'base'`` (string, the name of the class the category is patching)
    """
    table = DataTable('name', 'base')
    for address in addresses:
        category = readCategory(machO, address, classes, protoRefsMap)
        table.append(category, name=category.name, base=category.class_.name)
    return table
def analyzeProtocolList(machO, addressesAndProtoTuples):
    """Analyze a list of protocols, and return a
    :class:`~data_table.DataTable` of :class:`~objc.protocol.Protocol`\\s
    with the following column names:

    * ``'name'`` (string, the name of the protocol)
    * ``'addr'`` (unique, integer, the VM address to the protocol)

    The parameter *addressesAndProtoTuples* should be an iteratable of
    2-tuples, which include the VM address of the protocol, and a 5-tuple
    representing an ``old_protocol`` struct.
    """
    # associate each unique protocol to a list of vmaddrs.
    # IDIOM: use dict.setdefault/.get instead of manual membership checks.
    protoDict = {}
    protoListDict = {}
    for vmaddr, protoTuple in addressesAndProtoTuples:
        (preped, protoListPtr) = _prepareProtocol(machO, protoTuple)
        protoDict.setdefault(preped, []).append(vmaddr)
        if protoListPtr:
            protoListDict.setdefault(preped, []).append(protoListPtr)

    # now do the actual analysis.
    # 'name' is not unique: there can be multiple protocols with the same
    # name in ABI 1.0.
    protos = DataTable('name', '!addr')
    refs = []
    d = machO.derefString
    analyzer = methodDescriptionAnalyzer(d)
    for preped, vmaddrs in protoDict.items():
        protoListPtrs = protoListDict.get(preped, [])
        (proto, protoRefs) = _analyzeProtocol(machO, d, analyzer, preped,
                                              protoListPtrs)
        protos.append(proto, name=proto.name)
        protos.associate(proto, 'addr', vmaddrs)
        refs.append(protoRefs)

    # connect the protocols only after all have been analyzed, so cross
    # references resolve.
    for proto, protocolRefs in zip(protos, refs):
        connectProtocol(proto, protocolRefs, protos)

    return protos
def analyzeCategoryList(machO, catTuples, classes, protocols):
    """Analyze a list of categories, and return a
    :class:`~data_table.DataTable` of :class:`~objc.category.Category`\\s
    with the following column names:

    * ``'name'`` (string, the name of the category)
    * ``'base'`` (string, the name of the class the category is patching)

    The parameter *catTuples* should be an iteratable of 7-tuples
    representing the ``old_category`` structs.

    .. note:: The docstring previously claimed this function analyzed
       classes and returned ``Class`` objects — that was a copy-paste error;
       it analyzes categories.
    """
    cats = DataTable('name', 'base')
    for catTuple in catTuples:
        cat = analyzeCategory(machO, catTuple, classes, protocols)
        cats.append(cat, name=cat.name, base=cat.class_.name)
    return cats
def _loadSections(self, machO):
    """Parse the segment load command and collect its section headers into
    a DataTable."""
    segStruct = machO.makeStruct('16s4^2i2L')
    sectStruct = machO.makeStruct(Section.STRUCT_FORMAT)

    segFields = readStruct(machO.file, segStruct)
    (name, self.vmaddr, self._vmsize, self._fileoff, self._filesize,
     self.maxprot, self.initprot, nsects, _) = segFields
    self.segname = fromStringz(name)

    origin = machO._fileOrigin
    table = DataTable('className', 'sectname', 'ftype')
    # Convert every raw section header into a Section object as we go.
    for values in peekStructs(machO.file, sectStruct, count=nsects):
        sect = Section.createSection(values)
        if sect.offset < origin:
            sect.offset += origin
        table.append(sect, className=type(sect).__name__,
                     sectname=sect.sectname, ftype=sect.ftype)
    self.sections = table

    self._hasAnalyzedSections = False
    self._shouldImportMappings = machO.mappings.mutable
def main():
    """Entry point: load two CSV files, check that the key field is unique
    in both, diff them, and exit (0 on success, 3 on a non-unique key)."""
    logging.basicConfig(level=logging.INFO)
    logging.info('=== NORMAL START ===' + '=' * 111)

    chapter(f"Process arguments - configure")
    conf = configure()
    pprint.pprint(conf, width=132, indent=10, depth=2)

    chapter(f"Load left csv file [{conf['left']}]")
    left_data = DataTable(displayfield=conf['keyfield'])
    left_data.load_csv(conf['left'], conf['fielddelimiter'], conf['limit'])

    chapter(f"Load right csv file [{conf['right']}]")
    right_data = DataTable(displayfield=conf['keyfield'])
    right_data.load_csv(conf['right'], conf['fielddelimiter'], conf['limit'])

    chapter(f"Check key is unique in both files")
    # Same uniqueness check on both sides; bail out with code 3 on failure.
    for side, table in (('left', left_data), ('right', right_data)):
        if table.is_unique_field(conf['keyfield']):
            logging.info(f"[{conf['keyfield']}] is unique in {side} file - OK")
        else:
            logging.error(
                f"[{conf['keyfield']}] is NOT unique in {side} file - exiting")
            exit(3)

    chapter(f"Diff them csv files")
    diff = diff_data(left_data, right_data, conf['keyfield'],
                     conf['fieldlist'])

    logging.info('=== NORMAL END ===' + '=' * 113)
    exit(0)
def addSymbols(self, symbols):
    '''Add an iterable of :class:`~sym.Symbol`\\s to this Mach-O object.'''
    # Create the symbol table lazily on first use.
    if not hasattr(self, 'symbols'):
        self.symbols = DataTable('name', 'addr', '!ordinal')
    # Bind the append method once outside the loop.
    append = self.symbols.append
    for symbol in symbols:
        append(symbol, name=symbol.name, addr=symbol.addr,
               ordinal=symbol.ordinal)
def readProtocolList(machO, addresses):
    """Read protocols from an iterable of *addresses*, and return a
    :class:`~data_table.DataTable` of :class:`~objc.protocol.Protocol`\\s
    with the following column names:

    * ``'name'`` (unique, string, the name of the protocol)
    * ``'addr'`` (unique, integer, the VM address to the protocol)
    """
    # First pass: read every protocol, remembering its unresolved references.
    protos = DataTable('!name', '!addr')
    pendingRefs = []
    for address in addresses:
        (proto, protocolRefs) = readProtocol(machO, address)
        protos.append(proto, name=proto.name, addr=address)
        pendingRefs.append(protocolRefs)
    # Second pass: resolve the remembered references now that every
    # protocol is available.
    for proto, protocolRefs in zip(protos, pendingRefs):
        connectProtocol(proto, protocolRefs, protos)
    return protos
def readClassList(machO, addresses, protoRefsMap):
    """Read classes from an iterable of *addresses*, and return a
    :class:`~data_table.DataTable` of :class:`~objc.class_.Class`\\s with
    the following column names:

    * ``'name'`` (unique, string, the name of the class)
    * ``'addr'`` (unique, integer, the VM address to the class)
    """
    classes = DataTable('!name', '!addr')
    superPointers = []
    for address in addresses:
        (cls, superPtr) = readClass(machO, address, protoRefsMap)
        superPointers.append(superPtr)
        classes.append(cls, name=cls.name, addr=address)
    # Link superclasses in a second pass so that forward references to
    # classes read later in the list still resolve.
    for cls, superPtr in zip(classes, superPointers):
        if not cls.isRoot:
            cls.superClass = classAt(machO, superPtr, classes)
    return classes
def __init__(self, filename, arch="armv7", lenientArchMatching=False):
    """Initialize a Mach-O object for *filename*.

    :param filename: path to the Mach-O binary.
    :param arch: architecture name to match (default ``"armv7"``).
    :param lenientArchMatching: relax architecture matching —
        exact semantics defined where ``_lenientArchMatching`` is consumed.
    """
    # Local import, presumably to avoid a circular module dependency —
    # TODO(review): confirm.
    from .vmaddr import MappingSet
    self.filename = filename
    self._arch = Arch(arch)
    self._lenientArchMatching = lenientArchMatching
    # File handle state: -1/None until the file is actually opened.
    self.fileno = -1
    self.file = None
    # Table of parsed load commands, keyed by class name and command type.
    self.loadCommands = DataTable('className', 'cmd')
    self.is64bit = False
    self.endian = '<'  # default to little-endian until the header says otherwise
    self._structCache = {}
    self._fileOrigin = 0
    self.mappings = MappingSet()
def test_data_table(self):
    """test regular operations for DataTable"""
    # The constructor must reject non-list record arguments.
    with self.assertRaises(TypeError):
        DataTable("")
    with self.assertRaises(TypeError):
        DataTable(1, ["header1"])

    table = DataTable([[1, 2], [3]], ["header1", "header2"])
    records = list(table.dict_records())
    self.assertEqual(records[0]["header2"], 2)
    self.assertEqual(records[1]["header1"], 3)
    # The short row carries no value for the second header.
    with self.assertRaises(KeyError):
        a = records[1]["header2"]

    # Rows that are not themselves lists fail when iterated.
    table = DataTable([1, 2], ["header1", "header2"])
    with self.assertRaises(TypeError):
        records = list(table)
def to_data_table(self):
    """Return a DataTable of per-department order statistics.

    Columns: department id, total order count, first-order count, and the
    first-order/total ratio formatted to two decimals (the ``percentage``
    column). Departments with zero orders are dropped, and rows are sorted
    by numeric department id.
    """
    headers = [
        "department_id",
        "number_of_orders",
        "number_of_first_orders",
        "percentage",
    ]

    def make_row(record):
        # Build the finished row directly instead of appending a dummy 0
        # and patching it by index afterwards.
        orders = record["number_of_orders"]
        first_orders = record["number_of_first_orders"]
        return [
            record["department_id"],
            orders,
            first_orders,
            "{:.2f}".format(first_orders / orders),
        ]

    rows = [
        make_row(record)
        for record in self.counts.values()
        # remove the rows with no order count (also guards the division)
        if record["number_of_orders"] > 0
    ]
    # put rows in increasing order of department's id
    rows.sort(key=lambda row: int(row[0]))
    return DataTable(rows, headers)
def imprimir(self):
    """Print the symbol table as a formatted table."""
    headers = ['Lexeme', 'Token', 'Type', 'Value']
    table = DataTable(headers, self.tabla_simbolos)
    table.print()
def main():
    """Entry point: load a products CSV, clean and enrich it, re-map the
    fields for Prestashop, and write the resulting CSV."""
    logging.basicConfig(level=logging.INFO)
    chapter(f" NORMAL START ", filler='=')

    chapter(f"Process arguments - configure")
    conf = configure()

    chapter(f"Load csv [{conf['inputfile']}]")
    input_data = DataTable(displayfield='Name')
    input_data.load_csv(conf['inputfile'], conf['delimiter'], conf['limit'])
    input_data.show_fields()

    chapter(f"Remove unwanted records")
    input_data.remove_records({'Active': 'N'})

    chapter(f"Replace some shizzle - newlines, semicolons, etc")
    # NOTE: newline -> <br/> replacement is intentionally disabled:
    # input_data.replace_in_field(field='Description', frompart='\n', topart='<br/>')
    input_data.replace_in_field(field='Description', frompart=';', topart='.')

    chapter(f"Checking uniqueness of field Name")
    # BUGFIX: the boolean result was bound to an unused local; the call is
    # kept for its reporting side effect only.
    input_data.is_unique_field(fieldname='Name')

    chapter(f"Add some extra fields")
    input_data.add_counter_field('gen-id', initialvalue=10001)
    input_data.copy_field(sourcefield='Description',
                          newfield='gen-short-description')
    input_data.add_fixed_field('fix-text-1', 'from')
    input_data.add_combined_field(newfield='gen-subtitle',
                                  fieldstocombine=['Type', 'fix-text-1',
                                                   'Country'],
                                  delimiter=' ')

    chapter(f"Generate complex combined feature field from the feature fields")
    input_data.add_combined_features_field('combined-features',
                                           conf['featurelist'])

    chapter(f"Combine the category fields into one category (list)field")
    input_data.add_fixed_field('fix-maincat', 'Inhabitants')
    input_data.add_combined_categories_field('combined-categories',
                                             conf['categorieslist'])

    chapter(f"Re-map fields for Prestashop")
    output_data = input_data.re_map_table(conf['finalfieldmap'],
                                          displayfield='NAME')

    chapter(f"Some tests - output to screen")
    output_data.show_fields(('UNIQUE-ID', 'NAME', 'TYPE', 'CATEGORIES',
                             'FEATURES'))
    # BUGFIX: same unused-variable issue as above; keep the call only.
    output_data.is_unique_field(fieldname='NAME')

    # Finally we write the resulting products csv file
    chapter(f"Writing resulting csv - [{conf['outputfile']}]")
    output_data.write_csv(conf['outputfile'], delimiter=';')

    chapter(f" NORMAL END ", filler='=')
    exit(0)
import traceback
import os
import sys

sys.path.append('../../')

# BUGFIX: DataTable was used below without ever being imported (the sibling
# script imports it as `from data_table import DataTable`); without this the
# script dies with a NameError.
from data_table import DataTable

if __name__ == "__main__":
    # Hierarchy metadata: dimension name -> .hier file path.
    hier_meta_dict = {}
    data_folder = '../data_news/used'
    hier_meta_dict['Location'] = data_folder + '/Location.hier'
    # hier_meta_dict['Topic'] = data_folder + '/Topic.hier'
    data_file = data_folder + "/data_table.csv"
    doc_folder = data_folder + "/docs_linked/"  # the segphrased version
    splitter = "\t@@\t"
    dt = DataTable(data_file, hier_meta_dict)

    # used to store the constructing cells
    queries = []
    # if we want to sample at most K document from the cell
    K = 100000

    # group_name = 'Topics'
    # queries.append({'Topic':'Business'})
    # queries.append({'Topic':'Arts'})
    # queries.append({'Topic':'Travel'})
    # queries.append({'Topic':'World'})
    # queries.append({'Topic':'Science'})
    # queries.append({'Topic':'Health'})
    # queries.append({'Topic':'Technology'})
from data_table import DataTable
import traceback

if __name__ == "__main__":
    # Hierarchy metadata: dimension name -> .hier file path.
    hier_meta_dict = {
        'Location': '../data/raw/lochier.hier',
        'Topic': '../data/raw/topichier.hier',
    }

    data_file = "../data/raw/data_table.csv"
    output_file = "../data/query/new_hier/cells"
    # Alternative inputs (disabled):
    # data_file = "../data/raw/new_data_table_no_ner.csv"
    # output_file = "../data/query/new_hier/reduced_cells"

    dt = DataTable(data_file, hier_meta_dict)

    # Cells (hierarchy queries) to construct, in order.
    queries = [
        {'Location': 'Illinois'},
        {'Location': 'Illinois', 'Topic': 'Sports'},
        # {'Location': 'New York'},
        {'Location': 'China'},
        {'Location': 'Russia'},
        {'Location': 'Japan'},
        {'Location': 'North Korea'},
        {'Topic': 'Asia Pacific'},
        {'Topic': 'Africa'},
        {'Topic': 'Gay Right'},
        {'Location': 'Syria'},
        # {'Location': 'Syria', 'Topic': 'Military'}  => 0 doc
        {'Location': 'United States of America', 'Topic': 'Military'},
    ]