def __init__(self, config: Config, callback=None):
    self.config = config
    self.count: int = 0
    self.callback = self.result_callback if callback is None else callback
    # prepare all words and hash values:
    self.groups: tuple = ordered_unique(list_to_bytes(chain(
        [self._prep_group(g, True)  for g in self.config.groups_raw],
        [self._prep_group(g, False) for g in self.config.groups]))) or (b'',)
    self.words: tuple = ordered_unique(list_to_bytes(chain(
        *[self._prep_word(w, True,  False) for w in self.config.words_raw],
        *[self._prep_word(w, False, False) for w in self.config.words])))
    self.words_tr: tuple = ordered_unique(list_to_bytes(chain(
        *[self._prep_word(w, True,  True)  for w in self.config.words_raw],
        *[self._prep_word(w, False, True)  for w in self.config.words])))
    if len(self.words_tr) == len(self.words):
        self.words_tr = self.words  # nothing changed, keep original instance
    self.prefixes: tuple = ordered_unique(list_to_bytes(self.config.prefixes)) or (b'',)
    self.postfixes: tuple = ordered_unique(list_to_bytes(self.config.postfixes)) or (b'',)
    # precompute one lookup key per (target, postfix, group) combination, so the
    # search loop only ever hashes the words and then checks a dict:
    if self.is_product_groups:
        self.targets: dict = dict((t, (t, b'', b'')) for t in self.config.targets)
    elif self.is_product_postfixes:
        self.targets: dict = dict((invhash32(g, t), (t, g, b''))
                                  for t, g in product(self.config.targets, self.groups))
    else:
        self.targets: dict = dict((invhash32(p + g, t), (t, g, p))
                                  for t, p, g in product(self.config.targets, self.postfixes, self.groups))
    self.init: int = 0 if self.is_product_prefixes else hash32(self.prefixes[0])
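# Hedged sketches of the two small helpers the constructor leans on; the real
# ordered_unique()/list_to_bytes() live elsewhere in this repo, and these
# assumed equivalents exist purely to illustrate the data flow above:
def _ordered_unique_sketch(items) -> tuple:
    # deduplicate while preserving first-seen order (assumed to return a tuple,
    # matching how the results are annotated and indexed above)
    seen = set()
    return tuple(x for x in items if not (x in seen or seen.add(x)))

def _list_to_bytes_sketch(items) -> list:
    # assumed: encode any str items (cp932 is a guess for a Japanese engine)
    # so that everything downstream is hashed as bytes
    return [x.encode('cp932') if isinstance(x, str) else x for x in items]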
def _handle_result(self, words: tuple):
    self.count += 1
    value: int = hash32(b''.join(words), self.init)
    result = self.targets[value][0]
    group, postfix = list_to_str(self.targets[value][1:3])
    prefix = to_str(self.prefixes[0])
    words = list_to_str(words)
    # when groups/postfixes/prefixes were folded into the product, the matched
    # pieces sit at the ends of `words` and must be split back out:
    if self.is_product_groups:
        (group, postfix), words = words[-2:], words[:-2]
    elif self.is_product_postfixes:
        postfix, words = words[-1], words[:-1]
    if self.is_product_prefixes:
        prefix, words = words[0], words[1:]
    self.callback(self, len(words), prefix, words, postfix, group, result)
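# ---------------------------------------------------------------------------
# Minimal, self-contained sketch of the invhash32() trick the constructor
# relies on, assuming hash32 is the standard reflected CRC-32 (compatible with
# zlib.crc32); the real hash32/invhash32 live elsewhere in this repo, so this
# only illustrates why precomputing the (postfix + group) suffix works:
# hash32(prefix + words + suffix) == target exactly when
# hash32(words, hash32(prefix)) == invhash32(suffix, target), which lets the
# inner loop hash just the words and finish with a dict lookup.
import zlib

_CRC_TABLE = []
for _n in range(256):
    _c = _n
    for _ in range(8):
        _c = (_c >> 1) ^ 0xEDB88320 if (_c & 1) else (_c >> 1)
    _CRC_TABLE.append(_c)
# the top byte of each table entry is unique, which is what makes the CRC
# byte-update step invertible:
_CRC_TOP = {entry >> 24: index for index, entry in enumerate(_CRC_TABLE)}
assert len(_CRC_TOP) == 256

def _invhash32_sketch(suffix: bytes, target: int) -> int:
    crc = target ^ 0xFFFFFFFF            # undo the final xor-out
    for b in reversed(suffix):           # run each CRC byte-update backwards
        idx = _CRC_TOP[crc >> 24]        # recover the table index used forward
        crc = ((crc ^ _CRC_TABLE[idx]) << 8) | (idx ^ b)
    return crc ^ 0xFFFFFFFF              # undo the initial xor-in

# sanity check: a CRC-32 split at any byte boundary round-trips
_data, _split = b'$main@GLOBAL', 5
assert _invhash32_sketch(_data[_split:], zlib.crc32(_data)) == zlib.crc32(_data[:_split])
# ---------------------------------------------------------------------------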
def scan_mjs(filename: str, *, debug_mode: bool = False, pre_greedy: bool = False):
    mjsreader = MjsReader(filename, encoding='utf-8', debug_mode=debug_mode, pre_greedy=pre_greedy)
    mjsreader.read()
    source = os.path.basename(filename)
    for sig in [*mjsreader.var_hashes.values(), *mjsreader.func_hashes.values()]:
        hashvalue = ' ' if sig.hash is None else f'{sig.hash:08x}'
        name = ' ' if sig.name is None else sig.name
        typename = ' '
        if isinstance(sig, FunctionSig) and sig.is_void:
            typename = Typedef.VOID.value
        else:
            NAMES = ('VOID', 'INT', 'FLOAT', 'STRING', 'INT_ARRAY', 'FLOAT_ARRAY', 'STRING_ARRAY')
            for N in NAMES:
                if sig.type.name == N:
                    # plain INT is reported as the "unknown int" typedef:
                    if sig.type is MjoType.INT:
                        typename = Typedef.INT_UNK.value
                    else:
                        typename = getattr(Typedef, N).value
                    break
            # if sig.type.name == Typedef.INT.name: typename = Typedef.INT.value
        # typename = ' ' if sig.type is None else sig.type.value
        print(f'{S.BRIGHT}{F.RED}{hashvalue}{S.RESET_ALL}\t{S.BRIGHT}{F.CYAN}{source}{S.RESET_ALL}\t{S.BRIGHT}{F.BLUE}{typename}{S.RESET_ALL}\t{S.BRIGHT}{F.YELLOW}{name}{S.RESET_ALL}', end='')
        if sig.group is not None:
            if sig.group == GROUP_LOCAL:
                groupname = '@'
            else:
                groupname = '' if not sig.group else f'@{sig.group}'
            print(f'{S.DIM}{F.GREEN}{groupname}{S.RESET_ALL}', end='')
        if isinstance(sig, FunctionSig):
            # if not sig.arguments: print(f'\t{""}', end='')
            # else:                 print(f'\t{sig.args_str}', end='')
            if not sig.arguments:
                print(f'({S.BRIGHT}{F.BLUE}void{S.RESET_ALL})', end='')
            else:
                print(f'({S.BRIGHT}{F.CYAN}{sig.args_str}{S.RESET_ALL})', end='')
        print()
    for grp in mjsreader.group_names:
        hashvalue = hash32(f'$main@{grp}')
        name = grp
        print(f'{S.BRIGHT}{F.RED}{hashvalue:08x}{S.RESET_ALL}\t{S.BRIGHT}{F.CYAN}{source}{S.RESET_ALL}\t{S.BRIGHT}{F.GREEN}{name}{S.RESET_ALL}')
    print('Total:', len([*mjsreader.var_hashes.values(), *mjsreader.func_hashes.values(), *mjsreader.group_names]))
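# Hedged usage sketch for scan_mjs(); the script path below is illustrative
# (any .mjs/.mjh source the MjsReader can parse would do), so it is left
# commented rather than executed at import time:
#
#   scan_mjs('../data/mjs/console.mjs.old', debug_mode=False, pre_greedy=False)
#
# which prints one tab-separated row per discovered hash, roughly:
#   <hash8>  <source>  <type>  <name>@<group>(<args>)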
def read_file(reader: csv.DictReader):
    if args.write_unknown:
        unkwriter = open('syscalls_unknown_cached.txt', 'wt+', encoding='utf-8')
    if args.write_collisions:
        clnwriter = open('syscalls_collisions_cached.txt', 'wt+', encoding='utf-8')
    status_counts  = OrderedDict()  #[(c,0) for c in Status.__members__.values()]
    return_counts  = OrderedDict()
    keyword_counts = OrderedDict()
    letter_counts  = OrderedDict()
    for row in reader:
        hashvalue: int = int(row[Field.HASH.value], 16)
        address:   str = row[Field.ADDRESS.value]  # pylint: disable=unused-variable
        retvalue:  str = row[Field.RETURN.value]
        name:      str = row[Field.NAME.value]
        arguments: str = row[Field.ARGUMENTS.value]  # pylint: disable=unused-variable
        status: Status = Status(row[Field.STATUS.value])
        notes:     str = row[Field.NOTES.value]  # pylint: disable=unused-variable

        # name corrections:
        if name and name[0] != '$':  # syscalls don't include '$' prefix
            name = f'${name}'
        cleanname: str = name.strip('#@%$_')
        fullname = f'{name}@{GROUP_SYSCALL}'
        if retvalue in ('file', 'page', 'sprite'):  # older sheets before adding '*' for ptr types
            retvalue = f'{retvalue}*'

        # name lookups:
        rettype = TYPEDEF_LOOKUP[retvalue]
        # if status not in (Status.UNHASHED, Status.CONFIRMED):
        if args.write_unknown and status not in (Status.UNHASHED, Status.CONFIRMED, Status.COLLISION):
            unkwriter.write(f'{hashvalue:08x} ')
        if args.write_collisions and status is Status.COLLISION:
            clnwriter.write(f'{hashvalue:08x} ')

        # validation/errors/warnings:
        if name and status in (Status.UNHASHED, Status.COLLISION, Status.LIKELY, Status.CONFIRMED):
            fullhash = hash32(fullname)
            if fullhash != hashvalue:
                print(f'{S.BRIGHT}{F.RED}ERROR:{S.RESET_ALL} hashvalue mismatch! {hashvalue:08x} vs {fullhash:08x} : {name}')
        if name and (True or status in (Status.UNHASHED, Status.COLLISION, Status.LIKELY, Status.CONFIRMED)):
            # postfix_str = MjoType.getpostfix_fromname(name)
            postfix = MjoType.frompostfix_name(name, allow_unk=True, allow_alt=True)  # allow '!' (for $rand!) and '~' (%Op_internalCase~)
            if rettype not in (Ellipsis, MjoType.UNKNOWN) and rettype != postfix:
                print(f'{S.BRIGHT}{F.RED}ERROR:{S.RESET_ALL} return/postfix mismatch! {hashvalue:08x} : {name}')

        # statistics:
        status_counts.setdefault(status, 0)
        status_counts[status] += 1
        return_counts.setdefault(retvalue, 0)
        return_counts[retvalue] += 1
        if status in (Status.UNHASHED, Status.COLLISION, Status.CONFIRMED):
            kwds = [n for n in cleanname.split('_') if n]
            for i, kwd in enumerate(kwds):
                # [total, prefix, middle, postfix]
                keyword_counts.setdefault(kwd, [0, 0, 0, 0])
                keyword_counts[kwd][0] += 1
                #NOTE: entire words are treated as prefix
                if i == 0:
                    keyword_counts[kwd][1] += 1
                elif i + 1 < len(kwds):
                    keyword_counts[kwd][2] += 1
                else:
                    keyword_counts[kwd][3] += 1
                # letter stats:
                for j, c in enumerate(kwd):
                    # [total, word prefix, word middle, word postfix]
                    letter_counts.setdefault(c, [0, 0, 0, 0])
                    letter_counts[c][0] += 1
                    if j == 0:
                        letter_counts[c][1] += 1
                    elif j + 1 < len(kwd):
                        letter_counts[c][2] += 1
                    else:
                        letter_counts[c][3] += 1

    if args.write_unknown:
        unkwriter.flush()
        unkwriter.close()
        del unkwriter
    if args.write_collisions:
        clnwriter.flush()
        clnwriter.close()
        del clnwriter

    # print statistics:
    max_len = max([len(k.value) for k in status_counts.keys()] +
                  [len(k) for k in return_counts.keys()])

    # print status statistics:
    if show_status:
        total = sum(status_counts.values())
        total_cat = sum([c for s, c in status_counts.items() if s is not Status.NONE])
        print(f'{S.BRIGHT}{F.BLUE}CATEGORIES: [STATUS]{S.RESET_ALL}')
        for k in Status.__members__.values():
            cnt = status_counts.get(k, 0)
            if cnt == 0:
                print(f'{S.BRIGHT}{F.BLACK}', end='')  # dim rows with no entries
            print(f'  {k.value.ljust(max_len)} : {cnt:d}{S.RESET_ALL}')
        print(f'{S.BRIGHT}{F.WHITE}  {"total".ljust(max_len)} : {total_cat:d}/{total:d}{S.RESET_ALL}')

    # print return type statistics:
    if show_returns:
        for t, keys in TYPEDEFS.items():
            name = 'OTHER' if t is Ellipsis else t.name
            print(f'{S.BRIGHT}{F.BLUE}RETURNS: [{name}]{S.RESET_ALL}')
            for k in keys:
                cnt = return_counts.get(k, 0)  # '*' now included for pointer int types
                if cnt == 0:
                    print(f'{S.BRIGHT}{F.BLACK}', end='')
                print(f'  {k.ljust(max_len)} : {cnt:d}{S.RESET_ALL}')

    # print keyword statistics:
    max_kwd_len = max([len(k) for k in keyword_counts.keys()])
    COLS = (f'{S.BRIGHT}{F.GREEN}', f'{S.BRIGHT}{F.YELLOW}', f'{S.BRIGHT}{F.RED}', f'{S.DIM}{F.BLACK}')
    if keywords_min is not None:
        # (a wider layout with per-type columns `i % $ # %# $#` was mocked up here, but never implemented)
        # bucket keywords by total count, then walk the buckets in descending order:
        keyword_counts_sorted = {}
        for k, cnt in keyword_counts.items():
            keyword_counts_sorted.setdefault(cnt[0], []).append((k, cnt))
        cnts = list(keyword_counts_sorted.keys())
        cnts.sort(reverse=True)
        print()
        print(f'  {S.BRIGHT}{F.BLUE}{"keyword".ljust(max_kwd_len)}{S.RESET_ALL} : {S.BRIGHT}{F.BLUE}total{S.RESET_ALL} {S.DIM}{F.GREEN}pre {F.YELLOW}mid {F.RED}post{S.RESET_ALL}')
        for cnt in cnts:
            if cnt <= keywords_min:
                continue
            print(f'{S.DIM}{F.CYAN}KEYWORDS: [{cnt}]{S.RESET_ALL}')
            kwds = keyword_counts_sorted[cnt]
            kwds.sort(key=lambda k: k[0])
            for k, cnts in kwds:
                cnts_parts = ''
                for j, cntx in enumerate(cnts[1:]):
                    comma = ',' if j < 2 else ''
                    if not cntx:
                        j, cntx = -1, '0'  # COLS[-1] dims zero entries
                    cnts_parts += f'{COLS[j]}{cntx}{S.DIM}{F.BLACK}{comma}{S.RESET_ALL}' + (' ' * (3 - len(str(cntx))))
                print(f'  {k.ljust(max_kwd_len)} : {cnt}{S.DIM}{F.BLACK},{S.RESET_ALL}{"".ljust(3-len(str(cnt)))} [{cnts_parts}]')

    # letter statistics:
    if letter_sort is not None:
        max_kwd_len = len("letter")
        for c in string.ascii_lowercase + string.digits:
            letter_counts.setdefault(c, [0, 0, 0, 0])  # add any letters not appearing in syscalls
        letter_counts_alpha = list(letter_counts.items())
        letter_counts_alpha.sort(key=lambda pair: pair[0])
        # sort to place in order of: lowercase, uppercase, digits
        letter_counts_alpha.sort(key=lambda pair: ('\x80' + pair[0]) if pair[0].isdigit() else pair[0].swapcase())
        letter_counts_ordered = list(letter_counts_alpha)
        letter_counts_ordered.sort(key=lambda pair: pair[1][0], reverse=True)
        letter_counts_nonzero_nums = [cnts[0] for l, cnts in letter_counts_ordered if cnts[0]]
        letter_counts_nonzero = [l for l, cnts in letter_counts_ordered if cnts[0]]
        letter_counts_zero = [l for l, cnts in letter_counts_ordered if not cnts[0]]
        if letter_sort is True:
            letter_counts_alpha = letter_counts_ordered

        print()
        print(f'{S.DIM}{F.CYAN}LETTERS: [FREQUENCY]{S.RESET_ALL}')
        max_letter_count = max(letter_counts_nonzero_nums)
        print(f'  appear: ', end='')
        for l in letter_counts_nonzero:
            cnt = letter_counts[l][0]
            # brightness bands by relative frequency:
            if cnt >= max_letter_count / 3:
                print(f'{S.BRIGHT}{F.WHITE}', end='')
            elif cnt >= max_letter_count / 6:
                print(f'{S.NORMAL}{F.WHITE}', end='')
            elif cnt >= max_letter_count / 25:
                print(f'{S.DIM}{F.WHITE}', end='')
            else:
                print(f'{S.BRIGHT}{F.BLACK}', end='')
            print(f'{l}{S.RESET_ALL}', end='')
        print()
        print(f'   never: {S.BRIGHT}{F.BLACK}{"".join(letter_counts_zero)}{S.RESET_ALL}')
        print(f'     max: {max(letter_counts_nonzero_nums)}')
        print(f'  median: {statistics.median(letter_counts_nonzero_nums)}')
        print(f'     min: {min(letter_counts_nonzero_nums)}')
        print(f'     sum: {sum(letter_counts_nonzero_nums)}')
        print(f'    mean: {statistics.mean(letter_counts_nonzero_nums):g}')
        # print(f'    mode: {repr(Counter(letter_counts_nonzero_nums).most_common(1)[0])[1:-1]}')
        print(f'   stdev: {statistics.stdev(letter_counts_nonzero_nums):g}')
        print()
        for l, cnts in letter_counts_ordered:
            cnt = cnts[0]  # (no-op loop kept from the original; superseded below)
        print(f'  {S.BRIGHT}{F.BLUE}{"letter".ljust(max_kwd_len)}{S.RESET_ALL} : {S.BRIGHT}{F.BLUE}total{S.RESET_ALL} {S.DIM}{F.GREEN}pre {F.YELLOW}mid {F.RED}post{S.RESET_ALL}')
        print(f'{S.DIM}{F.CYAN}LETTERS: [COUNTS]{S.RESET_ALL}')
        for k, cnts in letter_counts_alpha:  #string.ascii_lowercase:
            # cnts = letter_counts.get(k, (0, 0, 0, 0))
            cnt = cnts[0]
            cnts_parts = ''
            color = '' if cnt else f'{S.BRIGHT}{F.BLACK}'
            for j, cntx in enumerate(cnts[1:]):
                comma = ',' if j < 2 else ''
                if not cntx:
                    j, cntx = -1, '0'  # COLS[-1] dims zero entries
                cnts_parts += f'{COLS[j]}{cntx}{S.DIM}{F.BLACK}{comma}{S.RESET_ALL}' + (' ' * (4 - len(str(cntx))))
            print(f'  {color}{k.ljust(max_kwd_len)} : {cnt}{S.DIM}{F.BLACK},{S.RESET_ALL}{"".ljust(4-len(str(cnt)))} {color}[{S.RESET_ALL}{cnts_parts}{color}]{S.RESET_ALL}')
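# ---------------------------------------------------------------------------
# Self-contained sketch of the pre/mid/post tallying scheme used by read_file()
# above (the function name and sample names here are hypothetical, purely for
# illustration).  Each underscore-separated keyword is credited once in the
# total slot and once in a position slot; note that a single-word name counts
# as a prefix, matching the NOTE in read_file():
def _count_keyword_positions(names):
    counts = {}
    for name in names:
        kwds = [w for w in name.strip('#@%$_').split('_') if w]
        for i, kwd in enumerate(kwds):
            slots = counts.setdefault(kwd, [0, 0, 0, 0])  # [total, pre, mid, post]
            slots[0] += 1
            if i == 0:
                slots[1] += 1      # first word (or an entire single-word name)
            elif i + 1 < len(kwds):
                slots[2] += 1      # interior word
            else:
                slots[3] += 1      # final word
    return counts

# e.g. _count_keyword_positions(['$sprite_set%', '$set_pos%', '$set'])
#   -> {'sprite': [1, 1, 0, 0], 'set': [3, 2, 0, 1], 'pos': [1, 0, 0, 1]}
# ---------------------------------------------------------------------------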
def main(argv: list = None) -> int:
    ## PARSER SETUP ##
    DEFAULT_MJS = [f'../data/mjs/{f}' for f in [
        'adv.mjh.bak',
        'console.mjs.old',
        'console.mjs.old2',
    ]]
    # scenario.arc/originals.7z/* from "Ame no Marginal -Rain Marginal-" (EN)
    # this data is not included in the repository for obvious reasons
    AME_ORIGINALS = [f'../data/copyright/ame_originals/{f}' for f in [
        'おまけ.txt', 'オリジナル0.txt', 'オリジナル01.txt', 'オリジナル02.txt',
        'オリジナル03.txt', 'オリジナル04.txt', 'オリジナル05.txt', 'オリジナル06.txt',
        'オリジナル07.txt', 'オリジナル08.txt', 'オリジナル09.txt', 'オリジナル010.txt',
        'スタッフとも.txt',
    ]]

    import argparse
    parser = argparse.ArgumentParser(
        description='Read Majiro Engine hashes and names from a multitude of files and write to py+json output files',
        add_help=True)
    # pgroup = parser.add_mutually_exclusive_group()
    parser.add_argument('-i', '--input', dest='inputs', metavar='MJSFILE', nargs='+', default=[], required=False,
                        help='parse hashes and groups from mjs/mjh files')
    parser.add_argument('-I', '--input-mjs', dest='inputs2', action='store_const', const=DEFAULT_MJS, default=[], required=False,
                        help='parse hashes and groups from all repo mjs/mjh files')
    parser.add_argument('-A', '--ame-mjs', dest='inputs3', action='store_const', const=AME_ORIGINALS, default=[], required=False,
                        help='parse hashes and groups from all repo "Ame no Marginal original" script files (not included)')
    parser.add_argument('-P', '--python', dest='python', default=False, action='store_true', required=False,
                        help='read hashes from python mjotool.known_hashes module')
    parser.add_argument('-G', '--google', dest='google', default=False, action='store_true', required=False,
                        help='read hashes, groups, and callbacks from Google Sheets')
    parser.add_argument('-U', '--update', dest='update', default=False, action='store_true', required=False,
                        help='update Google Sheet cached files and always download new copies')
    parser.add_argument('-H', '--hashes', metavar='JSONFILE', nargs='+', default=[], required=False,
                        help='parse user-defined hashes from json files')
    parser.add_argument('-s', '--syscalls', metavar='JSONFILE', nargs='+', default=[], required=False,
                        help='parse syscall hashes from json files')
    parser.add_argument('-g', '--groups', metavar='JSONFILE', nargs='+', default=[], required=False,
                        help='parse groups from json files')
    parser.add_argument('-c', '--callbacks', metavar='JSONFILE', nargs='+', default=[], required=False,
                        help='parse callbacks from json files')
    parser.add_argument('-a', '--all', metavar='JSONFILE', nargs='+', default=[], required=False,
                        help='parse "syscalls", "functions", "variables", and "groups" from json files')
    pgroup = parser.add_mutually_exclusive_group()
    pgroup.add_argument('--csv', dest='format', default='csv', action='store_const', const='csv', required=False,
                        help='csv Google Sheets format')
    pgroup.add_argument('--tsv', dest='format', action='store_const', const='tsv', required=False,
                        help='tsv Google Sheets format')
    parser.add_argument('-q', '--quiet-includes', dest='verbose_includes', action='store_false', default=True, required=False,
                        help='disable printing of included sources')
    parser.add_argument('-T', '--test', dest='test_name', action='store_const', default='', const='__test', required=False,
                        help='write hashes to files with "__test" appended to the name')

    ###########################################################################

    args = parser.parse_args(argv)
    # print(args)
    # return 0

    ## VARIABLE SETUP ##
    callback_names: Set[str] = set()
    group_names: Set[str] = set()
    var_hashes: Dict[int, str] = {}
    func_hashes: Dict[int, str] = {}
    sys_hashes: Dict[int, str] = {}
    sys_list: List[int] = []
    # predefined known hashes that won't show up as declarations
    EXTRA_VAR_HASHES = {0xa704bdbd: "__SYS__NumParams@"}
    EXTRA_FUNC_HASHES = {}
    EXTRA_SYS_HASHES = {}  # nothing yet
    var_hashes.update(EXTRA_VAR_HASHES)
    func_hashes.update(EXTRA_FUNC_HASHES)
    sys_hashes.update(EXTRA_SYS_HASHES)

    ###########################################################################

    ## DEFAULT BEHAVIOR ##
    if not args.google and not args.python and not (args.inputs + args.inputs2 + args.inputs3 + args.hashes +
                                                    args.syscalls + args.groups + args.callbacks + args.all):
        for scriptfile in DEFAULT_MJS:
            load_mjs_hashes(scriptfile, var_hashes, func_hashes, group_names, verbose=args.verbose_includes)

    ## LOAD FILES ##
    for scriptfile in set(args.inputs + args.inputs2 + args.inputs3):
        load_mjs_hashes(scriptfile, var_hashes, func_hashes, group_names, verbose=args.verbose_includes)
    for jsonfile in args.hashes:
        # combination of all user-defined hashes (these can be separated later by the prefix)
        user_hashes: Dict[int, str] = {}
        load_json_hashes(jsonfile, user_hashes, verbose=args.verbose_includes)
        for k, v in user_hashes.items():
            if v.fullname == '$':
                func_hashes[k] = v
            else:
                var_hashes[k] = v
        del user_hashes
    for jsonfile in args.syscalls:
        load_json_hashes(jsonfile, sys_hashes, verbose=args.verbose_includes)
    for jsonfile in args.groups:
        load_json_groups(jsonfile, group_names, verbose=args.verbose_includes)
    for jsonfile in args.callbacks:
        load_json_callbacks(jsonfile, callback_names, verbose=args.verbose_includes)
    for jsonfile in args.all:
        load_json_all(jsonfile, var_hashes, func_hashes, sys_hashes, group_names, callback_names,
                      verbose=args.verbose_includes)
    if args.python:
        sys_list = []
        load_python_hashes(var_hashes, func_hashes, sys_hashes, group_names, callback_names, sys_list,
                           verbose=args.verbose_includes)
    if args.google:
        sys_list = []
        load_sheets_all(var_hashes, func_hashes, sys_hashes, group_names, callback_names, sys_list,
                        format=args.format, update=args.update,
                        allow_collisions=('%Op_internalCase~@MAJIRO_INTER',),
                        verbose=args.verbose_includes)

    # add main function hashes for all known groups (even if they aren't used)
    for group in group_names:
        funcname = f'$main@{group}'
        func_hashes[hash32(funcname)] = funcname

    # generate hash lookups used for following types,
    # group hashes are stored as a hash with the `$main` function for easy `#group` preprocessor identification
    group_hashes: Dict[int, str] = dict((hash32(f'$main@{g}'), g) for g in group_names)
    callback_hashes: Dict[int, str] = dict((hash32(c), c) for c in callback_names)

    ###########################################################################

    # datasets to initialize with list comprehension
    def _fmt_local(name: str) -> str:
        return name[:-1] if (len(name) > 1 and name[-1] == '@') else name
    def _fmt_syscall(name: str) -> str:
        return name[:-len('@MAJIRO_INTER')] if name.endswith('@MAJIRO_INTER') else name
    def _strip_group(name: str) -> str:
        idx_at = name.rfind('@', 1)
        return name[:idx_at] if (idx_at != -1) else name
    def _fmt_names(names: Dict[int, str], fmt_func) -> Dict[int, Tuple[str, str]]:
        return dict((k, (fmt_func(v), _strip_group(v))) for k, v in names.items())
    def fmt_locals(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:  # pylint: disable=unused-variable
        return _fmt_names(names, _fmt_local)
    def fmt_syscalls(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:
        return _fmt_names(names, _fmt_syscall)
    def fmt_sort(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:
        return _fmt_names(names, str)
    def fmt_none(names: Dict[int, str]) -> Dict[int, Tuple[str, str]]:
        return dict((k, (v, v)) for k, v in names.items())

    PY_HASHES = (
        ('local_vars', var_hashes, ('_',), fmt_sort,
         'hashes for all four variable types: local, thread, savefile, persistent\n'),  #fmt_locals),
        ('thread_vars', var_hashes, ('%',), fmt_sort, None),
        ('savefile_vars', var_hashes, ('@',), fmt_sort, None),
        ('persistent_vars', var_hashes, ('#',), fmt_sort, None),
        #('variables', var_hashes, (), fmt_sort, None),
        #('usercalls', func_hashes, (), fmt_sort, None),
        ('functions', func_hashes, (), fmt_sort,
         '\n==HR==\n\nhashes for user-defined and system-defined internal functions\n'),
        ('syscalls', sys_hashes, (), fmt_syscalls,
         '\nsystem call hashes all use the group name `$syscall@MAJIRO_INTER`'),
        ('groups', group_hashes, (), fmt_none,
         '\n==HR==\n\ngroup hashes are listed as the hash of `$main@GROUPNAME`,\n this is done in order to identify a file\'s common group from the entrypoint function hash'),
        ('callbacks', callback_hashes, (), fmt_none,
         'event "callback" names used with `$event_*` system calls'),
    )
    JSON_HASHES = (
        ('variables', var_hashes, (), fmt_sort, None),
        #('usercalls', func_hashes, (), fmt_sort, None),
        ('functions', func_hashes, (), fmt_sort, None),
        ('syscalls', sys_hashes, (), fmt_syscalls, None),
        ('groups', group_hashes, (), fmt_none, None),
        ('callbacks', callback_hashes, (), fmt_none, None),
    )
    GROUPS = (
        ('syscall', 'syscalls_list', sys_list, int,
         '==HR==\n\nlist of all system call hash values, whether or not a name is known for the hash'),
        #('groups', 'groups', group_names, None),
    )
    if not sys_list:
        GROUPS = ()

    if args.verbose_includes:
        print()
        NAMES = (('syscalls', sys_hashes), ('functions', func_hashes), ('variables', var_hashes),
                 ('groups', group_names), ('callbacks', callback_names))
        print(f'{S.BRIGHT}{F.BLUE}Found:{S.RESET_ALL}',
              ', '.join(f"{S.BRIGHT}{F.WHITE}{len(v)} {k}{S.RESET_ALL}" for k, v in NAMES) + ',',
              f'and {S.BRIGHT}{F.WHITE}{len(sys_list)} total syscall hashes{S.RESET_ALL}')

    ###########################################################################

    print()
    # write python for our library
    hash_items = [HashSelection(n.replace('_', ' ').rstrip('s'), n.upper(), f'{n.upper()}_LOOKUP', fn(d), p, c)
                  for n, d, p, fn, c in PY_HASHES]
    group_items = [GroupSelection(n1.replace('_', ' ').rstrip('s'), n2.upper(), l, t, c)
                   for n1, n2, l, t, c in GROUPS]
    filename = f'../src/mjotool/known_hashes/_hashes{args.test_name}.py'
    print(f'{S.BRIGHT}{F.GREEN}Writing:{S.RESET_ALL} {S.BRIGHT}{F.BLUE}{filename}{S.RESET_ALL}')
    with open(filename, 'wt+', encoding='utf-8') as writer:
        write_python_file(writer, hash_items, group_items, readable=False, sort=True)
        writer.flush()

    # write compact json for non-humans
    hash_items = [HashSelection(n, n, f'{n}_lookup', fn(d), p, c) for n, d, p, fn, c in JSON_HASHES]
    group_items = [GroupSelection(n1, n2, l, t, c) for n1, n2, l, t, c in GROUPS]
    filename = f'../data/known_hashes_compact{args.test_name}.json'
    print(f'{S.BRIGHT}{F.GREEN}Writing:{S.RESET_ALL} {S.BRIGHT}{F.CYAN}{filename}{S.RESET_ALL}')
    with open(filename, 'wt+', encoding='utf-8') as writer:
        write_json_file(writer, hash_items, group_items, tab='\t', readable=False, sort=True)
        writer.flush()

    # write """readable""" json for everybody else
    filename = f'../data/known_hashes_readable{args.test_name}.json'
    print(f'{S.BRIGHT}{F.GREEN}Writing:{S.RESET_ALL} {S.BRIGHT}{F.CYAN}{filename}{S.RESET_ALL}')
    with open(filename, 'wt+', encoding='utf-8') as writer:
        write_json_file(writer, hash_items, group_items, tab='\t', readable=True, sort=True)
        writer.flush()

    print(f'{S.BRIGHT}{F.WHITE}[Finished]{S.RESET_ALL}')
    return 0
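# The excerpt ends at main(); a conventional entry-point guard (assumed here,
# not shown in the original excerpt) would dispatch to it like so:
if __name__ == '__main__':
    import sys
    sys.exit(main())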