def save_c_submit_code(data_df_list):
    create_table(COMPILE_SUCCESS_DATA_DBPATH, C_COMPILE_SUCCESS_RECORDS)
    # Count how many records in each frame compiled successfully.
    result_list = [data_df['gcc_compile_result'].map(lambda x: 1 if x else 0)
                   for data_df in data_df_list]
    count_list = [len(data_df) for data_df in data_df_list]
    # Sum per frame so frames of different lengths don't get index-aligned.
    success_res = sum(r.sum() for r in result_list)
    count_res = sum(count_list)
    print('success_res total: {}, total: {}'.format(success_res, count_res))

    def trans(error_df, reverse_verdict, reverse_langdict):
        return [transform_data(row, reverse_verdict, reverse_langdict)
                for index, row in error_df.iterrows()]

    reverse_verdict = reverse_dict(verdict)
    reverse_langdict = reverse_dict(langdict)
    data_items_list = [trans(data_df, reverse_verdict, reverse_langdict)
                       for data_df in data_df_list]
    for data_items in data_items_list:
        insert_items(COMPILE_SUCCESS_DATA_DBPATH, C_COMPILE_SUCCESS_RECORDS, data_items)
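# A minimal standalone sketch of the success counting above; the toy frames
# below are illustrative and not part of the original module.
import pandas as pd

_dfs = [pd.DataFrame({'gcc_compile_result': [True, False, True]}),
        pd.DataFrame({'gcc_compile_result': [True]})]
_success = sum(df['gcc_compile_result'].map(lambda x: 1 if x else 0).sum() for df in _dfs)
_total = sum(len(df) for df in _dfs)
print('success_res total: {}, total: {}'.format(_success, _total))  # 3, 4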
def __init__(self, token_set: set, n_gram=1):
    """
    :param token_set: a set of all characters
    """
    self.BEGIN = "<BEGIN>"
    self.END = "<END>"
    self.UNK = "<UNK>"
    self.PAD = "<PAD>"
    self.BEGIN_TOKEN = "<BEGIN_TOKEN>"
    self.END_TOKEN = "<END_TOKEN>"
    self.n_gram = n_gram
    # Split every token into character n-grams padded with BEGIN/END.
    # (preprocess_token was not defined in this snippet; this is an assumed
    # definition mirroring the sibling embedding class in this module.)
    self.preprocess_token = lambda x: more_itertools.windowed(
        [self.BEGIN] + list(x) + [self.END], n_gram)
    token_set = set(more_itertools.flatten(
        map(lambda x: list(self.preprocess_token(x)), token_set)))
    self.id_to_character_dict = dict(enumerate(sorted(token_set)))
    # The special tokens take the ids after the regular n-grams.
    for special in (self.UNK, self.PAD, self.BEGIN_TOKEN, self.END_TOKEN):
        self.id_to_character_dict[len(self.id_to_character_dict)] = special
    self.character_to_id_dict = util.reverse_dict(self.id_to_character_dict)
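# A minimal sketch of the n-gram windowing this constructor relies on,
# assuming n_gram=2; standalone, so the BEGIN/END markers are redefined locally.
import more_itertools

_BEGIN, _END = "<BEGIN>", "<END>"
_bigrams = list(more_itertools.windowed([_BEGIN] + list("int") + [_END], 2))
# [('<BEGIN>', 'i'), ('i', 'n'), ('n', 't'), ('t', '<END>')]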
def __init__(self, word_set: set, word_to_id_dict: dict, begin_tokens, end_tokens,
             unk_token, pad_token=None, hole_token=None, addition_tokens=None,
             add_position_to_dict=True):
    self.unk = unk_token
    self.pad = pad_token
    self.begin_tokens = begin_tokens
    self.end_tokens = end_tokens
    self.hole_token = hole_token
    self.addition_tokens = addition_tokens if addition_tokens is not None else []
    if add_position_to_dict:
        # Gather every special token and append the unseen ones to the id dict.
        position_tokens = set(begin_tokens)
        position_tokens |= set(end_tokens)
        if pad_token is not None:
            position_tokens |= {pad_token}
        position_tokens |= {unk_token}
        position_tokens |= set(self.addition_tokens)
        self.word_set = word_set | position_tokens
        for token in sorted(position_tokens):
            if token not in word_to_id_dict:
                word_to_id_dict[token] = len(word_to_id_dict)
        if hole_token is not None:
            word_to_id_dict[hole_token] = len(word_to_id_dict)
    else:
        # Keep word_set defined even when the dict already holds the special tokens.
        self.word_set = word_set
    self.word_to_id_dict = word_to_id_dict
    self.id_to_word_dict = util.reverse_dict(self.word_to_id_dict)
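# Standalone sketch of the special-token registration above; the enclosing
# class is not shown here, so only the dict-growing pattern is demonstrated.
_word_to_id = {'int': 0, 'return': 1}
for _token in sorted({'<BEGIN>', '<END>', '<PAD>', '<unk>'}):
    if _token not in _word_to_id:
        _word_to_id[_token] = len(_word_to_id)
# _word_to_id now maps the four special tokens to ids 2..5 in sorted order.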
def __init__(self, production_list: typing.List):
    self._token_set = set(i.strip() for i in more_itertools.collapse(production_list))
    self._id_token_map = self._get_set_id_map(self._token_set)
    self._EMPTY = "<EMPTY>"  # this token is used to indicate the stack is empty
    self._token_set.add(self._EMPTY)
    self._id_token_map[len(self._id_token_map)] = self._EMPTY
    self._token_id_map = util.reverse_dict(self._id_token_map)
    self._production_list = [Production(left, right, self._token_id_map)
                             for left, right in production_list]
    self._id_production_map = self._get_set_id_map(self._production_list)
    self._production_id_map = util.reverse_dict(self._id_production_map)
    # Group the productions by the id of their left-hand-side symbol.
    self._token_derivate_map = toolz.groupby(lambda x: x.left_id, self._production_list)
    self._string_production_map = {str(production): production
                                   for production in self._production_list}
    # The terminals are pycparser's lexer token names plus the EMPTY marker.
    self._terminal_set = set(i.strip() for i in pycparser.c_lexer.CLexer.tokens)
    self._terminal_set.add(self._EMPTY)
    self._terminal_id_set = set(self._token_id_map[t] for t in self._terminal_set)
    self._match_terminal_node = self._create_matched_ternimal_node()
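# A small sketch of the toolz.groupby call above, using plain (left, right)
# tuples in place of Production objects (which are defined elsewhere).
import toolz

_rules = [('expr', ['expr', '+', 'term']), ('expr', ['term']), ('term', ['ID'])]
_by_left = toolz.groupby(lambda p: p[0], _rules)
# {'expr': [('expr', [...]), ('expr', [...])], 'term': [('term', ['ID'])]}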
def __init__(self, word_set: set, word_to_id_dict: dict, begin_tokens, end_tokens,
             unk_token):
    self.unk = unk_token
    self.begin_tokens = begin_tokens
    self.end_tokens = end_tokens
    position_tokens = set(begin_tokens)
    position_tokens |= set(end_tokens)
    position_tokens |= {unk_token}
    self.word_set = word_set | position_tokens
    for token in sorted(position_tokens):
        word_to_id_dict[token] = len(word_to_id_dict)
    self.word_to_id_dict = word_to_id_dict
    self.id_to_word_dict = util.reverse_dict(self.word_to_id_dict)
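# util.reverse_dict is used throughout these constructors; a minimal sketch of
# what it presumably does (the real helper may also check for collisions):
def _reverse_dict(d: dict) -> dict:
    return {v: k for k, v in d.items()}

# _reverse_dict({0: '<unk>', 1: 'int'}) == {'<unk>': 0, 'int': 1}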
def __init__(self, token_set: set, n_gram=1, embedding_shape=300):
    """
    :param token_set: a set of all characters
    """
    self.BEGIN = "<BEGIN>"
    self.END = "<END>"
    # Split a token into character n-grams, with or without BEGIN/END padding.
    self.preprocess_token = lambda x: more_itertools.windowed(
        [self.BEGIN] + list(x) + [self.END], n_gram)
    self.preprocess_token_without_label = lambda x: more_itertools.windowed(
        list(x), n_gram)
    token_set = set(more_itertools.flatten(
        map(lambda x: list(self.preprocess_token(x)), token_set)))
    self.id_to_character_dict = dict(enumerate(sorted(token_set)))
    self.character_to_id_dict = util.reverse_dict(self.id_to_character_dict)
    self.embedding_shape = embedding_shape
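# Sketch of the id assignment above: sorting first makes the enumerate-based
# ids reproducible across runs (the example tuples are illustrative bigrams).
_tokens = sorted({('<BEGIN>', 'i'), ('i', 'n'), ('n', 't')})
_id_to_char = dict(enumerate(_tokens))
_char_to_id = {v: k for k, v in _id_to_char.items()}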
def __init__(self, embedding: WordEmbedding, word_set: set, use_position_label: bool,
             begin_tokens=None, end_tokens=None):
    self.unk = '<unk>'
    self.begin = ['<BEGIN>']
    self.end = ['<END>']
    if begin_tokens is not None:
        self.begin = begin_tokens
    if end_tokens is not None:
        self.end = end_tokens
    self.pad = '<PAD>'
    self.use_position_label = use_position_label
    word_set = sorted(set(word_set))
    self.id_to_word_dict = dict(enumerate(word_set, start=2))
    self.id_to_word_dict[0] = self.unk
    self.id_to_word_dict[1] = self.pad
    if use_position_label:
        for tok in self.begin:
            self.id_to_word_dict[len(self.id_to_word_dict)] = tok
        for tok in self.end:
            self.id_to_word_dict[len(self.id_to_word_dict)] = tok
    self.word_to_id_dict = util.reverse_dict(self.id_to_word_dict)
    print("The word vocabulary has {} words".format(len(self.word_to_id_dict)))
    self._embedding_matrix = np.array(
        [embedding[b] for a, b in sorted(self.id_to_word_dict.items(), key=lambda x: x[0])])
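# Standalone sketch of the embedding-matrix build above, with a plain dict
# standing in for the WordEmbedding object (its real lookup API may differ).
import numpy as np

_embedding = {'<unk>': np.zeros(3), '<PAD>': np.zeros(3), 'int': np.ones(3)}
_id_to_word = {0: '<unk>', 1: '<PAD>', 2: 'int'}
_matrix = np.array([_embedding[w] for _, w in sorted(_id_to_word.items())])
# _matrix[i] is the vector for the word whose id is i.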
def __init__(self, word_set: set, word_to_id_dict: dict, begin_tokens, end_tokens,
             unk_token, addition_tokens=None, add_position_to_dict=True):
    self.unk = unk_token
    self.begin_tokens = begin_tokens
    self.end_tokens = end_tokens
    self.addition_tokens = addition_tokens if addition_tokens is not None else []
    if add_position_to_dict:
        position_tokens = set(begin_tokens)
        position_tokens |= set(end_tokens)
        position_tokens |= {unk_token}
        position_tokens |= set(self.addition_tokens)
        self.word_set = word_set | position_tokens
        for token in sorted(position_tokens):
            word_to_id_dict[token] = len(word_to_id_dict)
    else:
        # Keep word_set defined even when the dict already holds the special tokens.
        self.word_set = word_set
    self.word_to_id_dict = word_to_id_dict
    self.id_to_word_dict = util.reverse_dict(self.word_to_id_dict)
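# When add_position_to_dict is False, the caller must have placed the special
# tokens in word_to_id_dict beforehand; a sketch of that contract ("Vocabulary"
# is a hypothetical name for the enclosing class, which this snippet omits):
_prebuilt = {'<BEGIN>': 0, '<END>': 1, '<unk>': 2, 'int': 3}
# vocab = Vocabulary(word_set={'int'}, word_to_id_dict=_prebuilt,
#                    begin_tokens=['<BEGIN>'], end_tokens=['<END>'],
#                    unk_token='<unk>', add_position_to_dict=False)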
    'VOID', 'VOLATILE', 'WHILE', '__INT128',
)

keyword_map = {}
for keyword in keywords:
    if keyword == '_BOOL':
        keyword_map['_Bool'] = keyword
    elif keyword == '_COMPLEX':
        keyword_map['_Complex'] = keyword
    else:
        keyword_map[keyword.lower()] = keyword
# Reverse so the map goes from token-type name back to C source text,
# matching the direction of operator_map below.
keyword_map = reverse_dict(keyword_map)

operator_map = {
    'PLUS': '+',
    'MINUS': '-',
    'TIMES': '*',
    'DIVIDE': '/',
    'MOD': '%',
    'OR': '|',
    'AND': '&',
    'NOT': '~',
    'XOR': '^',
    'LSHIFT': '<<',
    'RSHIFT': '>>',
    'LOR': '||',
    'LAND': '&&',