def __init__(self, *args, **kwargs):
  """Build the OpenCL encoder's lexer and the bytecode-ID -> encoded map.

  Reads the PACT'17 device-mapping dataset to obtain OpenCL kernel sources,
  joins them against the IR database on relpath, lexes all sources in a
  single deterministic batch, and records the result in `self.id_to_encoded`.

  Raises:
    OSError: If any OpenCL relpath from the dataset has no matching entry in
      the IR database.
  """
  super(OpenClEncoder, self).__init__(*args, **kwargs)
  # We start with an empty vocabulary and build it from inputs.
  self.lexer = lexers.Lexer(
      type=lexers.LexerType.OPENCL, vocabulary={}, max_encoded_length=100000
  )

  # Map relpath -> src.
  df = make_devmap_dataset.MakeGpuDataFrame(
      opencl_device_mapping_dataset.OpenClDeviceMappingsDataset().df,
      "amd_tahiti_7970",
  )
  relpath_to_src = {
      row["relpath"]: row["program:opencl_src"] for _, row in df.iterrows()
  }

  # Map relpath -> bytecode ID.
  with self.ir_db.Session() as session:
    relpath_to_id = {
        row.relpath: row.id
        for row in session.query(
            ir_database.IntermediateRepresentation.id,
            ir_database.IntermediateRepresentation.relpath,
        ).filter(
            ir_database.IntermediateRepresentation.source_language
            == ir_database.SourceLanguage.OPENCL,
            # NOTE: `== True` (not `is True`) is required here — SQLAlchemy
            # overloads `==` on columns to build the SQL expression.
            ir_database.IntermediateRepresentation.compilation_succeeded
            == True,
            ir_database.IntermediateRepresentation.source
            == "pact17_opencl_devmap",
            ir_database.IntermediateRepresentation.relpath.in_(
                relpath_to_src.keys()
            ),
        )
    }

  not_found = set(relpath_to_src.keys()) - set(relpath_to_id.keys())
  if not_found:
    raise OSError(
        f"{humanize.Plural(len(not_found), 'OpenCL relpath')} not"
        " found in IR database"
    )

  # Encode the OpenCL sources.
  # FIX: the original used a *set* comprehension here despite the List
  # annotation, which threw away the deterministic order established by
  # sorted() below. A list preserves it, so the zip() pairing further down
  # is reproducible across runs.
  sorted_id_src_pairs: List[Tuple[int, str]] = [
      (relpath_to_id[relpath], relpath_to_src[relpath])
      for relpath in sorted(relpath_to_src.keys())
  ]
  sorted_encodeds: List[np.ndarray] = self.lexer.Lex(
      [src for _, src in sorted_id_src_pairs]
  )
  self._max_encoded_length = max(len(encoded) for encoded in sorted_encodeds)

  # Map bytecode ID -> encoded token sequence. `bytecode_id` avoids
  # shadowing the `id` builtin.
  self.id_to_encoded = {
      bytecode_id: encoded
      for (bytecode_id, _), encoded in zip(sorted_id_src_pairs, sorted_encodeds)
  }
def opencl_relpaths() -> Set[str]:
  """Return the set of relpaths of the OpenCL device-mapping kernels.

  Builds the amd_tahiti_7970 GPU data frame from the PACT'17 device-mapping
  dataset and collects the unique `relpath` column values.
  """
  devmap_df = make_devmap_dataset.MakeGpuDataFrame(
      opencl_device_mapping_dataset.OpenClDeviceMappingsDataset().df,
      "amd_tahiti_7970",
  )
  return {relpath for relpath in devmap_df.relpath.values}