def hton_instance(g, instance, state_name):
    """Give ``instance`` a cloned element whose code converts a state's fields.

    The generated code receives ``(size, pkt, buf)`` from ``inp()``, casts
    ``pkt`` to ``state_name*`` and, for every field of that state whose size
    is 2, 4 or 8 bytes, rewrites the field through the converter found in
    ``size2convert`` (presumably the host/network byte-order helpers --
    confirm against the definition of ``size2convert``).

    Args:
        g: Graph passed through to ``get_all_types_fields``.
        instance: Object whose ``element`` attribute is replaced by the clone.
        state_name: Name of the state type; also used as the clone-name suffix.
    """
    src = " (size_t size, void* pkt, void* buf) = inp();\n"
    src += " {0}* p = ({0}*) pkt;\n".format(state_name)

    for field_type, field in get_all_types_fields(g, state_name, 'p->'):
        try:
            size = common.sizeof(field_type)
            if size in (2, 4, 8):
                src += " {0} = {1}({0});\n".format(field, size2convert[size])
        except Exception:
            # Deliberate best effort: a field whose size cannot be determined
            # is left untouched.  Only Exception is caught so that
            # KeyboardInterrupt/SystemExit still propagate.
            continue

    element = instance.element
    # NOTE(review): the name expression was missing in the reviewed source;
    # "<element name>_<state name>" is the reconstruction -- confirm.
    new_element = element.clone(element.name + "_" + state_name)
    new_element.code = src
    instance.element = new_element
def gen_bench_from_code(f, typ, code, bench_with_timestamp):
    """Return the C++ source of a benchmark translation unit.

    The result is made of the includes reported by ``f.gen_includes(_lang)``,
    a fixed set of third-party includes, two ``make_data`` helpers, the
    random-input code from ``f.domain.code('rand_param', typ)``, the caller
    supplied ``code`` and a ``main`` that runs ``bench_with_timestamp`` when
    ``--use_timestamp_ns`` is on the command line and Google Benchmark
    otherwise.

    Args:
        f: Operator description providing ``gen_includes`` and ``domain``.
        typ: Element type name substituted for ``{type}``.
        code: C++ benchmark definitions inserted before ``main``.
        bench_with_timestamp: C++ statements run in timestamp mode.

    Returns:
        The generated source as a string.
    """
    header = common.pprint_includes(f.gen_includes(_lang))
    # This part is concatenated, not passed through str.format, hence the
    # single braces.
    header += '''
// Required for random generation
#include "../benches.hpp"

// Google benchmark
#ifndef DISABLE_GOOGLE_BENCHMARK
#include <benchmark/benchmark.h>
#endif

#include <ctime>
double timestamp_ns() {
  timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return double(ts.tv_sec) * 1000000000.0 + double(ts.tv_nsec);
}

// std
#include <cmath>
// #include <map>
#include <numeric>
// #include <fstream>

// Sleef
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
#include <sleef.h>
#pragma GCC diagnostic pop

// MIPP
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Wzero-length-array"
#endif
#include <mipp.h>
#pragma GCC diagnostic pop
'''

    body = '''{header}

// -------------------------------------------------------------------------

static const int sz = 1024;

template <typename Random>
static {type}* make_data(int sz, Random r) {{
  {type}* data = ({type}*)nsimd_aligned_alloc(sz * {sizeof});
  for (int i = 0; i < sz; ++i) {{
    data[i] = r();
  }}
  return data;
}}

static {type}* make_data(int sz) {{
  {type}* data = ({type}*)nsimd_aligned_alloc(sz * {sizeof});
  for (int i = 0; i < sz; ++i) {{
    data[i] = {type}(0);
  }}
  return data;
}}

{random_code}

{code}

int main(int argc, char** argv) {{
  std::vector<std::string> args(argv, argv + argc);
  if (std::find(args.begin(), args.end(), "--use_timestamp_ns") != args.end()) {{
    {bench_with_timestamp}
  }}
#ifndef DISABLE_GOOGLE_BENCHMARK
  else {{
    ::benchmark::Initialize(&argc, argv);
    ::benchmark::RunSpecifiedBenchmarks();
  }}
#endif
  return 0;
}}
'''
    # Only the placeholders that occur in the template are supplied; the two
    # keyword arguments that were lost from the reviewed source are not
    # referenced by it.
    return body.format(
        type=typ,
        random_code=f.domain.code('rand_param', typ),
        code=code,
        bench_with_timestamp=bench_with_timestamp,
        sizeof=common.sizeof(typ),
        header=header,
    )
def gen_reinterpret_convert(opts, op, from_typ, to_typ, lang):
    """Write a round-trip test for a conversion-like operator.

    The generated program fills a vector with random values, applies the
    operator from ``from_typ`` to ``to_typ`` and back (``upcvt`` is undone
    with ``downcvt``), and exits with failure if any element changed.

    Args:
        opts: Options forwarded to ``get_filename`` and ``common.clang_format``.
        op: Operator description; its ``name`` selects the code variant.
        from_typ: Source element type name.
        to_typ: Destination element type name.
        lang: ``'c_base'``, ``'cxx_base'`` or anything else for the advanced
            C++ API.

    Returns:
        None. Nothing is written when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, '{}_to_{}'.format(from_typ, to_typ),
                            lang)
    if filename is None:
        return

    # NOTE(review): every `op.name` below was missing from the reviewed
    # source and is reconstructed from the {op_name} placeholders.
    op_name = op.name
    logical = 'l' if op_name == 'reinterpretl' else ''
    is_upcvt = op_name == 'upcvt'
    fmt = dict(op_name=op_name, from_typ=from_typ, to_typ=to_typ,
               logical=logical)

    if lang == 'c_base':
        if is_upcvt:
            comp = '''{{
              vecx2({to_typ}) tmp =
                  vupcvt(vload{logical}a(in, {from_typ}), {from_typ}, {to_typ});
              vstore{logical}a(out, vdowncvt(
                  tmp.v0, tmp.v1, {to_typ}, {from_typ}), {from_typ});
            }}'''.format(**fmt)
        else:
            comp = '''vstore{logical}a(out, v{op_name}(v{op_name}(
                        vload{logical}a(in, {from_typ}), {from_typ}, {to_typ}),
                        {to_typ}, {from_typ}), {from_typ});'''.format(**fmt)
    elif lang == 'cxx_base':
        if is_upcvt:
            comp = '''vecx2({to_typ}) tmp = nsimd::upcvt(nsimd::load{logical}a(
                          in, {from_typ}()), {from_typ}(), {to_typ}());
                      nsimd::store{logical}a(out, nsimd::downcvt(
                          tmp.v0, tmp.v1, {to_typ}(), {from_typ}()),
                          {from_typ}());'''.format(**fmt)
        else:
            comp = '''nsimd::store{logical}a(out, nsimd::{op_name}(
                        nsimd::{op_name}(nsimd::load{logical}a(
                          in, {from_typ}()), {from_typ}(), {to_typ}()),
                            {to_typ}(), {from_typ}()), {from_typ}());'''. \
                   format(**fmt)
    else:
        if is_upcvt:
            comp = '''nsimd::packx2<{to_typ}> tmp = nsimd::upcvt<
                        nsimd::pack{logical}x2<{to_typ}> >(nsimd::load{logical}a<
                          nsimd::pack{logical}<{from_typ}> >(in));
                      nsimd::store{logical}a(out, nsimd::downcvt<
                        nsimd::pack{logical}<{from_typ}> >(tmp.v0, tmp.v1));'''. \
                   format(**fmt)
        else:
            comp = '''nsimd::store{logical}a(out, nsimd::{op_name}<
                        nsimd::pack{logical}<{from_typ}> >(nsimd::{op_name}<
                          nsimd::pack{logical}<{to_typ}> >(nsimd::load{logical}a<
                            nsimd::pack{logical}<{from_typ}> >(in))));'''. \
                   format(**fmt)

    # Random input expression (C source text).
    if logical == 'l':
        rand = '(rand() % 2)'
    elif op_name == 'reinterpret' and to_typ == 'f16' and \
            from_typ in ['i16', 'u16']:
        rand = '(15360 /* no denormal */ | (1 << (rand() % 4)))'
    elif to_typ in common.utypes or from_typ in common.utypes:
        rand = '(1 << (rand() % 4))'
    else:
        rand = '((2 * (rand() % 2) - 1) * (1 << (rand() % 4)))'

    if from_typ == 'f16':
        # No trailing ';' here: the template already terminates the statement.
        rand = 'nsimd_f32_to_f16((f32){})'.format(rand)
        # f16 values are compared through their bit patterns.
        neq_test = '(*(u16*)&in[j]) != (*(u16*)&out[j])'
    else:
        rand = '({}){}'.format(from_typ, rand)
        neq_test = 'in[j] != out[j]'

    with common.open_utf8(filename) as out:
        out.write(
            '''{includes}

            #define CHECK(a) {{ \\
              errno = 0; \\
              if (!(a)) {{ \\
                fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                        __LINE__, strerror(errno)); \\
                fflush(stderr); \\
                exit(EXIT_FAILURE); \\
              }} \\
            }}

            int main(void) {{
              int i, j;
              {from_typ} *in, *out;
              int len = vlen({from_typ});

              fprintf(stdout,
                      "test of {op_name} from {from_typ} to {to_typ}...\\n");
              CHECK(in = ({from_typ}*)nsimd_aligned_alloc(len * {sizeof}));
              CHECK(out = ({from_typ}*)nsimd_aligned_alloc(len * {sizeof}));

              for (i = 0; i < 100; i++) {{
                for (j = 0; j < len; j++) {{
                  in[j] = {rand};
                }}

                {comp}

                for (j = 0; j < len; j++) {{
                  if ({neq_test}) {{
                    exit(EXIT_FAILURE);
                  }}
                }}
              }}

              fprintf(stdout,
                      "test of {op_name} from {from_typ} to {to_typ}... OK\\n");
              return EXIT_SUCCESS;
            }}'''.format(includes=get_includes(lang), op_name=op_name,
                         to_typ=to_typ, from_typ=from_typ, comp=comp,
                         rand=rand, neq_test=neq_test,
                         sizeof=common.sizeof(from_typ)))
    common.clang_format(opts, filename)
def gen_nbtrue(opts, op, typ, lang):
    """Write a test program for the ``nbtrue`` operator over ``typ``.

    The generated program checks three cases on one vector: all lanes true
    (expects ``len``), all lanes false (expects 0) and only lane 0 true
    (expects 1).

    Args:
        opts: Options forwarded to ``get_filename`` and ``common.clang_format``.
        op: Operator description; its ``name`` appears in the status messages.
        typ: Element type name.
        lang: ``'c_base'``, ``'cxx_base'`` or anything else for the advanced
            C++ API.

    Returns:
        None. Nothing is written when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    if lang == 'c_base':
        nbtrue = 'vnbtrue(vloadla(buf, {}), {})'.format(typ, typ)
    elif lang == 'cxx_base':
        nbtrue = 'nsimd::nbtrue(nsimd::loadla(buf, {}()), {}())'. \
                 format(typ, typ)
    else:
        nbtrue = 'nsimd::nbtrue(nsimd::loadla<nsimd::packl<{}> >(buf))'. \
                 format(typ)

    # f16 has no literal syntax, so go through the f32 conversion helper.
    if typ == 'f16':
        scalar0 = 'nsimd_f32_to_f16(0)'
        scalar1 = 'nsimd_f32_to_f16(1)'
    else:
        scalar0 = '({})0'.format(typ)
        scalar1 = '({})1'.format(typ)

    with common.open_utf8(filename) as out:
        # NOTE(review): `op_name=op.name` is reconstructed; the argument was
        # missing from the reviewed source.
        out.write(
            '''{includes}

            #define CHECK(a) {{ \\
              errno = 0; \\
              if (!(a)) {{ \\
                fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                        __LINE__, strerror(errno)); \\
                fflush(stderr); \\
                exit(EXIT_FAILURE); \\
              }} \\
            }}

            int main(void) {{
              int i;
              {typ} *buf;
              int len = vlen({typ});

              fprintf(stdout, "test of {op_name} over {typ}...\\n");
              CHECK(buf = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));

              /* Test with all elements to true */
              for (i = 0; i < len; i++) {{
                buf[i] = {scalar1};
              }}
              if ({nbtrue} != len) {{
                exit(EXIT_FAILURE);
              }}

              /* Test with all elements to false */
              for (i = 0; i < len; i++) {{
                buf[i] = {scalar0};
              }}
              if ({nbtrue} != 0) {{
                exit(EXIT_FAILURE);
              }}

              /* Test with only one element to true */
              buf[0] = {scalar1};
              if ({nbtrue} != 1) {{
                exit(EXIT_FAILURE);
              }}

              fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
              return EXIT_SUCCESS;
            }}'''.format(includes=get_includes(lang), op_name=op.name,
                         typ=typ, nbtrue=nbtrue, scalar0=scalar0,
                         scalar1=scalar1, sizeof=common.sizeof(typ)))
    common.clang_format(opts, filename)
def gen_load_store(opts, op, typ, lang):
    """Write a test for a structured load/store operator pair.

    The degree and alignment letter are read from fixed positions of the
    operator name (characters 4 and 5 after ``load``, 5 and 6 after
    ``store``). The generated program loads with ``load<deg><align>``,
    stores back with ``store<deg><align>`` and compares input and output.

    Args:
        opts: Options forwarded to ``get_filename`` and ``common.clang_format``.
        op: Operator description; ``op.name`` must start with ``load`` or
            ``store``.
        typ: Element type name.
        lang: ``'c_base'``, ``'cxx_base'`` or anything else for the advanced
            C++ API.

    Returns:
        None. Nothing is written when ``get_filename`` returns ``None``.

    Raises:
        ValueError: If the operator name starts with neither prefix.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    # NOTE(review): the `op.name` expressions were missing from the reviewed
    # source; reconstructed from the 'load'/'store' literals and the indices.
    op_name = op.name
    if op_name.startswith('load'):
        deg = op_name[4]
        align = op_name[5]
    elif op_name.startswith('store'):
        deg = op_name[5]
        align = op_name[6]
    else:
        # Previously fell through and failed later on an unbound local.
        raise ValueError(
            'gen_load_store cannot handle operator "{}"'.format(op_name))

    variables = ', '.join('v.v{}'.format(i) for i in range(int(deg)))
    fmt = dict(deg=deg, typ=typ, align=align, variables=variables)

    if lang == 'c_base':
        load_store = \
            '''vecx{deg}({typ}) v = vload{deg}{align}(&vin[i], {typ});
               vstore{deg}{align}(&vout[i], {variables}, {typ});'''. \
            format(**fmt)
    elif lang == 'cxx_base':
        load_store = \
            '''vecx{deg}({typ}) v = nsimd::load{deg}{align}(&vin[i], {typ}());
               nsimd::store{deg}{align}(&vout[i], {variables}, {typ}());'''. \
            format(**fmt)
    else:
        load_store = \
            '''nsimd::packx{deg}<{typ}> v = nsimd::load{deg}{align}<
                 nsimd::packx{deg}<{typ}> >(&vin[i]);
               nsimd::store{deg}{align}(&vout[i], {variables});'''. \
            format(**fmt)

    if typ == 'f16':
        # f16 buffers are filled and compared through their u16 bit patterns.
        rand = '*((u16*)vin + i) = nsimd_f32_to_u16((float)(rand() % 10));'
        comp = '*((u16 *)vin + i) != *((u16 *)vout + i)'
    else:
        rand = 'vin[i] = ({})(rand() % 10);'.format(typ)
        comp = 'vin[i] != vout[i]'

    with common.open_utf8(filename) as out:
        out.write(
            '''{includes}

            #define SIZE (2048 / {sizeof})

            #define STATUS "test of {op_name} over {typ}"

            #define CHECK(a) {{ \\
              errno = 0; \\
              if (!(a)) {{ \\
                fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                        __LINE__, strerror(errno)); \\
                fflush(stderr); \\
                exit(EXIT_FAILURE); \\
              }} \\
            }}

            int main(void) {{
              int i, vi;
              {typ} *vin, *vout;
              int len = vlen({typ});
              int n = SIZE * {deg} * len;

              fprintf(stdout, "test of {op_name} over {typ}...\\n");
              CHECK(vin = ({typ}*)nsimd_aligned_alloc(n * {sizeof}));
              CHECK(vout = ({typ}*)nsimd_aligned_alloc(n * {sizeof}));

              /* Fill with random data */
              for (i = 0; i < n; i++) {{
                {rand}
              }}

              /* Load and put back data into vout */
              for (i = 0; i < n; i += {deg} * len) {{
                {load_store}
              }}

              /* Compare results */
              for (vi = 0; vi < SIZE; vi += len) {{
                for (i = vi; i < vi + len; i++) {{
                  if ({comp}) {{
                    fprintf(stdout, STATUS "... FAIL\\n");
                    fflush(stdout);
                    return -1;
                  }}
                }}
              }}

              fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
              return EXIT_SUCCESS;
            }}'''.format(includes=get_includes(lang), op_name=op_name,
                         typ=typ, rand=rand, deg=deg,
                         sizeof=common.sizeof(typ), load_store=load_store,
                         comp=comp))
    common.clang_format(opts, filename)
def gen_addv(opts, op, typ, lang):
    """Write a test for a horizontal-sum style operator over ``typ``.

    The generated program fills one vector with random values, accumulates a
    scalar reference sum, applies the operator to the loaded vector and fails
    when the relative distance between both results exceeds
    ``2^-nbits`` (10, 21 and 48 bits for f16, f32 and f64).

    Args:
        opts: Options forwarded to ``get_filename`` and ``common.clang_format``.
        op: Operator description; ``op.name`` is the function being tested.
        typ: Element type name; must be a key of the ``nbits`` table.
        lang: ``'c_base'``, ``'cxx_base'`` or anything else for the advanced
            C++ API.

    Returns:
        None. Nothing is written when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    # NOTE(review): `op.name` is reconstructed; the expression was missing
    # from the reviewed source wherever the operator name is formatted in.
    op_name = op.name
    if lang == 'c_base':
        op_test = 'v{}(vloada(buf, {}), {})'.format(op_name, typ, typ)
        extra_code = relative_distance_c
    elif lang == 'cxx_base':
        op_test = 'nsimd::{}(nsimd::loada(buf, {}()), {}())'. \
                  format(op_name, typ, typ)
        extra_code = relative_distance_cpp
    else:
        # "> >" rather than ">>": consistent with the other generators and
        # still valid for pre-C++11 compilers.
        op_test = 'nsimd::{}(nsimd::loada<nsimd::pack<{}> >(buf))'. \
                  format(op_name, typ)
        extra_code = relative_distance_cpp

    nbits = {'f16': '10', 'f32': '21', 'f64': '48'}

    head = '''#define _POSIX_C_SOURCE 200112L

              {includes}
              #include <float.h>
              #include <math.h>

              #define CHECK(a) {{ \\
                errno = 0; \\
                if (!(a)) {{ \\
                  fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                          __LINE__, strerror(errno)); \\
                  fflush(stderr); \\
                  exit(EXIT_FAILURE); \\
                }} \\
              }}

              {extra_code}'''.format(includes=get_includes(lang),
                                     extra_code=extra_code)

    if typ == 'f16':
        # The f16 reference is accumulated in f32 from the raw u16 bits.
        init = '''f16 res = nsimd_f32_to_f16(0.0f);
                  f32 ref = 0.0f;'''
        rand = '''nsimd_f32_to_f16((f32)(2 * (rand() % 2) - 1) *
                  (f32)(1 << (rand() % 4)) /
                  (f32)(1 << (rand() % 4)))'''
        init_statement = 'buf[i] = {};'.format(rand)
        ref_statement = 'ref += nsimd_u16_to_f32(((u16 *)buf)[i]);'
        test = '''if (relative_distance((double) ref,
                                        (double) nsimd_f16_to_f32(res)) >
                      get_2th_power(-{nbits})) {{
                    return EXIT_FAILURE;
                  }}'''.format(nbits=nbits[typ])
    else:
        init = '''{typ} ref = ({typ})0;
                  {typ} res = ({typ})0;'''.format(typ=typ)
        rand = '''({typ})(2 * (rand() % 2) - 1) *
                  ({typ})(1 << (rand() % 4)) /
                  ({typ})(1 << (rand() % 4))'''.format(typ=typ)
        init_statement = 'buf[i] = {};'.format(rand)
        ref_statement = 'ref += buf[i];'
        test = '''if (relative_distance((double)ref, (double)res) >
                      get_2th_power(-{nbits})) {{
                    return EXIT_FAILURE;
                  }}'''.format(nbits=nbits[typ])

    with common.open_utf8(filename) as out:
        out.write(
            '''{head}

            int main(void) {{
              const int len = vlen({typ});
              {typ} *buf;
              int i;
              {init}

              fprintf(stdout, "test of {op_name} over {typ}...\\n");
              CHECK(buf = ({typ} *)nsimd_aligned_alloc(len * {sizeof}));

              for(i = 0; i < len; i++) {{
                {init_statement}
              }}

              for(i = 0; i < len; i++) {{
                {ref_statement}
              }}

              res = {op_test};

              {test}

              fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
              return EXIT_SUCCESS;
            }}
            '''.format(head=head, init=init, op_name=op_name, typ=typ,
                       sizeof=common.sizeof(typ),
                       init_statement=init_statement,
                       ref_statement=ref_statement, op_test=op_test,
                       test=test))
    common.clang_format(opts, filename)
def gen_test(opts, op, typ, lang, ulps):
    """Write the generic test program for operator ``op`` over ``typ``.

    The program text comes from the module-level ``template``, filled with
    the snippets returned by ``get_content`` plus a comparison statement
    (``comp``) chosen as follows:

    * bitwise operators (not/and/or/xor/andnot): compare raw bit patterns;
    * ``op.tests_ulps``: relative distance against a fixed 2^-11 (2^-9 for
      f16) threshold;
    * ``op.src``: thresholds from ``ulps[op.name][typ]`` when available,
      otherwise 10/21/48 bits for f16/f32/f64;
    * anything else: exact equality.

    Args:
        opts: Options forwarded to ``get_filename`` and ``common.clang_format``.
        op: Operator description.
        typ: Element type name.
        lang: ``'c_base'`` or one of the C++ flavours.
        ulps: Mapping ``op name -> typ -> {"ulps", "ulps for denormalized
            output", "Inf Error", "NaN Error"}``.

    Returns:
        None. Nothing is written when ``get_filename`` returns ``None``.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from datetime import date

    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    content = get_content(op, typ, lang)

    # NOTE(review): every `op.name` below was missing from the reviewed
    # source and is reconstructed.
    op_name = op.name

    # Default for the bitwise branch, which previously left it unbound.
    extra_code = ''

    if op_name in ['not', 'and', 'or', 'xor', 'andnot']:
        comp = 'return *({uT}*)&mpfr_out != *({uT}*)&nsimd_out'. \
               format(uT=common.bitfield_type[typ])
    else:
        if typ == 'f16':
            left = '(double)nsimd_f16_to_f32(mpfr_out)'
            right = '(double)nsimd_f16_to_f32(nsimd_out)'
        elif typ == 'f32':
            left = '(double)mpfr_out'
            right = '(double)nsimd_out'
        else:
            left = 'mpfr_out'
            right = 'nsimd_out'

        relative_distance = relative_distance_c if lang == 'c_base' \
                            else relative_distance_cpp

        if op.tests_ulps:
            comp = ('return relative_distance({}, {}) > '
                    'get_2th_power(-{nbits})'). \
                   format(left, right, nbits='11' if typ != 'f16' else '9')
            extra_code = relative_distance
        elif op.src:
            if op_name in ulps:
                limits = ulps[op_name][typ]
                nbits = limits["ulps"]
                nbits_dnz = limits["ulps for denormalized output"]
                inf_error = limits["Inf Error"]
                nan_error = limits["NaN Error"]

                comp = '''#pragma GCC diagnostic push
                #pragma GCC diagnostic ignored "-Wconversion"
                #pragma GCC diagnostic ignored "-Wdouble-promotion"
                '''
                if nan_error:
                    # Ignore errors on NaN output: some are expected.
                    comp += 'if ({isnan}((double){left})) return 0;\n'
                else:
                    # Fail if one value is NaN and the other is not.  The
                    # placeholder used to be misspelt "{rigth}", which made
                    # comp.format() raise KeyError.
                    comp += ('if ({isnan}((double){left}) ^ '
                             '{isnan}((double){right})) return 1;\n')

                if inf_error:
                    # Ignore errors on infinite output: some are expected.
                    comp += 'if ({isinf}((double){left})) return 0;\n'
                else:
                    # One value is infinite and the other is not.
                    comp += ('if ({isinf}((double){left}) ^ '
                             '{isinf}((double){right})) return 1;\n')
                    # Both infinite but with opposite signs.
                    comp += ('if ({isinf}((double){left}) && '
                             '{isinf}((double){right}) '
                             '&& ({right}*{left} < 0)) '
                             'return 1;\n')

                comp += '''
                if ({isnormal}((double){left})) {{
                  return relative_distance({left}, {right}) >
                         get_2th_power(-({nbits}));
                }} else {{
                  return relative_distance({left}, {right}) >
                         get_2th_power(-({nbits_dnz}));
                }}
                #pragma GCC diagnostic pop
                '''

                # C uses the <math.h> macros, C++ the std:: overloads.
                prefix = '' if lang == 'c_base' else 'std::'
                comp = comp.format(left=left, right=right, nbits=nbits,
                                   nbits_dnz=nbits_dnz,
                                   isnormal=prefix + 'isnormal',
                                   isinf=prefix + 'isinf',
                                   isnan=prefix + 'isnan')
            else:
                nbits = {'f16': '10', 'f32': '21', 'f64': '48'}
                comp = ('return relative_distance({}, {}) > '
                        'get_2th_power(-{nbits})'). \
                       format(left, right, nbits=nbits[typ])
            extra_code = relative_distance
        else:
            comp = 'return {} != {}'.format(left, right)
            extra_code = ''

    includes = get_includes(lang)
    if op.src or op.tests_ulps or op.tests_mpfr:
        if lang == 'c_base':
            includes = '''#define _POSIX_C_SOURCE 200112L

            #include <math.h>
            #include <float.h>
            {}'''.format(includes)
        else:
            includes = '''#define _POSIX_C_SOURCE 200112L

            #include <cmath>
            #include <cfloat>
            {}'''.format(includes)
        if op.tests_mpfr and sys.platform.startswith('linux'):
            includes = includes + '''
            #pragma GCC diagnostic push
            #pragma GCC diagnostic ignored "-Wsign-conversion"
            #include <mpfr.h>
            #pragma GCC diagnostic pop
            '''

    with common.open_utf8(filename) as out:
        # NOTE(review): two keyword arguments were missing here in the
        # reviewed source.  `op_name` is needed by sibling templates; `year`
        # is passed defensively because `template` is defined elsewhere and
        # str.format ignores unused keyword arguments -- confirm both.
        out.write(template.format(
            includes=includes, sizeof=common.sizeof(typ), typ=typ,
            op_name=op_name, year=date.today().year, comp=comp,
            extra_code=extra_code, **content))
    common.clang_format(opts, filename)
def get_content(op, typ, lang):
    """Build the C/C++ snippets that ``gen_test`` inserts into its template.

    Three operator shapes are supported, selected by ``op.params``:

    * all ``'v'`` or all ``'l'`` (vector or logical in, same kind out);
    * ``['l', 'v', 'v']`` (two vectors in, logical out);
    * ``['v', 'v', 'p']`` (vector plus an integer parameter).

    Args:
        op: Operator description (``name``, ``params``, ``src``,
            ``tests_mpfr``, ``tests_mpfr_name()``, ``cxx_operator``).
        typ: Element type name.
        lang: ``'c_base'``, ``'cxx_base'`` or ``'cxx_adv'``.

    Returns:
        A dict with keys ``vin_defi`` (input declarations/allocations),
        ``vin_rand`` (random fill), ``cpu_step`` (loop increment of the
        reference computation), ``vout0_comp`` (reference computation) and
        ``vout1_comp`` (computation under test).

    Raises:
        ValueError: If ``op.params`` matches none of the supported shapes.
    """
    # NOTE(review): every use of `op.name` in this function was missing from
    # the reviewed source (including the tail of several `.format(` calls)
    # and is reconstructed from the {op_name} placeholders.
    op_name = op.name

    cast = 'f32' if typ in ['f16', 'f32'] else 'f64'

    # By default we use emulation functions ("cpu" architecture) for testing
    # in which case increment is given by nsimd_cpu_len()
    cpu_step = 'nsimd_len_cpu_{}()'.format(typ)

    # For floating points generate some non integer inputs
    if typ in common.iutypes:
        rand = '(1 << (rand() % 4))'
    elif op.src:
        rand = '({cast})2 * ({cast})rand() / ({cast})RAND_MAX'. \
               format(cast=cast)
    else:
        rand = '({cast})(1 << (rand() % 4)) / ({cast})(1 << (rand() % 4))'. \
               format(cast=cast)

    # For signed types, make some positive and negative inputs
    if op_name not in ['sqrt', 'rsqrt11']:
        if typ in common.itypes:
            rand = '(2 * (rand() % 2) - 1) * {}'.format(rand)
        if typ in common.ftypes:
            rand = '({})(2 * (rand() % 2) - 1) * {}'.format(cast, rand)

    # Depending on function parameters, generate specific input, ...
    if all(e == 'v' for e in op.params) or all(e == 'l' for e in op.params):
        logical = 'l' if op.params[0] == 'l' else ''
        if logical == 'l':
            rand = '(1 << (rand() % 2))' if typ != 'f16' \
                   else '(float)(1 << (rand() % 2))'
        nargs = range(1, len(op.params))

        # Make vin_defi
        code = ['{} *vin{};'.format(typ, i) for i in nargs]
        code += ['CHECK(vin{} = ({}*)nsimd_aligned_alloc(SIZE * {}));'.
                 format(i, typ, common.sizeof(typ)) for i in nargs]
        vin_defi = '\n'.join(code)

        # Make vin_rand
        if typ == 'f16':
            code = ['vin{}[i] = nsimd_f32_to_f16({});'.format(i, rand)
                    for i in nargs]
        else:
            code = ['vin{}[i] = ({})({});'.format(i, typ, rand)
                    for i in nargs]
        vin_rand = '\n'.join(code)

        # Make vout0_comp
        # We use MPFR on Linux to compare numerical results, but it is only
        # on Linux as MPFR does not play well on Windows. On Windows we
        # compare against the cpu implementation. When using MPFR, we set one
        # element at a time => cpu_step = '1'
        if op.tests_mpfr and sys.platform.startswith('linux'):
            cpu_step = '1'
            variables = ', '.join(['a{}'.format(i) for i in nargs])
            mpfr_inits = '\n'.join(['mpfr_init2(a{}, 64);'.format(i)
                                    for i in nargs])
            if typ == 'f16':
                mpfr_set = '''mpfr_set_flt(a{i}, nsimd_u16_to_f32(
                                ((u16 *)vin{i})[i]), MPFR_RNDN);'''
                vout0_set = '''((u16 *)vout0)[i] = nsimd_f32_to_u16(
                                 mpfr_get_flt(c, MPFR_RNDN));'''
            elif typ == 'f32':
                mpfr_set = 'mpfr_set_flt(a{i}, vin{i}[i], MPFR_RNDN);'
                vout0_set = 'vout0[i] = mpfr_get_flt(c, MPFR_RNDN);'
            else:
                mpfr_set = 'mpfr_set_d(a{i}, vin{i}[i], MPFR_RNDN);'
                vout0_set = 'vout0[i] = mpfr_get_d(c, MPFR_RNDN);'
            mpfr_sets = '\n'.join([mpfr_set.format(i=j) for j in nargs])
            mpfr_clears = '\n'.join(['mpfr_clear(a{});'.format(i)
                                     for i in nargs])
            vout0_comp = \
                '''mpfr_t c, {variables};
                   mpfr_init2(c, 64);
                   {mpfr_inits}
                   {mpfr_sets}
                   {mpfr_op_name}(c, {variables}, MPFR_RNDN);
                   {vout0_set}
                   mpfr_clear(c);
                   {mpfr_clears}'''. \
                format(variables=variables, mpfr_sets=mpfr_sets,
                       mpfr_clears=mpfr_clears, vout0_set=vout0_set,
                       mpfr_op_name=op.tests_mpfr_name(),
                       mpfr_inits=mpfr_inits)
        else:
            args = ', '.join(['va{}'.format(i) for i in nargs])
            code = ['nsimd_cpu_v{}{} {}, vc;'.format(logical, typ, args)]
            code += ['va{} = nsimd_load{}u_cpu_{}(&vin{}[i]);'.
                     format(i, logical, typ, i) for i in nargs]
            code += ['vc = nsimd_{}_cpu_{}({});'.format(op_name, typ, args)]
            code += ['nsimd_store{}u_cpu_{}(&vout0[i], vc);'.
                     format(logical, typ)]
            vout0_comp = '\n'.join(code)

        # Make vout1_comp
        args = ', '.join(['va{}'.format(i) for i in nargs])
        if lang == 'c_base':
            code = ['vec{}({}) {}, vc;'.format(logical, typ, args)]
            code += ['va{} = vload{}u(&vin{}[i], {});'.
                     format(i, logical, i, typ) for i in nargs]
            code += ['vc = v{}({}, {});'.format(op_name, args, typ)]
            code += ['vstore{}u(&vout1[i], vc, {});'.format(logical, typ)]
            vout1_comp = '\n'.join(code)
        if lang == 'cxx_base':
            code = ['vec{}({}) {}, vc;'.format(logical, typ, args)]
            code += ['va{} = nsimd::load{}u(&vin{}[i], {}());'.
                     format(i, logical, i, typ) for i in nargs]
            code += ['vc = nsimd::{}({}, {}());'.format(op_name, args, typ)]
            code += ['nsimd::store{}u(&vout1[i], vc, {}());'.
                     format(logical, typ)]
            vout1_comp = '\n'.join(code)
        if lang == 'cxx_adv':
            code = ['nsimd::pack{}<{}> {}, vc;'.format(logical, typ, args)]
            code += ['''va{i} = nsimd::load{logical}u<
                                  nsimd::pack{logical}<{typ}> >(
                                      &vin{i}[i]);'''.
                     format(i=i, logical=logical, typ=typ) for i in nargs]
            if op.cxx_operator:
                # cxx_operator[8:] drops the leading 8 characters
                # (presumably the "operator" keyword), leaving the symbol.
                if len(op.params[1:]) == 1:
                    code += ['vc = {}va1;'.format(op.cxx_operator[8:])]
                if len(op.params[1:]) == 2:
                    code += ['vc = va1 {} va2;'.format(op.cxx_operator[8:])]
            else:
                code += ['vc = nsimd::{}({});'.format(op_name, args)]
            code += ['nsimd::store{}u(&vout1[i], vc);'.format(logical)]
            vout1_comp = '\n'.join(code)

    elif op.params == ['l', 'v', 'v']:
        vin_defi = \
            '''{typ} *vin1, *vin2;
               CHECK(vin1 = ({typ}*)nsimd_aligned_alloc(SIZE * {sizeof}));
               CHECK(vin2 = ({typ}*)nsimd_aligned_alloc(SIZE * {sizeof}));'''. \
            format(typ=typ, sizeof=common.sizeof(typ))
        if typ == 'f16':
            vin_rand = '''vin1[i] = nsimd_f32_to_f16((float)(rand() % 4));
                          vin2[i] = nsimd_f32_to_f16((float)(rand() % 4));'''
        else:
            vin_rand = '''vin1[i] = ({typ})(rand() % 4);
                          vin2[i] = ({typ})(rand() % 4);'''.format(typ=typ)
        vout0_comp = '''nsimd_cpu_v{typ} va1, va2;
                        nsimd_cpu_vl{typ} vc;
                        va1 = nsimd_loadu_cpu_{typ}(&vin1[i]);
                        va2 = nsimd_loadu_cpu_{typ}(&vin2[i]);
                        vc = nsimd_{op_name}_cpu_{typ}(va1, va2);
                        nsimd_storelu_cpu_{typ}(&vout0[i], vc);'''. \
                     format(typ=typ, op_name=op_name)
        if lang == 'c_base':
            vout1_comp = '''vec({typ}) va1, va2;
                            vecl({typ}) vc;
                            va1 = vloadu(&vin1[i], {typ});
                            va2 = vloadu(&vin2[i], {typ});
                            vc = v{op_name}(va1, va2, {typ});
                            vstorelu(&vout1[i], vc, {typ});'''. \
                         format(typ=typ, op_name=op_name)
        if lang == 'cxx_base':
            vout1_comp = '''vec({typ}) va1, va2;
                            vecl({typ}) vc;
                            va1 = nsimd::loadu(&vin1[i], {typ}());
                            va2 = nsimd::loadu(&vin2[i], {typ}());
                            vc = nsimd::{op_name}(va1, va2, {typ}());
                            nsimd::storelu(&vout1[i], vc, {typ}());'''. \
                         format(typ=typ, op_name=op_name)
        if lang == 'cxx_adv':
            if op.cxx_operator:
                do_computation = 'vc = va1 {} va2;'. \
                                 format(op.cxx_operator[8:])
            else:
                do_computation = 'vc = nsimd::{}(va1, va2, {}());'. \
                                 format(op_name, typ)
            vout1_comp = '''nsimd::pack<{typ}> va1, va2;
                            nsimd::packl<{typ}> vc;
                            va1 = nsimd::loadu<nsimd::pack<{typ}> >(&vin1[i]);
                            va2 = nsimd::loadu<nsimd::pack<{typ}> >(&vin2[i]);
                            {do_computation}
                            nsimd::storelu(&vout1[i], vc);'''. \
                         format(typ=typ, do_computation=do_computation)

    elif op.params == ['v', 'v', 'p']:
        vin_defi = \
            '''{typ} *vin1;
               CHECK(vin1 = ({typ}*)nsimd_aligned_alloc(SIZE * {sizeof}));'''. \
            format(typ=typ, sizeof=common.sizeof(typ))
        vin_rand = 'vin1[i] = ({typ})(rand() % 4);'.format(typ=typ)
        # The integer parameter cycles through 0..6 from one vector to the
        # next in the generated loop.
        vout0_comp = '''nsimd_cpu_v{typ} va1, vc;
                        va1 = nsimd_loadu_cpu_{typ}(&vin1[i]);
                        vc = nsimd_{op_name}_cpu_{typ}(va1, (i / step) % 7);
                        nsimd_storeu_cpu_{typ}(&vout0[i], vc);'''. \
                     format(typ=typ, op_name=op_name)
        if lang == 'c_base':
            vout1_comp = '''vec({typ}) va1, vc;
                            va1 = vloadu(&vin1[i], {typ});
                            vc = v{op_name}(va1, (i / step) % 7, {typ});
                            vstoreu(&vout1[i], vc, {typ});'''. \
                         format(typ=typ, op_name=op_name)
        if lang == 'cxx_base':
            vout1_comp = \
                '''vec({typ}) va1, vc;
                   va1 = nsimd::loadu(&vin1[i], {typ}());
                   vc = nsimd::{op_name}(va1, (i / step) % 7, {typ}());
                   nsimd::storeu(&vout1[i], vc, {typ}());'''. \
                format(typ=typ, op_name=op_name)
        if lang == 'cxx_adv':
            if op.cxx_operator:
                do_computation = 'vc = va1 {} ((i / step) % 7);'. \
                                 format(op.cxx_operator[8:])
            else:
                do_computation = 'vc = nsimd::{}(va1, (i / step) % 7);'. \
                                 format(op_name)
            vout1_comp = \
                '''nsimd::pack<{typ}> va1, vc;
                   va1 = nsimd::loadu<nsimd::pack<{typ}> >(&vin1[i]);
                   {do_computation}
                   nsimd::storeu(&vout1[i], vc);'''. \
                format(typ=typ, do_computation=do_computation)

    else:
        raise ValueError('No test available for operator "{}" on type "{}"'.
                         format(op_name, typ))

    return {'vin_defi': vin_defi, 'vin_rand': vin_rand,
            'cpu_step': cpu_step, 'vout0_comp': vout0_comp,
            'vout1_comp': vout1_comp}
def gen_reverse(opts, op, typ, lang):
    """Write a test program for the ``reverse`` operator over ``typ``.

    The generated program stores ``i + 1`` in lane ``i``, reverses the
    vector and checks that ``out[len - 1 - i] == in[i]`` for every lane.

    Args:
        opts: Options forwarded to ``get_filename`` and ``common.clang_format``.
        op: Operator description; its ``name`` appears in the status messages.
        typ: Element type name.
        lang: ``'c_base'``, ``'cxx_base'`` or ``'cxx_adv'``.

    Returns:
        None. Nothing is written when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    if lang == 'c_base':
        test_code = \
            'vstorea( out, vreverse( vloada( in, {typ} ), {typ} ), {typ} );'. \
            format(typ=typ)
    elif lang == 'cxx_base':
        test_code = ('nsimd::storea( out, nsimd::reverse( '
                     'nsimd::loada( in, {typ}() ), {typ}() ), {typ}() );'). \
                    format(typ=typ)
    elif lang == 'cxx_adv':
        # "> >" rather than ">>": consistent with the other generators and
        # still valid for pre-C++11 compilers.
        test_code = ('nsimd::storea( out, nsimd::reverse( '
                     'nsimd::loada<nsimd::pack<{typ}> >( in ) ) );'). \
                    format(typ=typ)

    if typ == 'f16':
        init = 'in[ i ] = nsimd_f32_to_f16((float)(i + 1));'
        comp = ('ok &= nsimd_f16_to_f32( out[len - 1 - i] ) == '
                'nsimd_f16_to_f32( in[i] );')
    else:
        init = 'in[ i ] = ({typ})(i + 1);'.format(typ=typ)
        comp = 'ok &= out[len - 1 - i] == in[i];'

    with common.open_utf8(filename) as out:
        # The loop counter is an int: the former `unsigned char` could never
        # reach `len` when a vector holds more than 255 lanes.
        # NOTE(review): `op_name=op.name` is reconstructed; the argument was
        # missing from the reviewed source.
        out.write(
            '''{includes}

            #define CHECK(a) {{ \\
              errno = 0; \\
              if (!(a)) {{ \\
                fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                        __LINE__, strerror(errno)); \\
                fflush(stderr); \\
                exit(EXIT_FAILURE); \\
              }} \\
            }}

            int main(void) {{
              int i;
              int ok;
              {typ} * in;
              {typ} * out;

              int len = vlen({typ});

              fprintf(stdout, "test of {op_name} over {typ}...\\n");
              CHECK(in = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));
              CHECK(out = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));

              for( i = 0 ; i < len ; ++i )
              {{
                {init}
              }}

              {test_code}

              ok = 1;

              for( i = 0 ; i < len ; ++i )
              {{
                {comp}
              }}

              /*fprintf( stdout, "%f %f %f %f\\n", in[ 0 ], out[ 0 ], in[ 1 ], out[ 1 ] );*/

              if( ok )
              {{
                fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
              }}
              else
              {{
                fprintf(stderr, "test of {op_name} over {typ}... FAIL\\n");
                exit(EXIT_FAILURE);
              }}

              nsimd_aligned_free( in );
              nsimd_aligned_free( out );

              return EXIT_SUCCESS;
            }}
            '''.format(includes=get_includes(lang), op_name=op.name, typ=typ,
                       test_code=test_code, sizeof=common.sizeof(typ),
                       init=init, comp=comp))
    common.clang_format(opts, filename)
def gen_bench_from_code(f, typ, code):
    """Return the C++ source of a Google Benchmark translation unit.

    The result is made of the includes reported by ``f.gen_includes(_lang)``,
    a fixed set of third-party includes, two ``make_data`` helpers, the
    random-input code from ``f.domain.code('rand_param', typ)``, the caller
    supplied ``code`` and a closing ``BENCHMARK_MAIN();``.

    Args:
        f: Operator description providing ``gen_includes`` and ``domain``.
        typ: Element type name substituted for ``{type}``.
        code: C++ benchmark definitions inserted before ``BENCHMARK_MAIN``.

    Returns:
        The generated source as a string.
    """
    header = common.pprint_includes(f.gen_includes(_lang))
    header += '''
// Required for random generation
#include "../benches.hpp"

// Google benchmark
#include <benchmark/benchmark.h>

// std
#include <cmath>

// Sleef
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
#include <sleef.h>
#pragma GCC diagnostic pop

// MIPP
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wconversion"
#pragma GCC diagnostic ignored "-Wsign-conversion"
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#pragma GCC diagnostic ignored "-Wunused-parameter"
#if defined(__clang__)
#pragma GCC diagnostic ignored "-Wzero-length-array"
#endif
#include <mipp.h>
#pragma GCC diagnostic pop
'''

    body = '''{header}

// -------------------------------------------------------------------------

static const int sz = 1024;

template <typename Random>
static {type}* make_data(int sz, Random r) {{
  {type}* data = ({type}*)nsimd_aligned_alloc(sz * {sizeof});
  for (int i = 0; i < sz; ++i) {{
    data[i] = r();
  }}
  return data;
}}

static {type}* make_data(int sz) {{
  {type}* data = ({type}*)nsimd_aligned_alloc(sz * {sizeof});
  for (int i = 0; i < sz; ++i) {{
    data[i] = {type}(0);
  }}
  return data;
}}

{random_code}

// -------------------------------------------------------------------------

{code}

BENCHMARK_MAIN();
'''
    # Only the placeholders that occur in the template are supplied; the two
    # keyword arguments that were lost from the reviewed source are not
    # referenced by it.
    return body.format(
        type=typ,
        code=code,
        random_code=f.domain.code('rand_param', typ),
        sizeof=common.sizeof(typ),
        header=header,
    )