//
// Generated by NVIDIA NVVM Compiler (NVVM 7.0.1)
// NOTE(review): the banner text was outside a comment in the collapsed
// original, which made the module unparseable; it is now a comment.
//

.version 7.8
.target sm_80
.address_size 64

	// .globl	activation_8
// Module-scope constant bank; declared but not referenced by this function.
.visible .const .align 4 .b8 params[8];

//---------------------------------------------------------------------------
// activation_8 — element-wise logistic sigmoid over 16 half-precision values.
//
// In : activation_8_param_0[32] — 32-byte aggregate read as eight u32 words
//      (%r8 = bytes 0..3 ... %r1 = bytes 28..31); each word packs two f16
//      lanes {lo, hi}.
// Out: func_retval0[32]        — same layout, sigmoid applied per lane.
//
// Per lane x (widened to f32):
//     sigmoid(x) = 1 / (1 + exp(-x)) = 1 / (1 + 2^(x * -log2(e)))
// computed with flush-to-zero approximate ops:
//     mul by 0fBFB8AA3B  (= -log2(e) ~ -1.4426950)
//     ex2.approx         (2^y)
//     add 0f3F800000     (= 1.0)
//     rcp.approx         (reciprocal)
// then rounded back to f16 (cvt.rn.f16.f32) and repacked.
//---------------------------------------------------------------------------
.visible .func  (.param .align 4 .b8 func_retval0[32]) activation_8(
	.param .align 4 .b8 activation_8_param_0[32]
)
{
	.pragma "abi_param_reg all";
	.reg .b16 	%rs<33>;
	.reg .f32 	%f<89>;
	.reg .b32 	%r<17>;

	// Load the 8 packed u32 words of the input aggregate.
	ld.param.u32 	%r1, [activation_8_param_0+28];
	ld.param.u32 	%r2, [activation_8_param_0+24];
	ld.param.u32 	%r3, [activation_8_param_0+20];
	ld.param.u32 	%r4, [activation_8_param_0+16];
	ld.param.u32 	%r5, [activation_8_param_0+12];
	ld.param.u32 	%r6, [activation_8_param_0+8];
	ld.param.u32 	%r7, [activation_8_param_0+4];
	ld.param.u32 	%r8, [activation_8_param_0];

	// ---- word 0 (%r8) -> %r9 ----
	mov.b32 	{%rs1, %rs3}, %r8;              // split into f16 lanes {lo, hi}
	{  cvt.f32.f16 %f1, %rs1;}                      // widen lo to f32
	mul.ftz.f32 	%f33, %f1, 0fBFB8AA3B;          // x * -log2(e)
	ex2.approx.ftz.f32 	%f34, %f33;             // 2^(..) = exp(-x)
	add.ftz.f32 	%f35, %f34, 0f3F800000;         // 1 + exp(-x)
	rcp.approx.ftz.f32 	%f2, %f35;              // sigmoid(x)
	{  cvt.f32.f16 %f3, %rs3;}                      // widen hi to f32
	mul.ftz.f32 	%f36, %f3, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f37, %f36;
	add.ftz.f32 	%f38, %f37, 0f3F800000;
	rcp.approx.ftz.f32 	%f4, %f38;
	{  cvt.rn.f16.f32 %rs4, %f4;}                   // round hi back to f16
	{  cvt.rn.f16.f32 %rs2, %f2;}                   // round lo back to f16
	mov.b32 	%r9, {%rs2, %rs4};              // repack {lo, hi}

	// ---- word 1 (%r7) -> %r10 ----
	mov.b32 	{%rs5, %rs7}, %r7;
	{  cvt.f32.f16 %f5, %rs5;}
	mul.ftz.f32 	%f39, %f5, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f40, %f39;
	add.ftz.f32 	%f41, %f40, 0f3F800000;
	rcp.approx.ftz.f32 	%f6, %f41;
	{  cvt.f32.f16 %f7, %rs7;}
	mul.ftz.f32 	%f42, %f7, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f43, %f42;
	add.ftz.f32 	%f44, %f43, 0f3F800000;
	rcp.approx.ftz.f32 	%f8, %f44;
	{  cvt.rn.f16.f32 %rs8, %f8;}
	{  cvt.rn.f16.f32 %rs6, %f6;}
	mov.b32 	%r10, {%rs6, %rs8};

	// ---- word 2 (%r6) -> %r11 ----
	mov.b32 	{%rs9, %rs11}, %r6;
	{  cvt.f32.f16 %f9, %rs9;}
	mul.ftz.f32 	%f45, %f9, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f46, %f45;
	add.ftz.f32 	%f47, %f46, 0f3F800000;
	rcp.approx.ftz.f32 	%f10, %f47;
	{  cvt.f32.f16 %f11, %rs11;}
	mul.ftz.f32 	%f48, %f11, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f49, %f48;
	add.ftz.f32 	%f50, %f49, 0f3F800000;
	rcp.approx.ftz.f32 	%f12, %f50;
	{  cvt.rn.f16.f32 %rs12, %f12;}
	{  cvt.rn.f16.f32 %rs10, %f10;}
	mov.b32 	%r11, {%rs10, %rs12};

	// ---- word 3 (%r5) -> %r12 ----
	mov.b32 	{%rs13, %rs15}, %r5;
	{  cvt.f32.f16 %f13, %rs13;}
	mul.ftz.f32 	%f51, %f13, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f52, %f51;
	add.ftz.f32 	%f53, %f52, 0f3F800000;
	rcp.approx.ftz.f32 	%f14, %f53;
	{  cvt.f32.f16 %f15, %rs15;}
	mul.ftz.f32 	%f54, %f15, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f55, %f54;
	add.ftz.f32 	%f56, %f55, 0f3F800000;
	rcp.approx.ftz.f32 	%f16, %f56;
	{  cvt.rn.f16.f32 %rs16, %f16;}
	{  cvt.rn.f16.f32 %rs14, %f14;}
	mov.b32 	%r12, {%rs14, %rs16};

	// ---- word 4 (%r4) -> %r13 ----
	mov.b32 	{%rs17, %rs19}, %r4;
	{  cvt.f32.f16 %f17, %rs17;}
	mul.ftz.f32 	%f57, %f17, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f58, %f57;
	add.ftz.f32 	%f59, %f58, 0f3F800000;
	rcp.approx.ftz.f32 	%f18, %f59;
	{  cvt.f32.f16 %f19, %rs19;}
	mul.ftz.f32 	%f60, %f19, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f61, %f60;
	add.ftz.f32 	%f62, %f61, 0f3F800000;
	rcp.approx.ftz.f32 	%f20, %f62;
	{  cvt.rn.f16.f32 %rs20, %f20;}
	{  cvt.rn.f16.f32 %rs18, %f18;}
	mov.b32 	%r13, {%rs18, %rs20};

	// ---- word 5 (%r3) -> %r14 ----
	mov.b32 	{%rs21, %rs23}, %r3;
	{  cvt.f32.f16 %f21, %rs21;}
	mul.ftz.f32 	%f63, %f21, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f64, %f63;
	add.ftz.f32 	%f65, %f64, 0f3F800000;
	rcp.approx.ftz.f32 	%f22, %f65;
	{  cvt.f32.f16 %f23, %rs23;}
	mul.ftz.f32 	%f66, %f23, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f67, %f66;
	add.ftz.f32 	%f68, %f67, 0f3F800000;
	rcp.approx.ftz.f32 	%f24, %f68;
	{  cvt.rn.f16.f32 %rs24, %f24;}
	{  cvt.rn.f16.f32 %rs22, %f22;}
	mov.b32 	%r14, {%rs22, %rs24};

	// ---- word 6 (%r2) -> %r15 ----
	mov.b32 	{%rs25, %rs27}, %r2;
	{  cvt.f32.f16 %f25, %rs25;}
	mul.ftz.f32 	%f69, %f25, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f70, %f69;
	add.ftz.f32 	%f71, %f70, 0f3F800000;
	rcp.approx.ftz.f32 	%f26, %f71;
	{  cvt.f32.f16 %f27, %rs27;}
	mul.ftz.f32 	%f72, %f27, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f73, %f72;
	add.ftz.f32 	%f74, %f73, 0f3F800000;
	rcp.approx.ftz.f32 	%f28, %f74;
	{  cvt.rn.f16.f32 %rs28, %f28;}
	{  cvt.rn.f16.f32 %rs26, %f26;}
	mov.b32 	%r15, {%rs26, %rs28};

	// ---- word 7 (%r1) -> %r16 ----
	mov.b32 	{%rs29, %rs31}, %r1;
	{  cvt.f32.f16 %f29, %rs29;}
	mul.ftz.f32 	%f75, %f29, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f76, %f75;
	add.ftz.f32 	%f77, %f76, 0f3F800000;
	rcp.approx.ftz.f32 	%f30, %f77;
	{  cvt.f32.f16 %f31, %rs31;}
	mul.ftz.f32 	%f78, %f31, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%f79, %f78;
	add.ftz.f32 	%f80, %f79, 0f3F800000;
	rcp.approx.ftz.f32 	%f32, %f80;
	{  cvt.rn.f16.f32 %rs32, %f32;}
	{  cvt.rn.f16.f32 %rs30, %f30;}
	mov.b32 	%r16, {%rs30, %rs32};

	// Reinterpret the packed words as .f32 (bitcast only; no conversion)
	// and store the 32-byte return aggregate in input order: word 0 at
	// offset 0 ... word 7 at offset 28.
	mov.b32 	%f81, %r16;
	mov.b32 	%f82, %r15;
	mov.b32 	%f83, %r14;
	mov.b32 	%f84, %r13;
	mov.b32 	%f85, %r12;
	mov.b32 	%f86, %r11;
	mov.b32 	%f87, %r10;
	mov.b32 	%f88, %r9;
	st.param.f32 	[func_retval0+0], %f88;
	st.param.f32 	[func_retval0+4], %f87;
	st.param.f32 	[func_retval0+8], %f86;
	st.param.f32 	[func_retval0+12], %f85;
	st.param.f32 	[func_retval0+16], %f84;
	st.param.f32 	[func_retval0+20], %f83;
	st.param.f32 	[func_retval0+24], %f82;
	st.param.f32 	[func_retval0+28], %f81;
	ret;
}