//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31678015
// Cuda compilation tools, release 11.8, V11.8.85
// Based on NVVM 7.0.1
//
// NOTE: the body of activation_8 below has been reformatted by hand
// (one instruction per line, role-named registers, predication instead of
// branches). The instruction semantics are unchanged.
//

.version 7.8
.target sm_80
.address_size 64

	// .globl	activation_8

// Parameter block in constant memory. activation_8 reads only the first four
// bytes, as an f32 scale factor (called "alpha" below); bytes 4..7 are not
// read by this function.
.visible .const .align 4 .b8 params[8];

//-----------------------------------------------------------------------------
// activation_8
//
// In:   activation_8_param_0  32 bytes, handled as eight 32-bit words, each
//                             word holding two f16 values (low half first).
// Out:  func_retval0          32 bytes, same layout as the input.
//
// Every one of the 16 f16 lanes is processed independently:
//
//     x = (f32) lane
//     if (x < 0)                        // ordered compare: NaN is NOT < 0
//         x = alpha * (ex2.approx(x * log2(e)) - 1)
//     lane = (f16, round-to-nearest-even) x
//
// i.e. an ELU-shaped activation: values that are not below zero pass through
// (after the f16 -> f32 -> f16 round trip), negative values are mapped to
// alpha * (e^x - 1) using the approximate base-2 exponential.
//
// Constants: 0f3FB8AA3B = log2(e) ~= 1.442695,  0fBF800000 = -1.0
//
// Register roles:
//     %w<k>    32-bit word k of the argument, later of the result (k = 0..7)
//     %h<i>    f16 bits of lane i, input and then output      (i = 0..15)
//     %x<i>    f32 working value of lane i
//     %neg<i>  true when lane i is strictly negative
//     %alpha   f32 loaded from params[0..3]
//-----------------------------------------------------------------------------
.visible .func  (.param .align 4 .b8 func_retval0[32]) activation_8(
	.param .align 4 .b8 activation_8_param_0[32]
)
{
	.pragma "abi_param_reg all";
	.reg .pred 	%neg<16>;
	.reg .b16 	%h<16>;
	.reg .f32 	%x<16>;
	.reg .f32 	%alpha;
	.reg .b32 	%w<8>;

	// Fetch the eight packed input words and the scale factor.
	ld.param.b32 	%w0, [activation_8_param_0];
	ld.param.b32 	%w1, [activation_8_param_0+4];
	ld.param.b32 	%w2, [activation_8_param_0+8];
	ld.param.b32 	%w3, [activation_8_param_0+12];
	ld.param.b32 	%w4, [activation_8_param_0+16];
	ld.param.b32 	%w5, [activation_8_param_0+20];
	ld.param.b32 	%w6, [activation_8_param_0+24];
	ld.param.b32 	%w7, [activation_8_param_0+28];
	ld.const.f32 	%alpha, [params];

	// ---- word 0 (bytes 0..3): lanes 0 and 1 --------------------------------
	mov.b32 	{%h0, %h1}, %w0;

	cvt.f32.f16 	%x0, %h0;
	setp.lt.ftz.f32 	%neg0, %x0, 0f00000000;
	@%neg0 mul.ftz.f32 	%x0, %x0, 0f3FB8AA3B;
	@%neg0 ex2.approx.ftz.f32 	%x0, %x0;
	@%neg0 add.ftz.f32 	%x0, %x0, 0fBF800000;
	@%neg0 mul.ftz.f32 	%x0, %alpha, %x0;
	cvt.rn.f16.f32 	%h0, %x0;

	cvt.f32.f16 	%x1, %h1;
	setp.lt.ftz.f32 	%neg1, %x1, 0f00000000;
	@%neg1 mul.ftz.f32 	%x1, %x1, 0f3FB8AA3B;
	@%neg1 ex2.approx.ftz.f32 	%x1, %x1;
	@%neg1 add.ftz.f32 	%x1, %x1, 0fBF800000;
	@%neg1 mul.ftz.f32 	%x1, %alpha, %x1;
	cvt.rn.f16.f32 	%h1, %x1;

	mov.b32 	%w0, {%h0, %h1};

	// ---- word 1 (bytes 4..7): lanes 2 and 3 --------------------------------
	mov.b32 	{%h2, %h3}, %w1;

	cvt.f32.f16 	%x2, %h2;
	setp.lt.ftz.f32 	%neg2, %x2, 0f00000000;
	@%neg2 mul.ftz.f32 	%x2, %x2, 0f3FB8AA3B;
	@%neg2 ex2.approx.ftz.f32 	%x2, %x2;
	@%neg2 add.ftz.f32 	%x2, %x2, 0fBF800000;
	@%neg2 mul.ftz.f32 	%x2, %alpha, %x2;
	cvt.rn.f16.f32 	%h2, %x2;

	cvt.f32.f16 	%x3, %h3;
	setp.lt.ftz.f32 	%neg3, %x3, 0f00000000;
	@%neg3 mul.ftz.f32 	%x3, %x3, 0f3FB8AA3B;
	@%neg3 ex2.approx.ftz.f32 	%x3, %x3;
	@%neg3 add.ftz.f32 	%x3, %x3, 0fBF800000;
	@%neg3 mul.ftz.f32 	%x3, %alpha, %x3;
	cvt.rn.f16.f32 	%h3, %x3;

	mov.b32 	%w1, {%h2, %h3};

	// ---- word 2 (bytes 8..11): lanes 4 and 5 -------------------------------
	mov.b32 	{%h4, %h5}, %w2;

	cvt.f32.f16 	%x4, %h4;
	setp.lt.ftz.f32 	%neg4, %x4, 0f00000000;
	@%neg4 mul.ftz.f32 	%x4, %x4, 0f3FB8AA3B;
	@%neg4 ex2.approx.ftz.f32 	%x4, %x4;
	@%neg4 add.ftz.f32 	%x4, %x4, 0fBF800000;
	@%neg4 mul.ftz.f32 	%x4, %alpha, %x4;
	cvt.rn.f16.f32 	%h4, %x4;

	cvt.f32.f16 	%x5, %h5;
	setp.lt.ftz.f32 	%neg5, %x5, 0f00000000;
	@%neg5 mul.ftz.f32 	%x5, %x5, 0f3FB8AA3B;
	@%neg5 ex2.approx.ftz.f32 	%x5, %x5;
	@%neg5 add.ftz.f32 	%x5, %x5, 0fBF800000;
	@%neg5 mul.ftz.f32 	%x5, %alpha, %x5;
	cvt.rn.f16.f32 	%h5, %x5;

	mov.b32 	%w2, {%h4, %h5};

	// ---- word 3 (bytes 12..15): lanes 6 and 7 ------------------------------
	mov.b32 	{%h6, %h7}, %w3;

	cvt.f32.f16 	%x6, %h6;
	setp.lt.ftz.f32 	%neg6, %x6, 0f00000000;
	@%neg6 mul.ftz.f32 	%x6, %x6, 0f3FB8AA3B;
	@%neg6 ex2.approx.ftz.f32 	%x6, %x6;
	@%neg6 add.ftz.f32 	%x6, %x6, 0fBF800000;
	@%neg6 mul.ftz.f32 	%x6, %alpha, %x6;
	cvt.rn.f16.f32 	%h6, %x6;

	cvt.f32.f16 	%x7, %h7;
	setp.lt.ftz.f32 	%neg7, %x7, 0f00000000;
	@%neg7 mul.ftz.f32 	%x7, %x7, 0f3FB8AA3B;
	@%neg7 ex2.approx.ftz.f32 	%x7, %x7;
	@%neg7 add.ftz.f32 	%x7, %x7, 0fBF800000;
	@%neg7 mul.ftz.f32 	%x7, %alpha, %x7;
	cvt.rn.f16.f32 	%h7, %x7;

	mov.b32 	%w3, {%h6, %h7};

	// ---- word 4 (bytes 16..19): lanes 8 and 9 ------------------------------
	mov.b32 	{%h8, %h9}, %w4;

	cvt.f32.f16 	%x8, %h8;
	setp.lt.ftz.f32 	%neg8, %x8, 0f00000000;
	@%neg8 mul.ftz.f32 	%x8, %x8, 0f3FB8AA3B;
	@%neg8 ex2.approx.ftz.f32 	%x8, %x8;
	@%neg8 add.ftz.f32 	%x8, %x8, 0fBF800000;
	@%neg8 mul.ftz.f32 	%x8, %alpha, %x8;
	cvt.rn.f16.f32 	%h8, %x8;

	cvt.f32.f16 	%x9, %h9;
	setp.lt.ftz.f32 	%neg9, %x9, 0f00000000;
	@%neg9 mul.ftz.f32 	%x9, %x9, 0f3FB8AA3B;
	@%neg9 ex2.approx.ftz.f32 	%x9, %x9;
	@%neg9 add.ftz.f32 	%x9, %x9, 0fBF800000;
	@%neg9 mul.ftz.f32 	%x9, %alpha, %x9;
	cvt.rn.f16.f32 	%h9, %x9;

	mov.b32 	%w4, {%h8, %h9};

	// ---- word 5 (bytes 20..23): lanes 10 and 11 ----------------------------
	mov.b32 	{%h10, %h11}, %w5;

	cvt.f32.f16 	%x10, %h10;
	setp.lt.ftz.f32 	%neg10, %x10, 0f00000000;
	@%neg10 mul.ftz.f32 	%x10, %x10, 0f3FB8AA3B;
	@%neg10 ex2.approx.ftz.f32 	%x10, %x10;
	@%neg10 add.ftz.f32 	%x10, %x10, 0fBF800000;
	@%neg10 mul.ftz.f32 	%x10, %alpha, %x10;
	cvt.rn.f16.f32 	%h10, %x10;

	cvt.f32.f16 	%x11, %h11;
	setp.lt.ftz.f32 	%neg11, %x11, 0f00000000;
	@%neg11 mul.ftz.f32 	%x11, %x11, 0f3FB8AA3B;
	@%neg11 ex2.approx.ftz.f32 	%x11, %x11;
	@%neg11 add.ftz.f32 	%x11, %x11, 0fBF800000;
	@%neg11 mul.ftz.f32 	%x11, %alpha, %x11;
	cvt.rn.f16.f32 	%h11, %x11;

	mov.b32 	%w5, {%h10, %h11};

	// ---- word 6 (bytes 24..27): lanes 12 and 13 ----------------------------
	mov.b32 	{%h12, %h13}, %w6;

	cvt.f32.f16 	%x12, %h12;
	setp.lt.ftz.f32 	%neg12, %x12, 0f00000000;
	@%neg12 mul.ftz.f32 	%x12, %x12, 0f3FB8AA3B;
	@%neg12 ex2.approx.ftz.f32 	%x12, %x12;
	@%neg12 add.ftz.f32 	%x12, %x12, 0fBF800000;
	@%neg12 mul.ftz.f32 	%x12, %alpha, %x12;
	cvt.rn.f16.f32 	%h12, %x12;

	cvt.f32.f16 	%x13, %h13;
	setp.lt.ftz.f32 	%neg13, %x13, 0f00000000;
	@%neg13 mul.ftz.f32 	%x13, %x13, 0f3FB8AA3B;
	@%neg13 ex2.approx.ftz.f32 	%x13, %x13;
	@%neg13 add.ftz.f32 	%x13, %x13, 0fBF800000;
	@%neg13 mul.ftz.f32 	%x13, %alpha, %x13;
	cvt.rn.f16.f32 	%h13, %x13;

	mov.b32 	%w6, {%h12, %h13};

	// ---- word 7 (bytes 28..31): lanes 14 and 15 ----------------------------
	mov.b32 	{%h14, %h15}, %w7;

	cvt.f32.f16 	%x14, %h14;
	setp.lt.ftz.f32 	%neg14, %x14, 0f00000000;
	@%neg14 mul.ftz.f32 	%x14, %x14, 0f3FB8AA3B;
	@%neg14 ex2.approx.ftz.f32 	%x14, %x14;
	@%neg14 add.ftz.f32 	%x14, %x14, 0fBF800000;
	@%neg14 mul.ftz.f32 	%x14, %alpha, %x14;
	cvt.rn.f16.f32 	%h14, %x14;

	cvt.f32.f16 	%x15, %h15;
	setp.lt.ftz.f32 	%neg15, %x15, 0f00000000;
	@%neg15 mul.ftz.f32 	%x15, %x15, 0f3FB8AA3B;
	@%neg15 ex2.approx.ftz.f32 	%x15, %x15;
	@%neg15 add.ftz.f32 	%x15, %x15, 0fBF800000;
	@%neg15 mul.ftz.f32 	%x15, %alpha, %x15;
	cvt.rn.f16.f32 	%h15, %x15;

	mov.b32 	%w7, {%h14, %h15};

	// Write the eight repacked words back in the same order they arrived.
	st.param.b32 	[func_retval0+0], %w0;
	st.param.b32 	[func_retval0+4], %w1;
	st.param.b32 	[func_retval0+8], %w2;
	st.param.b32 	[func_retval0+12], %w3;
	st.param.b32 	[func_retval0+16], %w4;
	st.param.b32 	[func_retval0+20], %w5;
	st.param.b32 	[func_retval0+24], %w6;
	st.param.b32 	[func_retval0+28], %w7;
	ret;
}