//
// Syntax/target: NVIDIA PTX ISA 7.8, sm_80, 64-bit addressing.
//
.version 7.8
.target sm_80
.address_size 64

	// .globl	activation_16

// Module-level constant parameter block. This function reads only the first
// four bytes, as one f32 scale factor (called k below).
.visible .const .align 4 .b8 params[8];

//-----------------------------------------------------------------------------
// activation_16
//
// Purpose:
//   Applies  y = ln(1 + exp(k * x)) / k  to every f16 lane of a 64-byte
//   argument, where k is the f32 stored at params[0..3].
//
// In:
//   activation_16_param_0 : 64 bytes, read as 16 x 32-bit words. Each word is
//                           unpacked into two f16 values (low half, high half).
// Out:
//   func_retval0          : 64 bytes, same layout. Word i / half j of the
//                           result is computed from word i / half j of the
//                           input.
//
// Numerics:
//   All arithmetic is f32 with .ftz and the approximate ex2/lg2/div units;
//   results are rounded to f16 with cvt.rn.
//
//   With z = k * x, the value is evaluated as
//
//       ln(1 + e^z) = max(z, 0) + ln(1 + e^(-|z|))
//
//   which is algebraically identical to the direct form but never feeds a
//   positive argument to ex2. The direct form overflowed to +Inf once z
//   exceeded about 88.7 (2^(z*log2 e) no longer fits in f32), even though the
//   true result (about z / k = x) is representable in f16.
//
//   NaN inputs still yield NaN: max() drops the NaN, but the ex2/lg2 chain
//   keeps it and the fma propagates it.
//
//   NOTE(review): k == 0 still divides by zero, as before; presumably callers
//   never configure that. Confirm against the code that fills `params`.
//
// Constants:
//   0fBFB8AA3B = -log2(e)      0f3F317218 = ln(2)
//   0f3F800000 = 1.0           0f00000000 = 0.0
//-----------------------------------------------------------------------------
.visible .func  (.param .align 4 .b8 func_retval0[64]) activation_16(
	.param .align 4 .b8 activation_16_param_0[64]
)
{
	.pragma "abi_param_reg all";

	.reg .b16 	%hlo, %hhi;		// unpacked input halves of the current word
	.reg .b16 	%qlo, %qhi;		// converted result halves of the current word
	.reg .b32 	%w;			// current packed word (input, then output)
	.reg .f32 	%k;			// scale factor from params[0..3]
	.reg .f32 	%z;			// k * x
	.reg .f32 	%t;			// running temporary for the log term / result
	.reg .f32 	%p;			// max(z, 0)

	ld.const.f32 		%k, [params];

	// ---- word 0 (bytes 0..3) ------------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+0];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;			// z = k * x
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;		// -|z| * log2(e), always <= 0
	ex2.approx.ftz.f32 	%t, %t;				// e^(-|z|), in [0, 1]
	add.ftz.f32 		%t, %t, 0f3F800000;		// 1 + e^(-|z|)
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;		// max(z, 0)
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;		// ln(1 + e^(-|z|)) + max(z, 0)
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+0], %w;

	// ---- word 1 (bytes 4..7) ------------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+4];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+4], %w;

	// ---- word 2 (bytes 8..11) -----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+8];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+8], %w;

	// ---- word 3 (bytes 12..15) ----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+12];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+12], %w;

	// ---- word 4 (bytes 16..19) ----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+16];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+16], %w;

	// ---- word 5 (bytes 20..23) ----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+20];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+20], %w;

	// ---- word 6 (bytes 24..27) ----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+24];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+24], %w;

	// ---- word 7 (bytes 28..31) ----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+28];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+28], %w;

	// ---- word 8 (bytes 32..35) ----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+32];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+32], %w;

	// ---- word 9 (bytes 36..39) ----------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+36];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+36], %w;

	// ---- word 10 (bytes 40..43) ---------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+40];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+40], %w;

	// ---- word 11 (bytes 44..47) ---------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+44];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+44], %w;

	// ---- word 12 (bytes 48..51) ---------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+48];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+48], %w;

	// ---- word 13 (bytes 52..55) ---------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+52];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+52], %w;

	// ---- word 14 (bytes 56..59) ---------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+56];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+56], %w;

	// ---- word 15 (bytes 60..63) ---------------------------------------------
	ld.param.b32 		%w, [activation_16_param_0+60];
	mov.b32 		{%hlo, %hhi}, %w;
	// low half
	cvt.f32.f16 		%z, %hlo;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qlo, %t;
	// high half
	cvt.f32.f16 		%z, %hhi;
	mul.ftz.f32 		%z, %z, %k;
	abs.ftz.f32 		%t, %z;
	mul.ftz.f32 		%t, %t, 0fBFB8AA3B;
	ex2.approx.ftz.f32 	%t, %t;
	add.ftz.f32 		%t, %t, 0f3F800000;
	lg2.approx.ftz.f32 	%t, %t;
	max.ftz.f32 		%p, %z, 0f00000000;
	fma.rn.ftz.f32 		%t, %t, 0f3F317218, %p;
	div.approx.ftz.f32 	%t, %t, %k;
	cvt.rn.f16.f32 		%qhi, %t;
	mov.b32 		%w, {%qlo, %qhi};
	st.param.b32 		[func_retval0+60], %w;

	ret;
}