param_0+24]; ld.param.v2.u32 {%r107, %r108}, [PoolingKernel_param_0+32]; ld.param.v2.u32 {%r109, %r110}, [PoolingKernel_param_0+40]; ld.param.v2.u32 {%r111, %r112}, [PoolingKernel_param_0+48]; ld.param.v2.u32 {%r113, %r114}, [PoolingKernel_param_0+56]; ld.param.v2.u32 {%r115, %r116}, [PoolingKernel_param_0+64]; ld.param.v2.u32 {%r117, %r118}, [PoolingKernel_param_0+72]; ld.param.v2.u32 {%r119, %r120}, [PoolingKernel_param_0+80]; ld.param.v4.u8 {%rs166, %rs167, %rs168, %rs169}, [PoolingKernel_param_0+88]; ld.param.v2.u8 {%rs170, %rs171}, [PoolingKernel_param_0+92]; ld.param.v2.u32 {%r121, %r122}, [PoolingKernel_param_0+96]; ld.param.v2.u32 {%r123, %r124}, [PoolingKernel_param_0+104]; ld.param.v4.u8 {%rs172, %rs173, %rs174, %rs175}, [PoolingKernel_param_0+116]; ld.param.u64 %rd5, [PoolingKernel_param_0+8]; ld.param.u64 %rd4, [PoolingKernel_param_0]; cvta.to.global.u64 %rd2, %rd5; setp.ne.s16 %p13, %rs172, 0; setp.ne.s16 %p14, %rs173, 0; and.pred %p15, %p13, %p14; @%p15 bra $L__BB0_73; bra.uni $L__BB0_1; $L__BB0_73: setp.eq.s16 %p198, %rs166, 2; @%p198 bra $L__BB0_159; bra.uni $L__BB0_74; $L__BB0_159: mov.u32 %r68, %tid.x; setp.ge.u32 %p324, %r68, %r124; @%p324 bra $L__BB0_203; ld.param.u32 %r364, [PoolingKernel_param_0+112]; mov.u32 %r69, %ctaid.z; mov.u32 %r318, %ctaid.x; mov.u32 %r319, %ctaid.y; div.u32 %r320, %r319, %r364; mul.lo.s32 %r321, %r320, %r364; sub.s32 %r322, %r319, %r321; div.u32 %r323, %r68, %r121; div.u32 %r324, %r323, %r122; mul.lo.s32 %r325, %r324, %r122; sub.s32 %r326, %r323, %r325; mul.lo.s32 %r327, %r323, %r121; sub.s32 %r328, %r68, %r327; mad.lo.s32 %r70, %r121, %r318, %r328; mad.lo.s32 %r71, %r122, %r322, %r326; mad.lo.s32 %r72, %r123, %r320, %r324; cvt.u32.u16 %r329, %rs169; and.b32 %r330, %r329, 255; mul.lo.s32 %r331, %r71, %r330; cvt.u32.u16 %r332, %rs170; and.b32 %r333, %r332, 255; sub.s32 %r73, %r331, %r333; cvt.u32.u16 %r334, %rs168; and.b32 %r335, %r334, 255; mul.lo.s32 %r336, %r72, %r335; cvt.u32.u16 %r337, %rs171; and.b32 %r338, %r337, 255; sub.s32 %r74, %r336, %r338; setp.lt.s32 %p325, %r69, 0; mov.u16 %rs115, 1; mov.u16 %rs497, %rs115; @%p325 bra $L__BB0_163; setp.ge.u32 %p326, %r69, %r105; setp.lt.s32 %p327, %r70, 0; or.pred %p328, %p327, %p326; setp.ge.u32 %p329, %r70, %r106; or.pred %p330, %p328, %p329; setp.lt.s32 %p331, %r74, 0; or.pred %p332, %p330, %p331; setp.ge.u32 %p333, %r74, %r107; or.pred %p334, %p332, %p333; setp.lt.s32 %p335, %r73, 0; or.pred %p336, %p334, %p335; setp.ge.u32 %p337, %r73, %r108; or.pred %p338, %p336, %p337; @%p338 bra $L__BB0_163; mul.lo.s32 %r339, %r69, %r109; mad.lo.s32 %r340, %r70, %r110, %r339; mad.lo.s32 %r341, %r74, %r111, %r340; mad.lo.s32 %r342, %r73, %r112, %r341; mul.wide.u32 %rd57, %r342, 2; add.s64 %rd58, %rd2, %rd57; ld.global.u16 %rs496, [%rd58]; mov.u16 %rs497, 0; $L__BB0_163: or.b32 %r343, %r69, %r70; setp.gt.s32 %p339, %r343, -1; setp.lt.u32 %p340, %r69, %r105; and.pred %p341, %p340, %p339; setp.lt.u32 %p342, %r70, %r106; and.pred %p11, %p341, %p342; not.pred %p343, %p11; setp.lt.s32 %p344, %r74, 0; or.pred %p345, %p343, %p344; setp.ge.u32 %p346, %r74, %r107; or.pred %p347, %p345, %p346; setp.lt.s32 %p348, %r73, -1; or.pred %p349, %p347, %p348; add.s32 %r75, %r73, 1; setp.ge.u32 %p350, %r75, %r108; or.pred %p351, %p349, %p350; @%p351 bra $L__BB0_165; mul.lo.s32 %r344, %r69, %r109; mad.lo.s32 %r345, %r70, %r110, %r344; mad.lo.s32 %r346, %r74, %r111, %r345; mad.lo.s32 %r347, %r75, %r112, %r346; mul.wide.u32 %rd59, %r347, 2; add.s64 %rd60, %rd2, %rd59; ld.global.u16 %rs114, [%rd60]; mov.u16 %rs115, 0; $L__BB0_165: setp.gt.s32 %p352, %r74, -2; and.pred %p353, %p11, %p352; add.s32 %r76, %r74, 1; setp.lt.u32 %p354, %r76, %r107; and.pred %p12, %p353, %p354; not.pred %p355, %p12; setp.lt.s32 %p356, %r73, 0; or.pred %p357, %p355, %p356; setp.ge.u32 %p358, %r73, %r108; mov.u16 %rs121, 1; or.pred %p359, %p357, %p358; mov.u16 %rs118, %rs121; @%p359 bra $L__BB0_167; mul.lo.s32 %r348, %r69, %r109; mad.lo.s32 %r349, %r70, %r110, %r348; mad.lo.s32 %r350, %r76, %r111, %r349; mad.lo.s32 %r351, %r73, %r112, %r350; mul.wide.u32 %rd61, %r351, 2; add.s64 %rd62, %rd2, %rd61; ld.global.u16 %rs117, [%rd62]; mov.u16 %rs118, 0; $L__BB0_167: or.pred %p362, %p355, %p348; or.pred %p364, %p362, %p350; @%p364 bra $L__BB0_169; mul.lo.s32 %r352, %r69, %r109; mad.lo.s32 %r353, %r70, %r110, %r352; mad.lo.s32 %r354, %r76, %r111, %r353; mad.lo.s32 %r355, %r75, %r112, %r354; mul.wide.u32 %rd63, %r355, 2; add.s64 %rd64, %rd2, %rd63; ld.global.u16 %rs120, [%rd64]; mov.u16 %rs121, 0; $L__BB0_169: ld.param.u64 %rd84, [PoolingKernel_param_0]; mov.f64 %fd4, 0d0000000000000000; // begin inline asm { cvt.rn.f16.f64 %rs509, %fd4;} // end inline asm setp.eq.s64 %p365, %rd84, 0; @%p365 bra $L__BB0_189; ld.param.u64 %rd85, [PoolingKernel_param_0]; setp.eq.s64 %p366, %rd85, 1; @%p366 bra $L__BB0_180; ld.param.u64 %rd86, [PoolingKernel_param_0]; setp.ne.s64 %p367, %rd86, 2; @%p367 bra $L__BB0_199; setp.ne.s16 %p368, %rs497, 0; mov.f64 %fd5, 0dC0EFFC0000000000; // begin inline asm { cvt.rn.f16.f64 %rs505, %fd5;} // end inline asm @%p368 bra $L__BB0_174; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs496, %rs505; selp.u16 %rs374, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p369, %rs374, 0; selp.b16 %rs505, %rs505, %rs496, %p369; $L__BB0_174: setp.ne.s16 %p370, %rs115, 0; @%p370 bra $L__BB0_176; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs114, %rs505; selp.u16 %rs377, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p371, %rs377, 0; selp.b16 %rs505, %rs505, %rs114, %p371; $L__BB0_176: setp.ne.s16 %p372, %rs118, 0; @%p372 bra $L__BB0_178; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs117, %rs505; selp.u16 %rs380, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p373, %rs380, 0; selp.b16 %rs505, %rs505, %rs117, %p373; $L__BB0_178: setp.ne.s16 %p374, %rs121, 0; @%p374 bra $L__BB0_200; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs120, %rs505; selp.u16 %rs383, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p375, %rs383, 0; selp.b16 %rs505, %rs505, %rs120, %p375; bra.uni $L__BB0_200; $L__BB0_1: setp.eq.s16 %p16, %rs166, 2; @%p16 bra $L__BB0_48; bra.uni $L__BB0_2; $L__BB0_48: mov.u32 %r27, %ctaid.z; mov.u32 %r28, %ctaid.x; add.s32 %r204, %r116, -1; shr.u32 %r205, %r204, 4; add.s32 %r206, %r205, 1; mov.u32 %r207, %ctaid.y; div.u32 %r208, %r207, %r206; mul.lo.s32 %r209, %r208, %r206; sub.s32 %r210, %r207, %r209; shl.b32 %r211, %r210, 4; shl.b32 %r212, %r208, 4; mov.u32 %r213, %tid.y; add.s32 %r29, %r212, %r213; mov.u32 %r214, %tid.x; add.s32 %r30, %r211, %r214; cvt.u32.u16 %r215, %rs169; and.b32 %r216, %r215, 255; mul.lo.s32 %r217, %r30, %r216; cvt.u32.u16 %r218, %rs170; and.b32 %r219, %r218, 255; sub.s32 %r31, %r217, %r219; cvt.u32.u16 %r220, %rs168; and.b32 %r221, %r220, 255; mul.lo.s32 %r222, %r29, %r221; cvt.u32.u16 %r223, %rs171; and.b32 %r224, %r223, 255; sub.s32 %r32, %r222, %r224; setp.lt.s32 %p129, %r27, 0; mov.u16 %rs16, 1; mov.u16 %rs445, %rs16; @%p129 bra $L__BB0_51; setp.ge.u32 %p130, %r27, %r105; setp.lt.s32 %p131, %r28, 0; or.pred %p132, %p131, %p130; setp.ge.u32 %p133, %r28, %r106; or.pred %p134, %p132, %p133; setp.lt.s32 %p135, %r32, 0; or.pred %p136, %p134, %p135; setp.ge.u32 %p137, %r32, %r107; or.pred %p138, %p136, %p137; setp.lt.s32 %p139, %r31, 0; or.pred %p140, %p138, %p139; setp.ge.u32 %p141, %r31, %r108; or.pred %p142, %p140, %p141; @%p142 bra $L__BB0_51; mul.lo.s32 %r225, %r27, %r109; mad.lo.s32 %r226, %r28, %r110, %r225; mad.lo.s32 %r227, %r32, %r111, %r226; mad.lo.s32 %r228, %r31, %r112, %r227; mul.wide.u32 %rd27, %r228, 4; add.s64 %rd28, %rd2, %rd27; ld.global.f32 %f174, [%rd28]; mov.u16 %rs445, 0; $L__BB0_51: or.b32 %r229, %r27, %r28; setp.gt.s32 %p143, %r229, -1; setp.lt.u32 %p144, %r27, %r105; and.pred %p145, %p144, %p143; setp.lt.u32 %p146, %r28, %r106; and.pred %p5, %p145, %p146; not.pred %p147, %p5; setp.lt.s32 %p148, %r32, 0; or.pred %p149, %p147, %p148; setp.ge.u32 %p150, %r32, %r107; or.pred %p151, %p149, %p150; setp.lt.s32 %p152, %r31, -1; or.pred %p153, %p151, %p152; add.s32 %r33, %r31, 1; setp.ge.u32 %p154, %r33, %r108; or.pred %p155, %p153, %p154; @%p155 bra $L__BB0_53; mul.lo.s32 %r230, %r27, %r109; mad.lo.s32 %r231, %r28, %r110, %r230; mad.lo.s32 %r232, %r32, %r111, %r231; mad.lo.s32 %r233, %r33, %r112, %r232; mul.wide.u32 %rd29, %r233, 4; add.s64 %rd30, %rd2, %rd29; ld.global.f32 %f42, [%rd30]; mov.u16 %rs16, 0; $L__BB0_53: setp.gt.s32 %p156, %r32, -2; and.pred %p157, %p5, %p156; add.s32 %r34, %r32, 1; setp.lt.u32 %p158, %r34, %r107; and.pred %p6, %p157, %p158; not.pred %p159, %p6; setp.lt.s32 %p160, %r31, 0; or.pred %p161, %p159, %p160; setp.ge.u32 %p162, %r31, %r108; mov.u16 %rs18, 1; or.pred %p163, %p161, %p162; mov.u16 %rs17, %rs18; @%p163 bra $L__BB0_55; mul.lo.s32 %r234, %r27, %r109; mad.lo.s32 %r235, %r28, %r110, %r234; mad.lo.s32 %r236, %r34, %r111, %r235; mad.lo.s32 %r237, %r31, %r112, %r236; mul.wide.u32 %rd31, %r237, 4; add.s64 %rd32, %rd2, %rd31; ld.global.f32 %f44, [%rd32]; mov.u16 %rs17, 0; $L__BB0_55: or.pred %p166, %p159, %p152; or.pred %p168, %p166, %p154; @%p168 bra $L__BB0_57; mul.lo.s32 %r238, %r27, %r109; mad.lo.s32 %r239, %r28, %r110, %r238; mad.lo.s32 %r240, %r34, %r111, %r239; mad.lo.s32 %r241, %r33, %r112, %r240; mul.wide.u32 %rd33, %r241, 4; add.s64 %rd34, %rd2, %rd33; ld.global.f32 %f46, [%rd34]; mov.u16 %rs18, 0; $L__BB0_57: mov.f32 %f179, 0f00000000; setp.eq.s64 %p169, %rd4, 0; @%p169 bra $L__BB0_69; setp.eq.s64 %p170, %rd4, 1; @%p170 bra $L__BB0_68; setp.ne.s64 %p171, %rd4, 2; @%p171 bra $L__BB0_70; setp.ne.s16 %p172, %rs445, 0; mov.f32 %f179, 0fFF7FFFEE; @%p172 bra $L__BB0_62; mov.f32 %f122, 0fFF7FFFEE; max.f32 %f179, %f122, %f174; $L__BB0_62: setp.ne.s16 %p173, %rs16, 0; @%p173 bra $L__BB0_64; max.f32 %f179, %f179, %f42; $L__BB0_64: setp.ne.s16 %p174, %rs17, 0; @%p174 bra $L__BB0_66; max.f32 %f179, %f179, %f44; $L__BB0_66: setp.ne.s16 %p175, %rs18, 0; @%p175 bra $L__BB0_70; max.f32 %f179, %f179, %f46; bra.uni $L__BB0_70; $L__BB0_74: setp.ne.s16 %p199, %rs166, 3; @%p199 bra $L__BB0_203; mov.u32 %r40, %tid.x; setp.ge.u32 %p200, %r40, %r124; @%p200 bra $L__BB0_203; ld.param.u32 %r363, [PoolingKernel_param_0+112]; mov.u32 %r41, %ctaid.z; mov.u32 %r253, %ctaid.x; mov.u32 %r254, %ctaid.y; div.u32 %r255, %r254, %r363; mul.lo.s32 %r256, %r255, %r363; sub.s32 %r257, %r254, %r256; div.u32 %r258, %r40, %r121; div.u32 %r259, %r258, %r122; mul.lo.s32 %r260, %r259, %r122; sub.s32 %r261, %r258, %r260; mul.lo.s32 %r262, %r258, %r121; sub.s32 %r263, %r40, %r262; mad.lo.s32 %r42, %r121, %r253, %r263; mad.lo.s32 %r43, %r122, %r257, %r261; mad.lo.s32 %r44, %r123, %r255, %r259; cvt.u32.u16 %r264, %rs169; and.b32 %r265, %r264, 255; mul.lo.s32 %r266, %r43, %r265; cvt.u32.u16 %r267, %rs170; and.b32 %r268, %r267, 255; sub.s32 %r45, %r266, %r268; cvt.u32.u16 %r269, %rs168; and.b32 %r270, %r269, 255; mul.lo.s32 %r271, %r44, %r270; cvt.u32.u16 %r272, %rs171; and.b32 %r273, %r272, 255; sub.s32 %r46, %r271, %r273; setp.lt.s32 %p201, %r41, 0; mov.u16 %rs24, 1; mov.u16 %rs450, %rs24; @%p201 bra $L__BB0_79; setp.ge.u32 %p202, %r41, %r105; setp.lt.s32 %p203, %r42, 0; or.pred %p204, %p203, %p202; setp.ge.u32 %p205, %r42, %r106; or.pred %p206, %p204, %p205; setp.lt.s32 %p207, %r46, 0; or.pred %p208, %p206, %p207; setp.ge.u32 %p209, %r46, %r107; or.pred %p210, %p208, %p209; setp.lt.s32 %p211, %r45, 0; or.pred %p212, %p210, %p211; setp.ge.u32 %p213, %r45, %r108; or.pred %p214, %p212, %p213; @%p214 bra $L__BB0_79; mul.lo.s32 %r274, %r41, %r109; mad.lo.s32 %r275, %r42, %r110, %r274; mad.lo.s32 %r276, %r46, %r111, %r275; mad.lo.s32 %r277, %r45, %r112, %r276; mul.wide.u32 %rd37, %r277, 2; add.s64 %rd38, %rd2, %rd37; ld.global.u16 %rs449, [%rd38]; mov.u16 %rs450, 0; $L__BB0_79: or.b32 %r278, %r41, %r42; setp.gt.s32 %p215, %r278, -1; setp.lt.u32 %p216, %r41, %r105; and.pred %p217, %p216, %p215; setp.lt.u32 %p218, %r42, %r106; and.pred %p7, %p217, %p218; setp.gt.s32 %p219, %r46, -1; and.pred %p220, %p7, %p219; setp.lt.u32 %p221, %r46, %r107; and.pred %p8, %p220, %p221; not.pred %p222, %p8; setp.lt.s32 %p223, %r45, -1; or.pred %p224, %p222, %p223; add.s32 %r47, %r45, 1; setp.ge.u32 %p225, %r47, %r108; or.pred %p226, %p224, %p225; @%p226 bra $L__BB0_81; mul.lo.s32 %r279, %r41, %r109; mad.lo.s32 %r280, %r42, %r110, %r279; mad.lo.s32 %r281, %r46, %r111, %r280; mad.lo.s32 %r282, %r47, %r112, %r281; mul.wide.u32 %rd39, %r282, 2; add.s64 %rd40, %rd2, %rd39; ld.global.u16 %rs23, [%rd40]; mov.u16 %rs24, 0; $L__BB0_81: setp.lt.u32 %p412, %r46, %r107; and.pred %p411, %p220, %p412; not.pred %p410, %p411; setp.lt.s32 %p227, %r45, -2; or.pred %p229, %p410, %p227; add.s32 %r48, %r45, 2; setp.ge.u32 %p230, %r48, %r108; mov.u16 %rs27, 1; or.pred %p231, %p229, %p230; @%p231 bra $L__BB0_83; mov.u32 %r383, %ctaid.z; mul.lo.s32 %r283, %r383, %r109; mad.lo.s32 %r284, %r42, %r110, %r283; mad.lo.s32 %r285, %r46, %r111, %r284; mad.lo.s32 %r286, %r48, %r112, %r285; mul.wide.u32 %rd41, %r286, 2; add.s64 %rd42, %rd2, %rd41; ld.global.u16 %rs26, [%rd42]; mov.u16 %rs27, 0; $L__BB0_83: mov.u16 %rs30, 1; setp.gt.s32 %p232, %r46, -2; and.pred %p233, %p7, %p232; add.s32 %r49, %r46, 1; setp.lt.u32 %p234, %r49, %r107; and.pred %p9, %p233, %p234; not.pred %p235, %p9; setp.lt.s32 %p236, %r45, 0; or.pred %p237, %p235, %p236; setp.ge.u32 %p238, %r45, %r108; or.pred %p239, %p237, %p238; @%p239 bra $L__BB0_85; add.s32 %r385, %r46, 1; mov.u32 %r382, %ctaid.z; mul.lo.s32 %r287, %r382, %r109; mad.lo.s32 %r288, %r42, %r110, %r287; mad.lo.s32 %r289, %r385, %r111, %r288; mad.lo.s32 %r290, %r45, %r112, %r289; mul.wide.u32 %rd43, %r290, 2; add.s64 %rd44, %rd2, %rd43; ld.global.u16 %rs29, [%rd44]; mov.u16 %rs30, 0; $L__BB0_85: add.s32 %r379, %r45, 1; setp.ge.u32 %p414, %r379, %r108; setp.lt.s32 %p413, %r45, -1; or.pred %p242, %p235, %p413; mov.u16 %rs33, 1; or.pred %p244, %p242, %p414; @%p244 bra $L__BB0_87; add.s32 %r384, %r46, 1; add.s32 %r381, %r45, 1; mov.u32 %r380, %ctaid.z; mul.lo.s32 %r291, %r380, %r109; mad.lo.s32 %r292, %r42, %r110, %r291; mad.lo.s32 %r293, %r384, %r111, %r292; mad.lo.s32 %r294, %r381, %r112, %r293; mul.wide.u32 %rd45, %r294, 2; add.s64 %rd46, %rd2, %rd45; ld.global.u16 %rs32, [%rd46]; mov.u16 %rs33, 0; $L__BB0_87: add.s32 %r365, %r45, 2; setp.ge.u32 %p402, %r365, %r108; mov.u16 %rs36, 1; setp.lt.s32 %p401, %r45, -2; or.pred %p247, %p235, %p401; or.pred %p249, %p247, %p402; @%p249 bra $L__BB0_89; add.s32 %r378, %r46, 1; add.s32 %r377, %r45, 2; mov.u32 %r376, %ctaid.z; mul.lo.s32 %r295, %r376, %r109; mad.lo.s32 %r296, %r42, %r110, %r295; mad.lo.s32 %r297, %r378, %r111, %r296; mad.lo.s32 %r298, %r377, %r112, %r297; mul.wide.u32 %rd47, %r298, 2; add.s64 %rd48, %rd2, %rd47; ld.global.u16 %rs35, [%rd48]; mov.u16 %rs36, 0; $L__BB0_89: setp.ge.u32 %p404, %r45, %r108; setp.lt.s32 %p403, %r45, 0; setp.gt.s32 %p250, %r46, -3; and.pred %p251, %p7, %p250; add.s32 %r50, %r46, 2; setp.lt.u32 %p252, %r50, %r107; and.pred %p10, %p251, %p252; not.pred %p253, %p10; or.pred %p255, %p253, %p403; mov.u16 %rs39, 1; or.pred %p257, %p255, %p404; @%p257 bra $L__BB0_91; ld.param.u64 %rd79, [PoolingKernel_param_0+8]; cvta.to.global.u64 %rd78, %rd79; mov.u32 %r375, %ctaid.z; mul.lo.s32 %r299, %r375, %r109; mad.lo.s32 %r300, %r42, %r110, %r299; mad.lo.s32 %r301, %r50, %r111, %r300; mad.lo.s32 %r302, %r45, %r112, %r301; mul.wide.u32 %rd49, %r302, 2; add.s64 %rd50, %rd78, %rd49; ld.global.u16 %rs38, [%rd50]; mov.u16 %rs39, 0; $L__BB0_91: mov.u16 %rs42, 1; add.s32 %r366, %r45, 1; setp.ge.u32 %p406, %r366, %r108; setp.lt.s32 %p405, %r45, -1; or.pred %p260, %p253, %p405; or.pred %p262, %p260, %p406; @%p262 bra $L__BB0_93; ld.param.u64 %rd77, [PoolingKernel_param_0+8]; cvta.to.global.u64 %rd76, %rd77; add.s32 %r374, %r45, 1; mov.u32 %r373, %ctaid.z; mul.lo.s32 %r303, %r373, %r109; mad.lo.s32 %r304, %r42, %r110, %r303; mad.lo.s32 %r305, %r50, %r111, %r304; mad.lo.s32 %r306, %r374, %r112, %r305; mul.wide.u32 %rd51, %r306, 2; add.s64 %rd52, %rd76, %rd51; ld.global.u16 %rs41, [%rd52]; mov.u16 %rs42, 0; $L__BB0_93: add.s32 %r367, %r45, 2; setp.ge.u32 %p408, %r367, %r108; setp.lt.s32 %p407, %r45, -2; or.pred %p265, %p253, %p407; mov.u16 %rs45, 1; or.pred %p267, %p265, %p408; @%p267 bra $L__BB0_95; ld.param.u64 %rd75, [PoolingKernel_param_0+8]; cvta.to.global.u64 %rd74, %rd75; add.s32 %r372, %r45, 2; mov.u32 %r371, %ctaid.z; mul.lo.s32 %r307, %r371, %r109; mad.lo.s32 %r308, %r42, %r110, %r307; mad.lo.s32 %r309, %r50, %r111, %r308; mad.lo.s32 %r310, %r372, %r112, %r309; mul.wide.u32 %rd53, %r310, 2; add.s64 %rd54, %rd74, %rd53; ld.global.u16 %rs44, [%rd54]; mov.u16 %rs45, 0; $L__BB0_95: ld.param.u64 %rd67, [PoolingKernel_param_0]; mov.f64 %fd1, 0d0000000000000000; // begin inline asm { cvt.rn.f16.f64 %rs477, %fd1;} // end inline asm setp.eq.s64 %p268, %rd67, 0; @%p268 bra $L__BB0_135; ld.param.u64 %rd68, [PoolingKernel_param_0]; setp.eq.s64 %p269, %rd68, 1; @%p269 bra $L__BB0_116; ld.param.u64 %rd69, [PoolingKernel_param_0]; setp.ne.s64 %p270, %rd69, 2; @%p270 bra $L__BB0_155; setp.ne.s16 %p271, %rs450, 0; mov.f64 %fd2, 0dC0EFFC0000000000; // begin inline asm { cvt.rn.f16.f64 %rs468, %fd2;} // end inline asm @%p271 bra $L__BB0_100; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs449, %rs468; selp.u16 %rs239, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p272, %rs239, 0; selp.b16 %rs468, %rs468, %rs449, %p272; $L__BB0_100: setp.ne.s16 %p273, %rs24, 0; @%p273 bra $L__BB0_102; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs23, %rs468; selp.u16 %rs242, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p274, %rs242, 0; selp.b16 %rs468, %rs468, %rs23, %p274; $L__BB0_102: setp.ne.s16 %p275, %rs27, 0; @%p275 bra $L__BB0_104; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs26, %rs468; selp.u16 %rs245, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p276, %rs245, 0; selp.b16 %rs468, %rs468, %rs26, %p276; $L__BB0_104: setp.ne.s16 %p277, %rs30, 0; @%p277 bra $L__BB0_106; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs29, %rs468; selp.u16 %rs248, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p278, %rs248, 0; selp.b16 %rs468, %rs468, %rs29, %p278; $L__BB0_106: setp.ne.s16 %p279, %rs33, 0; @%p279 bra $L__BB0_108; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs32, %rs468; selp.u16 %rs251, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p280, %rs251, 0; selp.b16 %rs468, %rs468, %rs32, %p280; $L__BB0_108: setp.ne.s16 %p281, %rs36, 0; @%p281 bra $L__BB0_110; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs35, %rs468; selp.u16 %rs254, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p282, %rs254, 0; selp.b16 %rs468, %rs468, %rs35, %p282; $L__BB0_110: setp.ne.s16 %p283, %rs39, 0; @%p283 bra $L__BB0_112; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs38, %rs468; selp.u16 %rs257, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p284, %rs257, 0; selp.b16 %rs468, %rs468, %rs38, %p284; $L__BB0_112: setp.ne.s16 %p285, %rs42, 0; @%p285 bra $L__BB0_114; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs41, %rs468; selp.u16 %rs260, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p286, %rs260, 0; selp.b16 %rs468, %rs468, %rs41, %p286; $L__BB0_114: setp.ne.s16 %p287, %rs45, 0; @%p287 bra $L__BB0_156; // begin inline asm { .reg .pred __$temp3; setp.gt.f16 __$temp3, %rs44, %rs468; selp.u16 %rs263, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p288, %rs263, 0; selp.b16 %rs468, %rs468, %rs44, %p288; bra.uni $L__BB0_156; $L__BB0_2: setp.ne.s16 %p17, %rs166, 3; @%p17 bra $L__BB0_203; mov.u32 %r17, %ctaid.z; mov.u32 %r18, %ctaid.x; add.s32 %r125, %r116, -1; shr.u32 %r126, %r125, 4; add.s32 %r127, %r126, 1; mov.u32 %r128, %ctaid.y; div.u32 %r129, %r128, %r127; mul.lo.s32 %r130, %r129, %r127; sub.s32 %r131, %r128, %r130; shl.b32 %r132, %r131, 4; shl.b32 %r133, %r129, 4; mov.u32 %r134, %tid.y; add.s32 %r19, %r133, %r134; mov.u32 %r135, %tid.x; add.s32 %r20, %r132, %r135; cvt.u32.u16 %r136, %rs169; and.b32 %r137, %r136, 255; mul.lo.s32 %r138, %r20, %r137; cvt.u32.u16 %r139, %rs170; and.b32 %r140, %r139, 255; sub.s32 %r21, %r138, %r140; cvt.u32.u16 %r141, %rs168; and.b32 %r142, %r141, 255; mul.lo.s32 %r143, %r19, %r142; cvt.u32.u16 %r144, %rs171; and.b32 %r145, %r144, 255; sub.s32 %r22, %r143, %r145; setp.lt.s32 %p18, %r17, 0; mov.u16 %rs7, 1; mov.u16 %rs436, %rs7; @%p18 bra $L__BB0_6; setp.ge.u32 %p19, %r17, %r105; setp.lt.s32 %p20, %r18, 0; or.pred %p21, %p20, %p19; setp.ge.u32 %p22, %r18, %r106; or.pred %p23, %p21, %p22; setp.lt.s32 %p24, %r22, 0; or.pred %p25, %p23, %p24; setp.ge.u32 %p26, %r22, %r107; or.pred %p27, %p25, %p26; setp.lt.s32 %p28, %r21, 0; or.pred %p29, %p27, %p28; setp.ge.u32 %p30, %r21, %r108; or.pred %p31, %p29, %p30; @%p31 bra $L__BB0_6; mul.lo.s32 %r146, %r17, %r109; mad.lo.s32 %r147, %r18, %r110, %r146; mad.lo.s32 %r148, %r22, %r111, %r147; mad.lo.s32 %r149, %r21, %r112, %r148; mul.wide.u32 %rd7, %r149, 4; add.s64 %rd8, %rd2, %rd7; ld.global.f32 %f156, [%rd8]; mov.u16 %rs436, 0; $L__BB0_6: or.b32 %r150, %r17, %r18; setp.gt.s32 %p32, %r150, -1; setp.lt.u32 %p33, %r17, %r105; and.pred %p34, %p33, %p32; setp.lt.u32 %p35, %r18, %r106; and.pred %p1, %p34, %p35; setp.gt.s32 %p36, %r22, -1; and.pred %p37, %p1, %p36; setp.lt.u32 %p38, %r22, %r107; and.pred %p2, %p37, %p38; not.pred %p39, %p2; setp.lt.s32 %p40, %r21, -1; or.pred %p41, %p39, %p40; add.s32 %r23, %r21, 1; setp.ge.u32 %p42, %r23, %r108; or.pred %p43, %p41, %p42; @%p43 bra $L__BB0_8; mul.lo.s32 %r151, %r17, %r109; mad.lo.s32 %r152, %r18, %r110, %r151; mad.lo.s32 %r153, %r22, %r111, %r152; mad.lo.s32 %r154, %r23, %r112, %r153; mul.wide.u32 %rd9, %r154, 4; add.s64 %rd10, %rd2, %rd9; ld.global.f32 %f4, [%rd10]; mov.u16 %rs7, 0; $L__BB0_8: setp.lt.s32 %p44, %r21, -2; or.pred %p46, %p39, %p44; add.s32 %r24, %r21, 2; setp.ge.u32 %p47, %r24, %r108; mov.u16 %rs9, 1; or.pred %p48, %p46, %p47; mov.u16 %rs8, %rs9; @%p48 bra $L__BB0_10; mul.lo.s32 %r155, %r17, %r109; mad.lo.s32 %r156, %r18, %r110, %r155; mad.lo.s32 %r157, %r22, %r111, %r156; mad.lo.s32 %r158, %r24, %r112, %r157; mul.wide.u32 %rd11, %r158, 4; add.s64 %rd12, %rd2, %rd11; ld.global.f32 %f6, [%rd12]; mov.u16 %rs8, 0; $L__BB0_10: setp.gt.s32 %p49, %r22, -2; and.pred %p50, %p1, %p49; add.s32 %r25, %r22, 1; setp.lt.u32 %p51, %r25, %r107; and.pred %p3, %p50, %p51; not.pred %p52, %p3; setp.lt.s32 %p53, %r21, 0; or.pred %p54, %p52, %p53; setp.ge.u32 %p55, %r21, %r108; or.pred %p56, %p54, %p55; @%p56 bra $L__BB0_12; mul.lo.s32 %r159, %r17, %r109; mad.lo.s32 %r160, %r18, %r110, %r159; mad.lo.s32 %r161, %r25, %r111, %r160; mad.lo.s32 %r162, %r21, %r112, %r161; mul.wide.u32 %rd13, %r162, 4; add.s64 %rd14, %rd2, %rd13; ld.global.f32 %f8, [%rd14]; mov.u16 %rs9, 0; $L__BB0_12: or.pred %p59, %p52, %p40; mov.u16 %rs11, 1; or.pred %p61, %p59, %p42; mov.u16 %rs10, %rs11; @%p61 bra $L__BB0_14; mul.lo.s32 %r163, %r17, %r109; mad.lo.s32 %r164, %r18, %r110, %r163; mad.lo.s32 %r165, %r25, %r111, %r164; mad.lo.s32 %r166, %r23, %r112, %r165; mul.wide.u32 %rd15, %r166, 4; add.s64 %rd16, %rd2, %rd15; ld.global.f32 %f10, [%rd16]; mov.u16 %rs10, 0; $L__BB0_14: or.pred %p64, %p52, %p44; or.pred %p66, %p64, %p47; @%p66 bra $L__BB0_16; mul.lo.s32 %r167, %r17, %r109; mad.lo.s32 %r168, %r18, %r110, %r167; mad.lo.s32 %r169, %r25, %r111, %r168; mad.lo.s32 %r170, %r24, %r112, %r169; mul.wide.u32 %rd17, %r170, 4; add.s64 %rd18, %rd2, %rd17; ld.global.f32 %f12, [%rd18]; mov.u16 %rs11, 0; $L__BB0_16: setp.gt.s32 %p67, %r22, -3; and.pred %p68, %p1, %p67; add.s32 %r26, %r22, 2; setp.lt.u32 %p69, %r26, %r107; and.pred %p4, %p68, %p69; not.pred %p70, %p4; or.pred %p72, %p70, %p53; mov.u16 %rs13, 1; or.pred %p74, %p72, %p55; mov.u16 %rs12, %rs13; @%p74 bra $L__BB0_18; mul.lo.s32 %r171, %r17, %r109; mad.lo.s32 %r172, %r18, %r110, %r171; mad.lo.s32 %r173, %r26, %r111, %r172; mad.lo.s32 %r174, %r21, %r112, %r173; mul.wide.u32 %rd19, %r174, 4; add.s64 %rd20, %rd2, %rd19; ld.global.f32 %f14, [%rd20]; mov.u16 %rs12, 0; $L__BB0_18: or.pred %p77, %p70, %p40; or.pred %p79, %p77, %p42; @%p79 bra $L__BB0_20; mul.lo.s32 %r175, %r17, %r109; mad.lo.s32 %r176, %r18, %r110, %r175; mad.lo.s32 %r177, %r26, %r111, %r176; mad.lo.s32 %r178, %r23, %r112, %r177; mul.wide.u32 %rd21, %r178, 4; add.s64 %rd22, %rd2, %rd21; ld.global.f32 %f16, [%rd22]; mov.u16 %rs13, 0; $L__BB0_20: or.pred %p82, %p70, %p44; mov.u16 %rs14, 1; or.pred %p84, %p82, %p47; @%p84 bra $L__BB0_22; mul.lo.s32 %r179, %r17, %r109; mad.lo.s32 %r180, %r18, %r110, %r179; mad.lo.s32 %r181, %r26, %r111, %r180; mad.lo.s32 %r182, %r24, %r112, %r181; mul.wide.u32 %rd23, %r182, 4; add.s64 %rd24, %rd2, %rd23; ld.global.f32 %f18, [%rd24]; mov.u16 %rs14, 0; $L__BB0_22: mov.f32 %f166, 0f00000000; setp.eq.s64 %p85, %rd4, 0; @%p85 bra $L__BB0_44; setp.eq.s64 %p86, %rd4, 1; @%p86 bra $L__BB0_43; setp.ne.s64 %p87, %rd4, 2; @%p87 bra $L__BB0_45; setp.ne.s16 %p88, %rs436, 0; mov.f32 %f166, 0fFF7FFFEE; @%p88 bra $L__BB0_27; mov.f32 %f77, 0fFF7FFFEE; max.f32 %f166, %f77, %f156; $L__BB0_27: setp.ne.s16 %p89, %rs7, 0; @%p89 bra $L__BB0_29; max.f32 %f166, %f166, %f4; $L__BB0_29: setp.ne.s16 %p90, %rs8, 0; @%p90 bra $L__BB0_31; max.f32 %f166, %f166, %f6; $L__BB0_31: setp.ne.s16 %p91, %rs9, 0; @%p91 bra $L__BB0_33; max.f32 %f166, %f166, %f8; $L__BB0_33: setp.ne.s16 %p92, %rs10, 0; @%p92 bra $L__BB0_35; max.f32 %f166, %f166, %f10; $L__BB0_35: setp.ne.s16 %p93, %rs11, 0; @%p93 bra $L__BB0_37; max.f32 %f166, %f166, %f12; $L__BB0_37: setp.ne.s16 %p94, %rs12, 0; @%p94 bra $L__BB0_39; max.f32 %f166, %f166, %f14; $L__BB0_39: setp.ne.s16 %p95, %rs13, 0; @%p95 bra $L__BB0_41; max.f32 %f166, %f166, %f16; $L__BB0_41: setp.ne.s16 %p96, %rs14, 0; @%p96 bra $L__BB0_45; max.f32 %f166, %f166, %f18; bra.uni $L__BB0_45; $L__BB0_69: setp.eq.s16 %p180, %rs445, 0; selp.u32 %r242, 1, 0, %p180; add.f32 %f131, %f174, 0f00000000; selp.f32 %f132, %f131, 0f00000000, %p180; selp.b32 %r243, 2, 1, %p180; add.f32 %f133, %f132, %f42; setp.eq.s16 %p181, %rs16, 0; selp.b32 %r244, %r243, %r242, %p181; selp.f32 %f134, %f133, %f132, %p181; add.f32 %f135, %f134, %f44; setp.eq.s16 %p182, %rs17, 0; selp.u32 %r245, 1, 0, %p182; add.s32 %r246, %r244, %r245; selp.f32 %f136, %f135, %f134, %p182; add.f32 %f137, %f136, %f46; setp.eq.s16 %p183, %rs18, 0; selp.u32 %r247, 1, 0, %p183; add.s32 %r248, %r246, %r247; selp.f32 %f138, %f137, %f136, %p183; cvt.rn.f32.s32 %f139, %r248; div.rn.f32 %f179, %f138, %f139; bra.uni $L__BB0_70; $L__BB0_68: setp.eq.s16 %p176, %rs445, 0; fma.rn.f32 %f123, %f174, %f174, 0f00000000; selp.f32 %f124, %f123, 0f00000000, %p176; fma.rn.f32 %f125, %f42, %f42, %f124; setp.eq.s16 %p177, %rs16, 0; selp.f32 %f126, %f125, %f124, %p177; fma.rn.f32 %f127, %f44, %f44, %f126; setp.eq.s16 %p178, %rs17, 0; selp.f32 %f128, %f127, %f126, %p178; fma.rn.f32 %f129, %f46, %f46, %f128; setp.eq.s16 %p179, %rs18, 0; selp.f32 %f130, %f129, %f128, %p179; sqrt.rn.f32 %f179, %f130; $L__BB0_70: @%p129 bra $L__BB0_203; setp.ge.u32 %p185, %r27, %r113; setp.lt.s32 %p186, %r28, 0; or.pred %p187, %p186, %p185; setp.ge.u32 %p188, %r28, %r114; or.pred %p189, %p187, %p188; setp.lt.s32 %p190, %r29, 0; or.pred %p191, %p189, %p190; setp.ge.u32 %p192, %r29, %r115; or.pred %p193, %p191, %p192; setp.lt.s32 %p194, %r30, 0; or.pred %p195, %p193, %p194; setp.ge.u32 %p196, %r30, %r116; or.pred %p197, %p195, %p196; @%p197 bra $L__BB0_203; ld.param.u64 %rd83, [PoolingKernel_param_0+16]; cvta.to.global.u64 %rd82, %rd83; mul.lo.s32 %r249, %r27, %r117; mad.lo.s32 %r250, %r28, %r118, %r249; mad.lo.s32 %r251, %r29, %r119, %r250; mad.lo.s32 %r252, %r30, %r120, %r251; mul.wide.u32 %rd35, %r252, 4; add.s64 %rd36, %rd82, %rd35; st.global.f32 [%rd36], %f179; bra.uni $L__BB0_203; $L__BB0_189: setp.ne.s16 %p380, %rs497, 0; mov.u32 %r396, 0; @%p380 bra $L__BB0_191; // begin inline asm {add.f16 %rs509,%rs509,%rs496; } // end inline asm mov.u32 %r396, 1; $L__BB0_191: setp.ne.s16 %p381, %rs115, 0; @%p381 bra $L__BB0_193; add.s32 %r396, %r396, 1; // begin inline asm {add.f16 %rs509,%rs509,%rs114; } // end inline asm $L__BB0_193: setp.ne.s16 %p382, %rs118, 0; @%p382 bra $L__BB0_195; add.s32 %r396, %r396, 1; // begin inline asm {add.f16 %rs509,%rs509,%rs117; } // end inline asm $L__BB0_195: setp.ne.s16 %p383, %rs121, 0; @%p383 bra $L__BB0_197; add.s32 %r396, %r396, 1; // begin inline asm {add.f16 %rs509,%rs509,%rs120; } // end inline asm $L__BB0_197: // begin inline asm cvt.rn.f16.s32 %rs424, %r396; // end inline asm // begin inline asm { cvt.f32.f16 %f148, %rs509;} // end inline asm // begin inline asm { cvt.f32.f16 %f149, %rs424;} // end inline asm // begin inline asm {rcp.approx.ftz.f32 %f150, %f149; } // end inline asm mul.f32 %f152, %f148, %f150; // begin inline asm { cvt.rn.f16.f32 %rs505, %f152;} // end inline asm and.b16 %rs429, %rs505, 32767; mov.u16 %rs430, 143; // begin inline asm { .reg .pred __$temp3; setp.lt.f16 __$temp3, %rs429, %rs430; selp.u16 %rs428, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p384, %rs428, 0; setp.eq.s16 %p385, %rs429, 0; or.pred %p386, %p384, %p385; @%p386 bra $L__BB0_200; neg.f32 %f154, %f149; fma.rn.f32 %f155, %f154, %f152, %f148; fma.rn.f32 %f153, %f150, %f155, %f152; // begin inline asm { cvt.rn.f16.f32 %rs505, %f153;} // end inline asm bra.uni $L__BB0_200; $L__BB0_180: setp.ne.s16 %p376, %rs497, 0; @%p376 bra $L__BB0_182; // begin inline asm {mul.f16 %rs386,%rs496,%rs496; } // end inline asm // begin inline asm {add.f16 %rs509,%rs509,%rs386; } // end inline asm $L__BB0_182: setp.ne.s16 %p377, %rs115, 0; @%p377 bra $L__BB0_184; // begin inline asm {mul.f16 %rs392,%rs114,%rs114; } // end inline asm // begin inline asm {add.f16 %rs509,%rs509,%rs392; } // end inline asm $L__BB0_184: setp.ne.s16 %p378, %rs118, 0; @%p378 bra $L__BB0_186; // begin inline asm {mul.f16 %rs398,%rs117,%rs117; } // end inline asm // begin inline asm {add.f16 %rs509,%rs509,%rs398; } // end inline asm $L__BB0_186: setp.ne.s16 %p379, %rs121, 0; @%p379 bra $L__BB0_188; // begin inline asm {mul.f16 %rs404,%rs120,%rs120; } // end inline asm // begin inline asm {add.f16 %rs509,%rs509,%rs404; } // end inline asm $L__BB0_188: // begin inline asm {.reg.b32 f; .reg.b16 r; mov.b16 r,%rs509; cvt.f32.f16 f,r; sqrt.approx.ftz.f32 f,f; cvt.rn.f16.f32 r,f; mov.b16 %rs505,r; } // end inline asm bra.uni $L__BB0_200; $L__BB0_199: mov.f64 %fd6, 0d0000000000000000; // begin inline asm { cvt.rn.f16.f64 %rs505, %fd6;} // end inline asm $L__BB0_200: @%p325 bra $L__BB0_203; setp.ge.u32 %p388, %r69, %r113; setp.lt.s32 %p389, %r70, 0; or.pred %p390, %p389, %p388; setp.ge.u32 %p391, %r70, %r114; or.pred %p392, %p390, %p391; setp.lt.s32 %p393, %r72, 0; or.pred %p394, %p392, %p393; setp.ge.u32 %p395, %r72, %r115; or.pred %p396, %p394, %p395; setp.lt.s32 %p397, %r71, 0; or.pred %p398, %p396, %p397; setp.ge.u32 %p399, %r71, %r116; or.pred %p400, %p398, %p399; @%p400 bra $L__BB0_203; ld.param.u64 %rd73, [PoolingKernel_param_0+16]; cvta.to.global.u64 %rd72, %rd73; mul.lo.s32 %r359, %r69, %r117; mad.lo.s32 %r360, %r70, %r118, %r359; mad.lo.s32 %r361, %r72, %r119, %r360; mad.lo.s32 %r362, %r71, %r120, %r361; mul.wide.u32 %rd65, %r362, 2; add.s64 %rd66, %rd72, %rd65; st.global.u16 [%rd66], %rs505; bra.uni $L__BB0_203; $L__BB0_44: setp.eq.s16 %p106, %rs436, 0; selp.u32 %r183, 1, 0, %p106; add.f32 %f96, %f156, 0f00000000; selp.f32 %f97, %f96, 0f00000000, %p106; selp.b32 %r184, 2, 1, %p106; add.f32 %f98, %f97, %f4; setp.eq.s16 %p107, %rs7, 0; selp.b32 %r185, %r184, %r183, %p107; selp.f32 %f99, %f98, %f97, %p107; add.f32 %f100, %f99, %f6; setp.eq.s16 %p108, %rs8, 0; selp.u32 %r186, 1, 0, %p108; add.s32 %r187, %r185, %r186; selp.f32 %f101, %f100, %f99, %p108; add.f32 %f102, %f101, %f8; setp.eq.s16 %p109, %rs9, 0; selp.u32 %r188, 1, 0, %p109; add.s32 %r189, %r187, %r188; selp.f32 %f103, %f102, %f101, %p109; add.f32 %f104, %f103, %f10; setp.eq.s16 %p110, %rs10, 0; selp.u32 %r190, 1, 0, %p110; add.s32 %r191, %r189, %r190; selp.f32 %f105, %f104, %f103, %p110; add.f32 %f106, %f105, %f12; setp.eq.s16 %p111, %rs11, 0; selp.u32 %r192, 1, 0, %p111; add.s32 %r193, %r191, %r192; selp.f32 %f107, %f106, %f105, %p111; add.f32 %f108, %f107, %f14; setp.eq.s16 %p112, %rs12, 0; selp.u32 %r194, 1, 0, %p112; add.s32 %r195, %r193, %r194; selp.f32 %f109, %f108, %f107, %p112; add.f32 %f110, %f109, %f16; setp.eq.s16 %p113, %rs13, 0; selp.u32 %r196, 1, 0, %p113; add.s32 %r197, %r195, %r196; selp.f32 %f111, %f110, %f109, %p113; add.f32 %f112, %f111, %f18; setp.eq.s16 %p114, %rs14, 0; selp.u32 %r198, 1, 0, %p114; add.s32 %r199, %r197, %r198; selp.f32 %f113, %f112, %f111, %p114; cvt.rn.f32.s32 %f114, %r199; div.rn.f32 %f166, %f113, %f114; bra.uni $L__BB0_45; $L__BB0_43: setp.eq.s16 %p97, %rs436, 0; fma.rn.f32 %f78, %f156, %f156, 0f00000000; selp.f32 %f79, %f78, 0f00000000, %p97; fma.rn.f32 %f80, %f4, %f4, %f79; setp.eq.s16 %p98, %rs7, 0; selp.f32 %f81, %f80, %f79, %p98; fma.rn.f32 %f82, %f6, %f6, %f81; setp.eq.s16 %p99, %rs8, 0; selp.f32 %f83, %f82, %f81, %p99; fma.rn.f32 %f84, %f8, %f8, %f83; setp.eq.s16 %p100, %rs9, 0; selp.f32 %f85, %f84, %f83, %p100; fma.rn.f32 %f86, %f10, %f10, %f85; setp.eq.s16 %p101, %rs10, 0; selp.f32 %f87, %f86, %f85, %p101; fma.rn.f32 %f88, %f12, %f12, %f87; setp.eq.s16 %p102, %rs11, 0; selp.f32 %f89, %f88, %f87, %p102; fma.rn.f32 %f90, %f14, %f14, %f89; setp.eq.s16 %p103, %rs12, 0; selp.f32 %f91, %f90, %f89, %p103; fma.rn.f32 %f92, %f16, %f16, %f91; setp.eq.s16 %p104, %rs13, 0; selp.f32 %f93, %f92, %f91, %p104; fma.rn.f32 %f94, %f18, %f18, %f93; setp.eq.s16 %p105, %rs14, 0; selp.f32 %f95, %f94, %f93, %p105; sqrt.rn.f32 %f166, %f95; $L__BB0_45: @%p18 bra $L__BB0_203; setp.ge.u32 %p116, %r17, %r113; setp.lt.s32 %p117, %r18, 0; or.pred %p118, %p117, %p116; setp.ge.u32 %p119, %r18, %r114; or.pred %p120, %p118, %p119; setp.lt.s32 %p121, %r19, 0; or.pred %p122, %p120, %p121; setp.ge.u32 %p123, %r19, %r115; or.pred %p124, %p122, %p123; setp.lt.s32 %p125, %r20, 0; or.pred %p126, %p124, %p125; setp.ge.u32 %p127, %r20, %r116; or.pred %p128, %p126, %p127; @%p128 bra $L__BB0_203; ld.param.u64 %rd81, [PoolingKernel_param_0+16]; cvta.to.global.u64 %rd80, %rd81; mul.lo.s32 %r200, %r17, %r117; mad.lo.s32 %r201, %r18, %r118, %r200; mad.lo.s32 %r202, %r19, %r119, %r201; mad.lo.s32 %r203, %r20, %r120, %r202; mul.wide.u32 %rd25, %r203, 4; add.s64 %rd26, %rd80, %rd25; st.global.f32 [%rd26], %f166; bra.uni $L__BB0_203; $L__BB0_135: setp.ne.s16 %p298, %rs450, 0; mov.u32 %r387, 0; @%p298 bra $L__BB0_137; // begin inline asm {add.f16 %rs477,%rs477,%rs449; } // end inline asm mov.u32 %r387, 1; $L__BB0_137: setp.ne.s16 %p299, %rs24, 0; @%p299 bra $L__BB0_139; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs23; } // end inline asm $L__BB0_139: setp.ne.s16 %p300, %rs27, 0; @%p300 bra $L__BB0_141; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs26; } // end inline asm $L__BB0_141: setp.ne.s16 %p301, %rs30, 0; @%p301 bra $L__BB0_143; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs29; } // end inline asm $L__BB0_143: setp.ne.s16 %p302, %rs33, 0; @%p302 bra $L__BB0_145; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs32; } // end inline asm $L__BB0_145: setp.ne.s16 %p303, %rs36, 0; @%p303 bra $L__BB0_147; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs35; } // end inline asm $L__BB0_147: setp.ne.s16 %p304, %rs39, 0; @%p304 bra $L__BB0_149; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs38; } // end inline asm $L__BB0_149: setp.ne.s16 %p305, %rs42, 0; @%p305 bra $L__BB0_151; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs41; } // end inline asm $L__BB0_151: setp.ne.s16 %p306, %rs45, 0; @%p306 bra $L__BB0_153; add.s32 %r387, %r387, 1; // begin inline asm {add.f16 %rs477,%rs477,%rs44; } // end inline asm $L__BB0_153: // begin inline asm cvt.rn.f16.s32 %rs349, %r387; // end inline asm // begin inline asm { cvt.f32.f16 %f140, %rs477;} // end inline asm // begin inline asm { cvt.f32.f16 %f141, %rs349;} // end inline asm // begin inline asm {rcp.approx.ftz.f32 %f142, %f141; } // end inline asm mul.f32 %f144, %f140, %f142; // begin inline asm { cvt.rn.f16.f32 %rs468, %f144;} // end inline asm and.b16 %rs354, %rs468, 32767; mov.u16 %rs355, 143; // begin inline asm { .reg .pred __$temp3; setp.lt.f16 __$temp3, %rs354, %rs355; selp.u16 %rs353, 1, 0, __$temp3;} // end inline asm setp.eq.s16 %p307, %rs353, 0; setp.eq.s16 %p308, %rs354, 0; or.pred %p309, %p307, %p308; @%p309 bra $L__BB0_156; neg.f32 %f146, %f141; fma.rn.f32 %f147, %f146, %f144, %f140; fma.rn.f32 %f145, %f142, %f147, %f144; // begin inline asm { cvt.rn.f16.f32 %rs468, %f145;} // end inline asm bra.uni $L__BB0_156; $L__BB0_116: setp.ne.s16 %p289, %rs450, 0; @%p289 bra $L__BB0_118; // begin inline asm {mul.f16 %rs266,%rs449,%rs449; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs266; } // end inline asm $L__BB0_118: setp.ne.s16 %p290, %rs24, 0; @%p290 bra $L__BB0_120; // begin inline asm {mul.f16 %rs272,%rs23,%rs23; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs272; } // end inline asm $L__BB0_120: setp.ne.s16 %p291, %rs27, 0; @%p291 bra $L__BB0_122; // begin inline asm {mul.f16 %rs278,%rs26,%rs26; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs278; } // end inline asm $L__BB0_122: setp.ne.s16 %p292, %rs30, 0; @%p292 bra $L__BB0_124; // begin inline asm {mul.f16 %rs284,%rs29,%rs29; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs284; } // end inline asm $L__BB0_124: setp.ne.s16 %p293, %rs33, 0; @%p293 bra $L__BB0_126; // begin inline asm {mul.f16 %rs290,%rs32,%rs32; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs290; } // end inline asm $L__BB0_126: setp.ne.s16 %p294, %rs36, 0; @%p294 bra $L__BB0_128; // begin inline asm {mul.f16 %rs296,%rs35,%rs35; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs296; } // end inline asm $L__BB0_128: setp.ne.s16 %p295, %rs39, 0; @%p295 bra $L__BB0_130; // begin inline asm {mul.f16 %rs302,%rs38,%rs38; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs302; } // end inline asm $L__BB0_130: setp.ne.s16 %p296, %rs42, 0; @%p296 bra $L__BB0_132; // begin inline asm {mul.f16 %rs308,%rs41,%rs41; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs308; } // end inline asm $L__BB0_132: setp.ne.s16 %p297, %rs45, 0; @%p297 bra $L__BB0_134; // begin inline asm {mul.f16 %rs314,%rs44,%rs44; } // end inline asm // begin inline asm {add.f16 %rs477,%rs477,%rs314; } // end inline asm $L__BB0_134: // begin inline asm {.reg.b32 f; .reg.b16 r; mov.b16 r,%rs477; cvt.f32.f16 f,r; sqrt.approx.ftz.f32 f,f; cvt.rn.f16.f32 r,f; mov.b16 %rs468,r; } // end inline asm bra.uni $L__BB0_156; $L__BB0_155: mov.f64 %fd3, 0d0000000000000000; // begin inline asm { cvt.rn.f16.f64 %rs468, %fd3;} // end inline asm $L__BB0_156: mov.u32 %r368, %ctaid.z; setp.lt.s32 %p409, %r368, 0; @%p409 bra $L__BB0_203; mov.u32 %r369, %ctaid.z; setp.ge.u32 %p311, %r369, %r113; setp.lt.s32 %p312, %r42, 0; or.pred %p313, %p312, %p311; setp.ge.u32 %p314, %r42, %r114; or.pred %p315, %p313, %p314; setp.lt.s32 %p316, %r44, 0; or.pred %p317, %p315, %p316; setp.ge.u32 %p318, %r44, %r115; or.pred %p319, %p317, %p318; setp.lt.s32 %p320, %r43, 0; or.pred %p321, %p319, %p320; setp.ge.u32 %p322, %r43, %r116; or.pred %p323, %p321, %p322; @%p323 bra $L__BB0_203; mov.u32 %r370, %ctaid.z; ld.param.u64 %rd71, [PoolingKernel_param_0+16]; cvta.to.global.u64 %rd70, %rd71; mul.lo.s32 %r314, %r370, %r117; mad.lo.s32 %r315, %r42, %r118, %r314; mad.lo.s32 %r316, %r44, %r119, %r315; mad.lo.s32 %r317, %r43, %r120, %r316; mul.wide.u32 %rd55, %r317, 2; add.s64 %rd56, %rd70, %rd55; st.global.u16 [%rd56], %rs468; $L__BB0_203: ret; }