// ============================================================================
// PTX (compiler-generated) — body of a depthwise conv2d kernel, fp16, NHWC.
// The kernel param symbol `depthwise_conv_fp16_nhwc_kernel_param_0` appears
// below; the `.entry` header and the code defining %r1-%r3 / %rd1 precede this
// chunk and are not visible here. Code is left byte-identical; only comment
// lines were inserted at the chunk-line boundaries. Register roles are
// inferred from use — NOTE(review): confirm against the CUDA source.
//   %f85  = f32 accumulator for the dot product (zeroed per outer iteration)
//   %rd2  = input base (global), %rd3 = filter base (global),
//   %rd5  = output base (global), %rd4 = per-%r1 fp16 element off the pointer
//           loaded from [%rd1+24] (presumably a bias vector — TODO confirm)
//   All addressing scales by 2 bytes (mul.wide.s32 ..., 2) — fp16 elements.
//   Loads are u16 + `cvt.f32.f16` (inside `// begin/end inline asm` markers,
//   i.e. __half intrinsics in the CUDA source); math is fma.rn.ftz.f32.
//
// Next chunk line: coordinate guards — if %r2 >= %r104 or %r3 >= %r103 or
// %r1 >= param[+36], branch to $L__BB0_60 (ret); also exit if param[+32] < 1.
// Then bulk ld.param of flags (+144/+145: %rs2..%rs6), dims/strides
// (+32..+132), pointers (+0,+8,+16,+24 via cvta.to.global), and the f32 pair
// at +136 (%f39,%f40 — used by the activation at the end). Precomputes
// loop-invariant products plus unroll counts: %r31 = n & 3 / %r32 = n - %r31
// for a 4x-unrolled path, %r33/%r34 likewise for a 2x-unrolled path.
setp.ge.s32 %p3, %r2, %r104; setp.ge.s32 %p4, %r3, %r103; or.pred %p5, %p3, %p4; ld.param.u32 %r107, [depthwise_conv_fp16_nhwc_kernel_param_0+36]; setp.ge.s32 %p6, %r1, %r107; or.pred %p7, %p5, %p6; @%p7 bra $L__BB0_60; ld.param.u32 %r108, [%rd1+32]; setp.lt.s32 %p8, %r108, 1; @%p8 bra $L__BB0_60; ld.param.v4.u8 {%rs3, %rs4, %rs5, %rs6}, [%rd1+144]; mov.u32 %r255, 0; ld.param.v2.u32 {%r110, %r111}, [%rd1+56]; ld.param.u64 %rd14, [%rd1+24]; setp.ne.s64 %p9, %rd14, 0; ld.param.v2.u32 {%r112, %r113}, [%rd1+32]; setp.lt.s32 %p10, %r1, %r113; and.pred %p1, %p9, %p10; setp.ne.s16 %p11, %rs5, 0; setp.eq.s16 %p12, %rs4, 0; or.pred %p2, %p11, %p12; ld.param.u32 %r115, [%rd1+132]; add.s32 %r7, %r115, %r3; ld.param.u32 %r116, [%rd1+120]; add.s32 %r8, %r116, %r2; ld.param.v2.u32 {%r117, %r118}, [%rd1+104]; mul.lo.s32 %r119, %r118, %r2; sub.s32 %r11, %r119, %r116; ld.param.u32 %r120, [%rd1+104]; mul.lo.s32 %r121, %r120, %r3; sub.s32 %r12, %r121, %r115; ld.param.u8 %rs2, [%rd1+145]; ld.param.u32 %r13, [%rd1+56]; ld.param.v2.u32 {%r122, %r123}, [%rd1+112]; ld.param.u32 %r16, [%rd1+52]; ld.param.v2.u32 {%r124, %r125}, [%rd1+48]; ld.param.u64 %rd15, [%rd1+8]; cvta.to.global.u64 %rd2, %rd15; ld.param.u64 %rd16, [%rd1+16]; cvta.to.global.u64 %rd3, %rd16; ld.param.u32 %r19, [%rd1+80]; ld.param.v2.u32 {%r126, %r127}, [%rd1+88]; ld.param.v2.u32 {%r128, %r129}, [%rd1+96]; mul.lo.s32 %r23, %r128, %r1; cvta.to.global.u64 %rd17, %rd14; mul.wide.s32 %rd18, %r1, 2; add.s64 %rd4, %rd17, %rd18; ld.param.v2.f32 {%f39, %f40}, [%rd1+136]; ld.param.u64 %rd19, [%rd1]; cvta.to.global.u64 %rd5, %rd19; ld.param.u32 %r24, [%rd1+64]; ld.param.v2.u32 {%r131, %r132}, [%rd1+72]; mul.lo.s32 %r25, %r131, %r3; mul.lo.s32 %r26, %r132, %r2; ld.param.u32 %r27, [%rd1+112]; ld.param.u32 %r28, [%rd1+48]; ld.param.u32 %r29, [%rd1+60]; max.s32 %r135, %r29, 1; add.s32 %r30, %r135, -1; and.b32 %r31, %r135, 3; sub.s32 %r32, %r135, %r31; and.b32 %r33, %r111, 1; sub.s32 %r34, %r111, %r33; mul.wide.s32 %rd6, %r118, 2; 
// $L__BB0_3 is the top of the outer loop (%r255 counts 0..%r112-1; the latch
// is in the last chunk line). %f85 is reset to 0f00000000 each iteration,
// then one of three accumulation paths is chosen:
//   %rs3 == 0          -> $L__BB0_33 (bounds-check-only path, 4x unrolled)
//   %p2 (%rs5|!%rs4)   -> $L__BB0_13 (rem/div divisibility path, 2x unrolled)
//   otherwise          -> $L__BB0_5  (direct strided walk, no div in loop)
// $L__BB0_13..: nested loops over (%r259 < %r110) x (%r262 < %r111). Each tap
// forms a source coordinate, requires rem(%r54,%r117)==0 and
// rem(%r58,%r118)==0 before contributing (a stride-divisibility gate —
// presumably transposed/fractionally-strided conv, TODO confirm), then bounds
// checks the quotients against 0..%r124/%r125 before the fp16 load + FMA.
// %rs2 mirrors the tap index (selp of i vs count-1-i) — looks like a
// filter-rotation flag; confirm. Inner loop is unrolled 2x (%r261 = %r34).
not.pred %p105, %p1; $L__BB0_3: setp.eq.s16 %p13, %rs3, 0; mov.f32 %f85, 0f00000000; @%p13 bra $L__BB0_33; @%p2 bra $L__BB0_13; bra.uni $L__BB0_5; $L__BB0_13: setp.lt.s32 %p23, %r110, 1; @%p23 bra $L__BB0_57; mad.lo.s32 %r50, %r19, %r255, %r1; mov.f32 %f85, 0f00000000; mov.u32 %r259, 0; $L__BB0_15: setp.lt.s32 %p24, %r111, 1; @%p24 bra $L__BB0_32; setp.eq.s32 %p25, %r111, 1; mov.u32 %r262, 0; @%p25 bra $L__BB0_27; not.b32 %r254, %r259; add.s32 %r253, %r110, %r254; setp.eq.s16 %p26, %rs2, 0; selp.b32 %r144, %r253, %r259, %p26; mul.lo.s32 %r145, %r144, %r27; sub.s32 %r54, %r7, %r145; rem.s32 %r55, %r54, %r117; mov.u32 %r261, %r34; $L__BB0_18: mul.lo.s32 %r249, %r129, %r259; setp.eq.s16 %p108, %rs2, 0; not.b32 %r146, %r262; add.s32 %r147, %r111, %r146; selp.b32 %r148, %r147, %r262, %p108; mul.lo.s32 %r149, %r148, %r123; sub.s32 %r58, %r8, %r149; add.s32 %r150, %r23, %r262; add.s32 %r151, %r150, %r249; mul.wide.s32 %rd23, %r151, 2; add.s64 %rd10, %rd3, %rd23; setp.ne.s32 %p28, %r55, 0; @%p28 bra $L__BB0_22; div.s32 %r59, %r54, %r117; rem.s32 %r152, %r58, %r118; setp.ne.s32 %p29, %r152, 0; @%p29 bra $L__BB0_22; div.s32 %r60, %r58, %r118; setp.le.s32 %p30, %r125, %r60; or.b32 %r153, %r60, %r59; setp.lt.s32 %p31, %r153, 0; or.pred %p32, %p31, %p30; setp.le.s32 %p33, %r124, %r59; or.pred %p34, %p32, %p33; @%p34 bra $L__BB0_22; mad.lo.s32 %r154, %r126, %r59, %r50; mad.lo.s32 %r155, %r127, %r60, %r154; mul.wide.s32 %rd24, %r155, 2; add.s64 %rd25, %rd2, %rd24; ld.global.u16 %rs12, [%rd25]; // begin inline asm
{ cvt.f32.f16 %f48, %rs12;} // end inline asm
ld.global.u16 %rs13, [%rd10]; // begin inline asm
{ cvt.f32.f16 %f49, %rs13;} // end inline asm
fma.rn.ftz.f32 %f85, %f48, %f49, %f85; $L__BB0_22: setp.eq.s16 %p109, %rs2, 0; mov.u32 %r156, -2; sub.s32 %r157, %r156, %r262; add.s32 %r158, %r111, %r157; add.s32 %r159, %r262, 1; selp.b32 %r160, %r158, %r159, %p109; mul.lo.s32 %r161, %r160, %r123; sub.s32 %r61, %r8, %r161; @%p28 bra $L__BB0_26; div.s32 %r62, %r54, %r117; rem.s32 
// Second tap of the 2x-unrolled pair: same divisibility + bounds guards, the
// filter element comes from [%rd10+2]. $L__BB0_26 is the 2x latch
// (%r262 += 2 / %r261 -= 2). $L__BB0_27 then handles the odd remainder tap
// (when %r111 & 1 != 0) with identical guards before its load/FMA.
%r162, %r61, %r118; setp.ne.s32 %p37, %r162, 0; @%p37 bra $L__BB0_26; div.s32 %r63, %r61, %r118; setp.le.s32 %p38, %r125, %r63; or.b32 %r163, %r63, %r62; setp.lt.s32 %p39, %r163, 0; or.pred %p40, %p39, %p38; setp.le.s32 %p41, %r124, %r62; or.pred %p42, %p40, %p41; @%p42 bra $L__BB0_26; mad.lo.s32 %r164, %r126, %r62, %r50; mad.lo.s32 %r165, %r127, %r63, %r164; mul.wide.s32 %rd26, %r165, 2; add.s64 %rd27, %rd2, %rd26; ld.global.u16 %rs15, [%rd27]; // begin inline asm
{ cvt.f32.f16 %f50, %rs15;} // end inline asm
ld.global.u16 %rs16, [%rd10+2]; // begin inline asm
{ cvt.f32.f16 %f51, %rs16;} // end inline asm
fma.rn.ftz.f32 %f85, %f50, %f51, %f85; $L__BB0_26: add.s32 %r262, %r262, 2; add.s32 %r261, %r261, -2; setp.ne.s32 %p43, %r261, 0; @%p43 bra $L__BB0_18; $L__BB0_27: and.b32 %r244, %r111, 1; setp.eq.s32 %p44, %r244, 0; @%p44 bra $L__BB0_32; not.b32 %r251, %r259; add.s32 %r250, %r110, %r251; setp.eq.s16 %p45, %rs2, 0; not.b32 %r166, %r262; add.s32 %r167, %r111, %r166; selp.b32 %r168, %r167, %r262, %p45; selp.b32 %r169, %r250, %r259, %p45; mul.lo.s32 %r170, %r169, %r27; mul.lo.s32 %r171, %r168, %r123; sub.s32 %r67, %r7, %r170; sub.s32 %r68, %r8, %r171; rem.s32 %r172, %r67, %r117; setp.ne.s32 %p46, %r172, 0; @%p46 bra $L__BB0_32; div.s32 %r69, %r67, %r117; rem.s32 %r173, %r68, %r118; setp.ne.s32 %p47, %r173, 0; @%p47 bra $L__BB0_32; div.s32 %r70, %r68, %r118; setp.le.s32 %p48, %r125, %r70; or.b32 %r174, %r70, %r69; setp.lt.s32 %p49, %r174, 0; or.pred %p50, %p49, %p48; setp.le.s32 %p51, %r124, %r69; or.pred %p52, %p50, %p51; @%p52 bra $L__BB0_32; mul.lo.s32 %r252, %r129, %r259; mad.lo.s32 %r175, %r126, %r69, %r50; mad.lo.s32 %r176, %r127, %r70, %r175; mul.wide.s32 %rd28, %r176, 2; add.s64 %rd29, %rd2, %rd28; ld.global.u16 %rs18, [%rd29]; // begin inline asm
{ cvt.f32.f16 %f52, %rs18;} // end inline asm
add.s32 %r177, %r23, %r262; add.s32 %r178, %r177, %r252; mul.wide.s32 %rd30, %r178, 2; add.s64 %rd31, %rd3, %rd30; ld.global.u16 %rs19, [%rd31]; // begin inline asm
// $L__BB0_32 is the outer-row latch of the divisibility path (%r259++,
// loop while < %r110), then joins $L__BB0_57.
// $L__BB0_33..: the second accumulation path — same nested tap loops but with
// no rem/div gate; guards are pure bounds checks (coordinate within
// 0..%r16 / 0..%r124 ranges). The inner loop is unrolled 4x when the trip
// count %r30+1 >= 4 (%r265 starts at %r32); taps 1 and 2 of the unroll read
// filter elements at [%rd11] and [%rd11+2].
{ cvt.f32.f16 %f53, %rs19;} // end inline asm
fma.rn.ftz.f32 %f85, %f52, %f53, %f85; $L__BB0_32: add.s32 %r259, %r259, 1; setp.lt.s32 %p53, %r259, %r110; @%p53 bra $L__BB0_15; bra.uni $L__BB0_57; $L__BB0_33: setp.lt.s32 %p54, %r110, 1; @%p54 bra $L__BB0_57; mad.lo.s32 %r72, %r19, %r255, %r1; mov.f32 %f85, 0f00000000; mov.u32 %r263, 0; $L__BB0_35: setp.lt.s32 %p55, %r111, 1; @%p55 bra $L__BB0_56; setp.lt.u32 %p56, %r30, 3; not.b32 %r181, %r263; add.s32 %r182, %r13, %r181; mul.lo.s32 %r74, %r182, %r122; mov.u32 %r266, 0; @%p56 bra $L__BB0_47; mul.lo.s32 %r245, %r122, %r263; setp.eq.s16 %p57, %rs2, 0; selp.b32 %r184, %r74, %r245, %p57; add.s32 %r77, %r184, %r12; mad.lo.s32 %r78, %r126, %r77, %r72; mov.u32 %r265, %r32; $L__BB0_38: mul.lo.s32 %r241, %r129, %r263; not.b32 %r185, %r266; add.s32 %r186, %r111, %r185; selp.b32 %r187, %r186, %r266, %p57; mad.lo.s32 %r81, %r123, %r187, %r11; setp.le.s32 %p59, %r16, %r81; or.b32 %r188, %r81, %r77; setp.lt.s32 %p60, %r188, 0; or.pred %p61, %p59, %p60; setp.le.s32 %p62, %r124, %r77; add.s32 %r189, %r23, %r266; add.s32 %r190, %r189, %r241; mul.wide.s32 %rd32, %r190, 2; add.s64 %rd11, %rd3, %rd32; or.pred %p63, %p62, %p61; @%p63 bra $L__BB0_40; mad.lo.s32 %r191, %r127, %r81, %r78; mul.wide.s32 %rd33, %r191, 2; add.s64 %rd34, %rd2, %rd33; ld.global.u16 %rs22, [%rd34]; // begin inline asm
{ cvt.f32.f16 %f57, %rs22;} // end inline asm
ld.global.u16 %rs23, [%rd11]; // begin inline asm
{ cvt.f32.f16 %f58, %rs23;} // end inline asm
fma.rn.ftz.f32 %f85, %f57, %f58, %f85; $L__BB0_40: mov.u32 %r192, -2; sub.s32 %r193, %r192, %r266; add.s32 %r194, %r111, %r193; add.s32 %r195, %r266, 1; selp.b32 %r196, %r194, %r195, %p57; mad.lo.s32 %r82, %r123, %r196, %r11; setp.le.s32 %p65, %r16, %r82; or.b32 %r197, %r82, %r77; setp.lt.s32 %p66, %r197, 0; or.pred %p67, %p65, %p66; or.pred %p69, %p62, %p67; @%p69 bra $L__BB0_42; mad.lo.s32 %r198, %r127, %r82, %r78; mul.wide.s32 %rd35, %r198, 2; add.s64 %rd36, %rd2, %rd35; ld.global.u16 %rs25, [%rd36]; // begin 
// Unroll taps 3 and 4 ([%rd11+4], [%rd11+6]), each with the same bounds
// guards. $L__BB0_46 is the 4x latch (%r266 += 4 / %r265 -= 4). $L__BB0_47
// starts the epilogue for the remaining %r31 in {1,2,3} taps; its first
// tap reads the filter at [%rd12].
inline asm
{ cvt.f32.f16 %f59, %rs25;} // end inline asm
ld.global.u16 %rs26, [%rd11+2]; // begin inline asm
{ cvt.f32.f16 %f60, %rs26;} // end inline asm
fma.rn.ftz.f32 %f85, %f59, %f60, %f85; $L__BB0_42: mov.u32 %r199, -3; sub.s32 %r200, %r199, %r266; add.s32 %r201, %r111, %r200; add.s32 %r202, %r266, 2; selp.b32 %r203, %r201, %r202, %p57; mad.lo.s32 %r83, %r123, %r203, %r11; setp.le.s32 %p71, %r16, %r83; or.b32 %r204, %r83, %r77; setp.lt.s32 %p72, %r204, 0; or.pred %p73, %p71, %p72; or.pred %p75, %p62, %p73; @%p75 bra $L__BB0_44; mad.lo.s32 %r205, %r127, %r83, %r78; mul.wide.s32 %rd37, %r205, 2; add.s64 %rd38, %rd2, %rd37; ld.global.u16 %rs28, [%rd38]; // begin inline asm
{ cvt.f32.f16 %f61, %rs28;} // end inline asm
ld.global.u16 %rs29, [%rd11+4]; // begin inline asm
{ cvt.f32.f16 %f62, %rs29;} // end inline asm
fma.rn.ftz.f32 %f85, %f61, %f62, %f85; $L__BB0_44: mov.u32 %r206, -4; sub.s32 %r207, %r206, %r266; add.s32 %r208, %r111, %r207; add.s32 %r209, %r266, 3; selp.b32 %r210, %r208, %r209, %p57; mad.lo.s32 %r84, %r123, %r210, %r11; setp.le.s32 %p77, %r16, %r84; or.b32 %r211, %r84, %r77; setp.lt.s32 %p78, %r211, 0; or.pred %p79, %p77, %p78; or.pred %p81, %p62, %p79; @%p81 bra $L__BB0_46; mad.lo.s32 %r212, %r127, %r84, %r78; mul.wide.s32 %rd39, %r212, 2; add.s64 %rd40, %rd2, %rd39; ld.global.u16 %rs31, [%rd40]; // begin inline asm
{ cvt.f32.f16 %f63, %rs31;} // end inline asm
ld.global.u16 %rs32, [%rd11+6]; // begin inline asm
{ cvt.f32.f16 %f64, %rs32;} // end inline asm
fma.rn.ftz.f32 %f85, %f63, %f64, %f85; $L__BB0_46: add.s32 %r266, %r266, 4; add.s32 %r265, %r265, -4; setp.ne.s32 %p82, %r265, 0; @%p82 bra $L__BB0_38; $L__BB0_47: setp.eq.s32 %p83, %r31, 0; @%p83 bra $L__BB0_56; mul.lo.s32 %r243, %r129, %r263; mul.lo.s32 %r242, %r122, %r263; setp.eq.s16 %p84, %rs2, 0; not.b32 %r213, %r266; add.s32 %r214, %r111, %r213; selp.b32 %r215, %r74, %r242, %p84; selp.b32 %r216, %r214, %r266, %p84; add.s32 %r88, %r215, %r12; mad.lo.s32 %r89, %r123, %r216, %r11; 
// Epilogue taps 1 and 2 of the 4x-unrolled path (filter at [%rd12] and
// [%rd12+2]); each exits early via $L__BB0_56 once %r31 iterations are done
// (checks %r31 == 1 then == 2 between taps).
setp.le.s32 %p85, %r16, %r89; or.b32 %r217, %r89, %r88; setp.lt.s32 %p86, %r217, 0; or.pred %p87, %p85, %p86; setp.le.s32 %p88, %r124, %r88; add.s32 %r218, %r23, %r266; add.s32 %r219, %r218, %r243; mul.wide.s32 %rd41, %r219, 2; add.s64 %rd12, %rd3, %rd41; or.pred %p89, %p88, %p87; @%p89 bra $L__BB0_50; mad.lo.s32 %r220, %r126, %r88, %r72; mad.lo.s32 %r221, %r127, %r89, %r220; mul.wide.s32 %rd42, %r221, 2; add.s64 %rd43, %rd2, %rd42; ld.global.u16 %rs34, [%rd43]; // begin inline asm
{ cvt.f32.f16 %f65, %rs34;} // end inline asm
ld.global.u16 %rs35, [%rd12]; // begin inline asm
{ cvt.f32.f16 %f66, %rs35;} // end inline asm
fma.rn.ftz.f32 %f85, %f65, %f66, %f85; $L__BB0_50: setp.eq.s32 %p90, %r31, 1; @%p90 bra $L__BB0_56; setp.le.s32 %p111, %r124, %r88; setp.eq.s16 %p110, %rs2, 0; mov.u32 %r222, -2; sub.s32 %r223, %r222, %r266; add.s32 %r224, %r111, %r223; add.s32 %r225, %r266, 1; selp.b32 %r226, %r224, %r225, %p110; mad.lo.s32 %r90, %r123, %r226, %r11; setp.le.s32 %p93, %r16, %r90; or.b32 %r227, %r90, %r88; setp.lt.s32 %p94, %r227, 0; or.pred %p95, %p93, %p94; or.pred %p96, %p111, %p95; @%p96 bra $L__BB0_53; mad.lo.s32 %r228, %r126, %r88, %r72; mad.lo.s32 %r229, %r127, %r90, %r228; mul.wide.s32 %rd44, %r229, 2; add.s64 %rd45, %rd2, %rd44; ld.global.u16 %rs37, [%rd45]; // begin inline asm
{ cvt.f32.f16 %f67, %rs37;} // end inline asm
ld.global.u16 %rs38, [%rd12+2]; // begin inline asm
{ cvt.f32.f16 %f68, %rs38;} // end inline asm
fma.rn.ftz.f32 %f85, %f67, %f68, %f85; $L__BB0_53: setp.eq.s32 %p97, %r31, 2; @%p97 bra $L__BB0_56; setp.le.s32 %p113, %r124, %r88; setp.eq.s16 %p112, %rs2, 0; mov.u32 %r230, -3; sub.s32 %r231, %r230, %r266; add.s32 %r232, %r111, %r231; add.s32 %r233, %r266, 2; selp.b32 %r234, %r232, %r233, %p112; mad.lo.s32 %r91, %r123, %r234, %r11; setp.le.s32 %p100, %r16, %r91; or.b32 %r235, %r91, %r88; setp.lt.s32 %p101, %r235, 0; or.pred %p102, %p100, %p101; or.pred %p103, %p113, %p102; @%p103 bra $L__BB0_56; mad.lo.s32 %r236, %r126, %r88, %r72; 
// Epilogue tap 3 ([%rd12+4]); $L__BB0_56 is this path's row latch.
// $L__BB0_5..: third path — walks the source coordinate directly in steps of
// %r117 (rows) and %r118 (cols), advancing the filter pointer %rd50 by
// %rd6 = 2*%r118 bytes per step, so no div/rem inside the inner loop.
// $L__BB0_57: if %p1 (pointer at [%rd1+24] non-null AND %r1 < %r113), load
// the fp16 at %rd4 and add it to %f85 — presumably a per-%r1 bias, confirm.
// $L__BB0_59: %f72 = (%f85 > %f39, or NaN) ? %f85 : %f85 * %f40 — a
// leaky-ReLU-shaped activation with threshold %f39 and slope %f40
// (gtu = greater-than-or-unordered, so NaN accumulators pass through).
mad.lo.s32 %r237, %r127, %r91, %r236; mul.wide.s32 %rd46, %r237, 2; add.s64 %rd47, %rd2, %rd46; ld.global.u16 %rs40, [%rd47]; // begin inline asm
{ cvt.f32.f16 %f69, %rs40;} // end inline asm
ld.global.u16 %rs41, [%rd12+4]; // begin inline asm
{ cvt.f32.f16 %f70, %rs41;} // end inline asm
fma.rn.ftz.f32 %f85, %f69, %f70, %f85; $L__BB0_56: add.s32 %r263, %r263, 1; setp.lt.s32 %p104, %r263, %r110; @%p104 bra $L__BB0_35; bra.uni $L__BB0_57; $L__BB0_5: rem.s32 %r36, %r8, %r118; rem.s32 %r256, %r7, %r117; setp.ge.s32 %p14, %r256, %r110; @%p14 bra $L__BB0_57; add.s32 %r40, %r23, %r36; mov.f32 %f85, 0f00000000; $L__BB0_7: setp.ge.s32 %p15, %r36, %r111; @%p15 bra $L__BB0_12; sub.s32 %r257, %r8, %r36; mad.lo.s32 %r246, %r19, %r255, %r1; sub.s32 %r136, %r7, %r256; div.s32 %r42, %r136, %r117; mad.lo.s32 %r137, %r129, %r256, %r40; mul.wide.s32 %rd20, %r137, 2; add.s64 %rd50, %rd3, %rd20; mov.u32 %r258, %r36; $L__BB0_9: div.s32 %r46, %r257, %r118; setp.le.s32 %p16, %r125, %r46; or.b32 %r138, %r46, %r42; setp.lt.s32 %p17, %r138, 0; or.pred %p18, %p17, %p16; setp.le.s32 %p19, %r28, %r42; or.pred %p20, %p18, %p19; @%p20 bra $L__BB0_11; mad.lo.s32 %r248, %r126, %r42, %r246; mad.lo.s32 %r139, %r127, %r46, %r248; mul.wide.s32 %rd21, %r139, 2; add.s64 %rd22, %rd2, %rd21; ld.global.u16 %rs8, [%rd22]; // begin inline asm
{ cvt.f32.f16 %f43, %rs8;} // end inline asm
ld.global.u16 %rs9, [%rd50]; // begin inline asm
{ cvt.f32.f16 %f44, %rs9;} // end inline asm
fma.rn.ftz.f32 %f85, %f43, %f44, %f85; $L__BB0_11: sub.s32 %r257, %r257, %r118; add.s64 %rd50, %rd50, %rd6; add.s32 %r258, %r258, %r118; setp.lt.s32 %p21, %r258, %r29; @%p21 bra $L__BB0_9; $L__BB0_12: add.s32 %r256, %r256, %r117; setp.lt.s32 %p22, %r256, %r110; @%p22 bra $L__BB0_7; $L__BB0_57: @%p105 bra $L__BB0_59; ld.global.u16 %rs42, [%rd4]; // begin inline asm
{ cvt.f32.f16 %f71, %rs42;} // end inline asm
add.ftz.f32 %f85, %f85, %f71; $L__BB0_59: mul.ftz.f32 %f73, %f85, %f40; setp.gtu.ftz.f32 %p106, %f85, %f39; selp.f32 %f72, 
// Store: round the f32 result back to fp16 (cvt.rn.f16.f32), compute the
// NHWC output offset (mad/add with %r24, %r25, %r26 against output base
// %rd5, scaled by 2 bytes), st.global.u16, then the outer-loop latch:
// %r255++ and branch back to $L__BB0_3 while %r255 < %r112. $L__BB0_60: ret.
%f85, %f73, %p106; // begin inline asm
{ cvt.rn.f16.f32 %rs43, %f72;} // end inline asm
mad.lo.s32 %r238, %r24, %r255, %r1; add.s32 %r239, %r238, %r25; add.s32 %r240, %r239, %r26; mul.wide.s32 %rd48, %r240, 2; add.s64 %rd49, %rd5, %rd48; st.global.u16 [%rd49], %rs43; add.s32 %r255, %r255, 1; setp.lt.s32 %p107, %r255, %r112; @%p107 bra $L__BB0_3; $L__BB0_60: ret; }