setp.le.u64 %p1, %rd75, %rd1; @%p1 bra $L__BB0_43; ld.param.u32 %r1, [%rd2+148]; setp.eq.s32 %p2, %r1, 1; @%p2 bra $L__BB0_26; bra.uni $L__BB0_2; $L__BB0_26: and.b64 %rd90, %rd3, -4294967296; setp.eq.s64 %p12, %rd90, 0; @%p12 bra $L__BB0_28; div.u64 %rd153, %rd1, %rd3; mul.lo.s64 %rd91, %rd153, %rd3; sub.s64 %rd159, %rd1, %rd91; bra.uni $L__BB0_29; $L__BB0_2: setp.eq.s32 %p3, %r1, 3; @%p3 bra $L__BB0_16; bra.uni $L__BB0_3; $L__BB0_16: and.b64 %rd84, %rd4, -4294967296; setp.eq.s64 %p9, %rd84, 0; @%p9 bra $L__BB0_18; div.u64 %rd147, %rd1, %rd4; mul.lo.s64 %rd85, %rd147, %rd4; sub.s64 %rd160, %rd1, %rd85; bra.uni $L__BB0_19; $L__BB0_3: and.b64 %rd76, %rd6, -4294967296; setp.eq.s64 %p4, %rd76, 0; @%p4 bra $L__BB0_5; div.u64 %rd143, %rd1, %rd6; mul.lo.s64 %rd77, %rd143, %rd6; sub.s64 %rd144, %rd1, %rd77; bra.uni $L__BB0_6; $L__BB0_28: cvt.u32.u64 %r49, %rd3; cvt.u32.u64 %r50, %rd1; div.u32 %r51, %r50, %r49; mul.lo.s32 %r52, %r51, %r49; sub.s32 %r53, %r50, %r52; cvt.u64.u32 %rd153, %r51; cvt.u64.u32 %rd159, %r53; $L__BB0_29: and.b64 %rd92, %rd6, -4294967296; setp.eq.s64 %p13, %rd92, 0; @%p13 bra $L__BB0_31; div.u64 %rd155, %rd153, %rd6; mul.lo.s64 %rd93, %rd155, %rd6; sub.s64 %rd156, %rd153, %rd93; bra.uni $L__BB0_32; $L__BB0_31: cvt.u32.u64 %r54, %rd6; cvt.u32.u64 %r55, %rd153; div.u32 %r56, %r55, %r54; mul.lo.s32 %r57, %r56, %r54; sub.s32 %r58, %r55, %r57; cvt.u64.u32 %rd155, %r56; cvt.u64.u32 %rd156, %r58; $L__BB0_32: and.b64 %rd94, %rd5, -4294967296; setp.eq.s64 %p14, %rd94, 0; @%p14 bra $L__BB0_34; div.u64 %rd160, %rd155, %rd5; mul.lo.s64 %rd95, %rd160, %rd5; sub.s64 %rd158, %rd155, %rd95; bra.uni $L__BB0_35; $L__BB0_34: cvt.u32.u64 %r59, %rd5; cvt.u32.u64 %r60, %rd155; div.u32 %r61, %r60, %r59; mul.lo.s32 %r62, %r61, %r59; sub.s32 %r63, %r60, %r62; cvt.u64.u32 %rd160, %r61; cvt.u64.u32 %rd158, %r63; $L__BB0_35: cvt.u32.u64 %r65, %rd158; cvt.u32.u64 %r64, %rd156; bra.uni $L__BB0_36; $L__BB0_18: cvt.u32.u64 %r34, %rd4; cvt.u32.u64 %r35, %rd1; div.u32 %r36, %r35, %r34; mul.lo.s32 %r37, %r36, %r34; sub.s32 %r38, %r35, %r37; cvt.u64.u32 %rd147, %r36; cvt.u64.u32 %rd160, %r38; $L__BB0_19: and.b64 %rd86, %rd6, -4294967296; setp.eq.s64 %p10, %rd86, 0; @%p10 bra $L__BB0_21; div.u64 %rd149, %rd147, %rd6; mul.lo.s64 %rd87, %rd149, %rd6; sub.s64 %rd150, %rd147, %rd87; bra.uni $L__BB0_22; $L__BB0_5: cvt.u32.u64 %r14, %rd6; cvt.u32.u64 %r15, %rd1; div.u32 %r16, %r15, %r14; mul.lo.s32 %r17, %r16, %r14; sub.s32 %r18, %r15, %r17; cvt.u64.u32 %rd143, %r16; cvt.u64.u32 %rd144, %r18; $L__BB0_6: and.b64 %rd78, %rd5, -4294967296; setp.eq.s64 %p5, %rd78, 0; @%p5 bra $L__BB0_8; div.u64 %rd17, %rd143, %rd5; mul.lo.s64 %rd79, %rd17, %rd5; sub.s64 %rd146, %rd143, %rd79; bra.uni $L__BB0_9; $L__BB0_21: cvt.u32.u64 %r39, %rd6; cvt.u32.u64 %r40, %rd147; div.u32 %r41, %r40, %r39; mul.lo.s32 %r42, %r41, %r39; sub.s32 %r43, %r40, %r42; cvt.u64.u32 %rd149, %r41; cvt.u64.u32 %rd150, %r43; $L__BB0_22: and.b64 %rd88, %rd5, -4294967296; setp.eq.s64 %p11, %rd88, 0; @%p11 bra $L__BB0_24; div.u64 %rd159, %rd149, %rd5; mul.lo.s64 %rd89, %rd159, %rd5; sub.s64 %rd152, %rd149, %rd89; bra.uni $L__BB0_25; $L__BB0_8: cvt.u32.u64 %r19, %rd5; cvt.u32.u64 %r20, %rd143; div.u32 %r21, %r20, %r19; mul.lo.s32 %r22, %r21, %r19; sub.s32 %r23, %r20, %r22; cvt.u64.u32 %rd17, %r21; cvt.u64.u32 %rd146, %r23; $L__BB0_9: cvt.u32.u64 %r64, %rd144; cvt.u32.u64 %r65, %rd146; setp.eq.s32 %p6, %r1, 2; @%p6 bra $L__BB0_13; bra.uni $L__BB0_10; $L__BB0_13: and.b64 %rd82, %rd4, -4294967296; setp.eq.s64 %p8, %rd82, 0; @%p8 bra $L__BB0_15; div.u64 %rd159, %rd17, %rd4; mul.lo.s64 %rd83, %rd159, %rd4; sub.s64 %rd160, %rd17, %rd83; bra.uni $L__BB0_36; $L__BB0_10: and.b64 %rd80, %rd3, -4294967296; setp.eq.s64 %p7, %rd80, 0; @%p7 bra $L__BB0_12; div.u64 %rd160, %rd17, %rd3; mul.lo.s64 %rd81, %rd160, %rd3; sub.s64 %rd159, %rd17, %rd81; bra.uni $L__BB0_36; $L__BB0_24: cvt.u32.u64 %r44, %rd5; cvt.u32.u64 %r45, %rd149; div.u32 %r46, %r45, %r44; mul.lo.s32 %r47, %r46, %r44; sub.s32 %r48, %r45, %r47; cvt.u64.u32 %rd159, %r46; cvt.u64.u32 %rd152, %r48; $L__BB0_25: cvt.u32.u64 %r65, %rd152; cvt.u32.u64 %r64, %rd150; $L__BB0_36: ld.param.u64 %rd96, [%rd2+8]; cvta.to.global.u64 %rd65, %rd96; and.b64 %rd66, %rd160, 4294967295; ld.param.u64 %rd97, [%rd2+16]; setp.le.u64 %p15, %rd97, %rd66; and.b64 %rd67, %rd159, 4294967295; ld.param.u64 %rd98, [%rd2+24]; setp.le.u64 %p16, %rd98, %rd67; or.pred %p17, %p15, %p16; cvt.u64.u32 %rd68, %r65; cvt.u64.u32 %rd69, %r64; mov.f32 %f7, 0f00000000; @%p17 bra $L__BB0_40; ld.param.u64 %rd99, [%rd2+48]; mul.lo.s64 %rd100, %rd99, %rd66; ld.param.u64 %rd101, [%rd2+56]; mul.lo.s64 %rd102, %rd101, %rd67; ld.param.u64 %rd103, [%rd2+64]; mul.lo.s64 %rd104, %rd103, %rd68; ld.param.u64 %rd105, [%rd2+72]; mul.lo.s64 %rd106, %rd105, %rd69; add.s64 %rd107, %rd100, %rd106; add.s64 %rd108, %rd107, %rd102; add.s64 %rd109, %rd108, %rd104; ld.param.u8 %rs1, [%rd2+152]; setp.eq.s16 %p18, %rs1, 0; and.b64 %rd70, %rd109, 4294967295; @%p18 bra $L__BB0_39; shl.b64 %rd110, %rd70, 1; add.s64 %rd111, %rd65, %rd110; ld.global.u16 %rs2, [%rd111]; // begin inline asm { cvt.f32.f16 %f7, %rs2;} // end inline asm bra.uni $L__BB0_40; $L__BB0_39: shl.b64 %rd112, %rd70, 2; add.s64 %rd113, %rd65, %rd112; ld.global.f32 %f7, [%rd113]; $L__BB0_40: ld.param.u8 %rs3, [%rd2+153]; setp.eq.s16 %p19, %rs3, 0; ld.param.u64 %rd114, [%rd2]; cvta.to.global.u64 %rd71, %rd114; @%p19 bra $L__BB0_42; ld.param.u64 %rd115, [%rd2+112]; mul.lo.s64 %rd116, %rd115, %rd66; ld.param.u64 %rd117, [%rd2+120]; mul.lo.s64 %rd118, %rd117, %rd67; add.s64 %rd119, %rd118, %rd116; ld.param.u64 %rd120, [%rd2+128]; mul.lo.s64 %rd121, %rd120, %rd68; add.s64 %rd122, %rd119, %rd121; ld.param.u64 %rd123, [%rd2+136]; mul.lo.s64 %rd124, %rd123, %rd69; add.s64 %rd125, %rd122, %rd124; shl.b64 %rd126, %rd125, 32; // begin inline asm { cvt.rn.f16.f32 %rs4, %f7;} // end inline asm shr.s64 %rd127, %rd126, 31; add.s64 %rd128, %rd71, %rd127; st.global.u16 [%rd128], %rs4; bra.uni $L__BB0_43; $L__BB0_42: ld.param.u64 %rd129, [%rd2+112]; mul.lo.s64 %rd130, %rd129, %rd66; ld.param.u64 %rd131, [%rd2+120]; mul.lo.s64 %rd132, %rd131, %rd67; add.s64 %rd133, %rd132, %rd130; ld.param.u64 %rd134, [%rd2+128]; mul.lo.s64 %rd135, %rd134, %rd68; add.s64 %rd136, %rd133, %rd135; ld.param.u64 %rd137, [%rd2+136]; mul.lo.s64 %rd138, %rd137, %rd69; add.s64 %rd139, %rd136, %rd138; shl.b64 %rd140, %rd139, 32; shr.s64 %rd141, %rd140, 30; add.s64 %rd142, %rd71, %rd141; st.global.f32 [%rd142], %f7; $L__BB0_43: ret; $L__BB0_15: cvt.u32.u64 %r29, %rd4; cvt.u32.u64 %r30, %rd17; div.u32 %r31, %r30, %r29; mul.lo.s32 %r32, %r31, %r29; sub.s32 %r33, %r30, %r32; cvt.u64.u32 %rd159, %r31; cvt.u64.u32 %rd160, %r33; bra.uni $L__BB0_36; $L__BB0_12: cvt.u32.u64 %r24, %rd3; cvt.u32.u64 %r25, %rd17; div.u32 %r26, %r25, %r24; mul.lo.s32 %r27, %r26, %r24; sub.s32 %r28, %r25, %r27; cvt.u64.u32 %rd160, %r26; cvt.u64.u32 %rd159, %r28; bra.uni $L__BB0_36; }