copy_kv_cu_copy_kv6thrust12placeholders2_4E[1];    // tail of a declaration that begins before this chunk; kept verbatim
// One-byte .global objects (names reference thrust::placeholders _5 .. _10).
.global .align 1 .b8 _ZN48_INTERNAL_9e5093bb_18_genmmha_copy_kv_cu_copy_kv6thrust12placeholders2_5E[1];
.global .align 1 .b8 _ZN48_INTERNAL_9e5093bb_18_genmmha_copy_kv_cu_copy_kv6thrust12placeholders2_6E[1];
.global .align 1 .b8 _ZN48_INTERNAL_9e5093bb_18_genmmha_copy_kv_cu_copy_kv6thrust12placeholders2_7E[1];
.global .align 1 .b8 _ZN48_INTERNAL_9e5093bb_18_genmmha_copy_kv_cu_copy_kv6thrust12placeholders2_8E[1];
.global .align 1 .b8 _ZN48_INTERNAL_9e5093bb_18_genmmha_copy_kv_cu_copy_kv6thrust12placeholders2_9E[1];
.global .align 1 .b8 _ZN48_INTERNAL_9e5093bb_18_genmmha_copy_kv_cu_copy_kv6thrust12placeholders3_10E[1];

//------------------------------------------------------------------------------
// copy_kv
//
// Copies rows of 2-byte elements from two source buffers into two destination
// buffers.  One warp handles one row; each lane moves 8-byte chunks (4 elements)
// and strides by WARP_SZ chunks.
//
// Parameter block copy_kv_param_0 (144 bytes) -- fields read here, by offset:
//     +8    u64  destination A          +16   u64  destination B
//     +64   u64  source A               +72   u64  source B
//     +80   u64  optional s32 table indexed by %ctaid.z (0 = not supplied)
//     +112  s32  row limit; also the number of rows per destination slot
//     +120  s32  slots per %ctaid.z     +124  s32  elements per row
//     +136  s32  source elements per slot
// NOTE(review): the kernel name suggests A/B are the K and V caches and that
// the 2-byte element is a half-precision type -- confirm against the CUDA source.
//
// Indexing:
//     slot  = param[+120] * %ctaid.z + %ctaid.y
//     row   = (%ntid.x / WARP_SZ) * %ctaid.x + %tid.x / WARP_SZ
//     count = table ? table[%ctaid.z] + 1 : limit
//     rows with row >= min(count, limit) - 1 are skipped
//     src element offset = slot * param[+136]     + row * row_elems
//     dst element offset = slot * limit*row_elems + row * row_elems
//
// Element offsets are formed with 64-bit arithmetic (mul.wide / mul.lo.s64) so
// buffers holding more than 2^31 elements are addressed correctly.
//
// The v2.u32 accesses need 8-byte-aligned addresses, so buffer bases and row
// starts must be 8-byte aligned.  When row_elems is not a multiple of 4 the
// trailing (row_elems % 4) elements of each row are not copied.
//------------------------------------------------------------------------------
.visible .entry copy_kv(
    .param .align 8 .b8 copy_kv_param_0[144]
)
{
    .reg .pred  %p<8>;
    .reg .b32   %r<95>;
    .reg .b64   %rd<50>;

    // ---- load the fields of the parameter block --------------------------------
    ld.param.u64    %rd3,  [copy_kv_param_0+8];             // destination A
    ld.param.u64    %rd5,  [copy_kv_param_0+16];            // destination B
    ld.param.u64    %rd2,  [copy_kv_param_0+64];            // source A
    ld.param.u64    %rd4,  [copy_kv_param_0+72];            // source B
    ld.param.u64    %rd12, [copy_kv_param_0+80];            // optional per-ctaid.z table
    ld.param.u32    %r4,   [copy_kv_param_0+112];           // row limit / rows per dst slot
    ld.param.v2.u32 {%r25, %r26}, [copy_kv_param_0+120];    // r25 = slots per ctaid.z, r26 = row_elems
    ld.param.u32    %r5,   [copy_kv_param_0+136];           // source elements per slot
    mov.u32         %r6,   %ctaid.z;

    // ---- r90 = row count: the limit, or table[ctaid.z] + 1 when a table exists --
    mov.u32         %r90, %r4;
    setp.eq.s64     %p1, %rd12, 0;
    @%p1 bra        $L_count_ready;

    cvta.to.global.u64 %rd14, %rd12;
    mul.wide.s32    %rd15, %r6, 4;                          // 4-byte table entries
    add.s64         %rd16, %rd14, %rd15;
    ld.global.u32   %r27, [%rd16];
    add.s32         %r90, %r27, 1;

$L_count_ready:
    min.s32         %r28, %r90, %r4;                        // never exceed the limit
    mov.u32         %r29, %ctaid.y;
    mad.lo.s32      %r30, %r25, %r6, %r29;                  // slot

    // ---- split the thread index into (warp, lane) and derive the row -----------
    mov.u32         %r11, WARP_SZ;
    mov.u32         %r12, %tid.x;
    div.u32         %r32, %r12, %r11;                       // warp index inside the CTA
    mov.u32         %r33, %ntid.x;
    div.u32         %r34, %r33, %r11;                       // warps per CTA
    mul.lo.s32      %r35, %r32, %r11;
    sub.s32         %r93, %r12, %r35;                       // lane; reused below as the chunk index
    mov.u32         %r36, %ctaid.x;
    mad.lo.s32      %r14, %r34, %r36, %r32;                 // row

    add.s32         %r37, %r28, -1;
    setp.ge.s32     %p2, %r14, %r37;                        // the last counted row is not copied
    @%p2 bra        $L_exit;

    // ---- 64-bit element offsets, then byte addresses of the four row bases -----
    mul.wide.s32    %rd17, %r30, %r5;                       // slot * src_elems_per_slot
    mul.wide.s32    %rd18, %r14, %r26;                      // row * row_elems
    add.s64         %rd19, %rd18, %rd17;                    // source element offset
    shl.b64         %rd21, %rd19, 1;                        // * 2 bytes per element

    mul.wide.s32    %rd11, %r4, %r26;                       // dst elements per slot
    cvt.s64.s32     %rd13, %r30;
    mul.lo.s64      %rd22, %rd11, %rd13;                    // slot * dst_elems_per_slot
    add.s64         %rd23, %rd18, %rd22;                    // destination element offset
    shl.b64         %rd25, %rd23, 1;                        // * 2 bytes per element

    cvta.to.global.u64 %rd20, %rd2;
    add.s64         %rd6, %rd20, %rd21;                     // source A row
    cvta.to.global.u64 %rd24, %rd3;
    add.s64         %rd7, %rd24, %rd25;                     // destination A row
    cvta.to.global.u64 %rd26, %rd4;
    add.s64         %rd8, %rd26, %rd21;                     // source B row
    cvta.to.global.u64 %rd27, %rd5;
    add.s64         %rd9, %rd27, %rd25;                     // destination B row

    // ---- r15 = row_elems / 4 (signed, rounds toward zero) = 8-byte chunks/row ---
    shr.s32         %r39, %r26, 31;
    shr.u32         %r40, %r39, 30;                         // bias of 3 for negative values
    add.s32         %r41, %r26, %r40;
    shr.s32         %r15, %r41, 2;
    setp.ge.s32     %p3, %r93, %r15;                        // this lane has nothing to copy
    @%p3 bra        $L_exit;

    // ---- trips = (chunks - lane - 1) / WARP_SZ + 1; peel trips % 4 first -------
    sub.s32         %r42, %r15, %r93;
    add.s32         %r45, %r42, -1;
    div.u32         %r16, %r45, %r11;                       // trips - 1
    add.s32         %r46, %r16, 1;
    and.b32         %r92, %r46, 3;                          // trips % 4
    setp.eq.s32     %p4, %r92, 0;
    @%p4 bra        $L_peel_done;

    // ---- remainder loop: one chunk of A and one chunk of B per trip ------------
$L_peel:
    .pragma "nounroll";
    mul.wide.s32    %rd28, %r93, 8;                         // chunk index -> byte offset
    add.s64         %rd29, %rd6, %rd28;
    add.s64         %rd30, %rd7, %rd28;
    ld.global.v2.u32 {%r47, %r48}, [%rd29];
    st.global.v2.u32 [%rd30], {%r47, %r48};
    add.s64         %rd31, %rd8, %rd28;
    add.s64         %rd32, %rd9, %rd28;
    ld.global.v2.u32 {%r51, %r52}, [%rd31];
    st.global.v2.u32 [%rd32], {%r51, %r52};
    add.s32         %r93, %r93, %r11;
    add.s32         %r92, %r92, -1;
    setp.ne.s32     %p5, %r92, 0;
    @%p5 bra        $L_peel;

$L_peel_done:
    setp.lt.u32     %p6, %r16, 3;                           // fewer than 4 trips: all done above
    @%p6 bra        $L_exit;
    mul.wide.s32    %rd10, %r11, 8;                         // bytes between a lane's successive chunks

    // ---- main loop, four chunks of A and B per iteration -----------------------
$L_unrolled:
    // chunk 0
    mul.wide.s32    %rd33, %r93, 8;
    add.s64         %rd34, %rd6, %rd33;
    add.s64         %rd35, %rd7, %rd33;
    ld.global.v2.u32 {%r55, %r56}, [%rd34];
    st.global.v2.u32 [%rd35], {%r55, %r56};
    add.s64         %rd36, %rd8, %rd33;
    add.s64         %rd37, %rd9, %rd33;
    ld.global.v2.u32 {%r59, %r60}, [%rd36];
    st.global.v2.u32 [%rd37], {%r59, %r60};

    // chunk 1
    add.s64         %rd38, %rd34, %rd10;
    ld.global.v2.u32 {%r63, %r64}, [%rd38];
    add.s64         %rd39, %rd35, %rd10;
    st.global.v2.u32 [%rd39], {%r63, %r64};
    add.s64         %rd40, %rd36, %rd10;
    ld.global.v2.u32 {%r67, %r68}, [%rd40];
    add.s64         %rd41, %rd37, %rd10;
    st.global.v2.u32 [%rd41], {%r67, %r68};
    add.s32         %r71, %r93, %r11;
    add.s32         %r72, %r71, %r11;

    // chunk 2
    add.s64         %rd42, %rd38, %rd10;
    ld.global.v2.u32 {%r73, %r74}, [%rd42];
    add.s64         %rd43, %rd39, %rd10;
    st.global.v2.u32 [%rd43], {%r73, %r74};
    add.s64         %rd44, %rd40, %rd10;
    ld.global.v2.u32 {%r77, %r78}, [%rd44];
    add.s64         %rd45, %rd41, %rd10;
    st.global.v2.u32 [%rd45], {%r77, %r78};
    add.s32         %r81, %r72, %r11;

    // chunk 3
    add.s64         %rd46, %rd42, %rd10;
    ld.global.v2.u32 {%r82, %r83}, [%rd46];
    add.s64         %rd47, %rd43, %rd10;
    st.global.v2.u32 [%rd47], {%r82, %r83};
    add.s64         %rd48, %rd44, %rd10;
    ld.global.v2.u32 {%r86, %r87}, [%rd48];
    add.s64         %rd49, %rd45, %rd10;
    st.global.v2.u32 [%rd49], {%r86, %r87};

    add.s32         %r93, %r81, %r11;                       // advance by 4 * WARP_SZ chunks
    setp.lt.s32     %p7, %r93, %r15;
    @%p7 bra        $L_unrolled;

$L_exit:
    ret;
}

    // .globl _ZN3cub11EmptyKernelIvEEvv
// Kernel entry with an empty body: it returns immediately.
.visible .entry _ZN3cub11EmptyKernelIvEEvv()
{
    ret;
}