// Cuda compilation tools, release 11.8, V11.8.85
// Based on NVVM 7.0.1
//

.version 7.8
.target sm_80
.address_size 64

    // .globl _Z27dequant_gemv_group32_batch223DequantGemvKernelParams
// _ZZ9gemv_int4ILi4ELi32ELi2EEvP6__halfPKS0_S3_PKjPKhS3_iiiiffbE12temp_storage has been demoted

.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust6system6detail10sequential3seqE[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_1E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_2E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_3E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_4E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_5E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_6E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_7E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_8E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders2_9E[1];
.global .align 1 .b8 _ZN37_INTERNAL_36609967_7_gemv_cu_e5afa8636thrust12placeholders3_10E[1];

.visible .entry _Z27dequant_gemv_group32_batch223DequantGemvKernelParams(
    .param .align 8 .b8 _Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0[80]
)
{
    .reg .pred %p<15>;
    .reg .b16 %rs<141>;
    .reg .f32 %f<196>;
    .reg .b32 %r<110>;
    .reg .b64 %rd<43>;
    // demoted variable
    .shared .align 16 .b8 _ZZ9gemv_int4ILi4ELi32ELi2EEvP6__halfPKS0_S3_PKjPKhS3_iiiiffbE12temp_storage[1280];

    ld.param.v2.u32 {%r25, %r26}, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+48];
    ld.param.v2.u32 {%r27, %r28}, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+56];
    ld.param.v2.f32 {%f29, %f30}, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+64];
    ld.param.v4.u8 {%rs44, %rs45, %rs46, %rs47}, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+72];
    ld.param.u64 %rd14, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+40];
    ld.param.u64 %rd13, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+32];
    ld.param.u64 %rd12, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+24];
    ld.param.u64 %rd11, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+16];
    ld.param.u64 %rd10, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0+8];
    ld.param.u64 %rd9, [_Z27dequant_gemv_group32_batch223DequantGemvKernelParams_param_0];
    cvta.to.global.u64 %rd2, %rd10;
    mov.u32 %r1, %ctaid.x;
    mov.u32 %r109, %tid.y;
    shl.b32 %r29, %r109, 5;
    mov.u32 %r3, %tid.x;
    add.s32 %r4, %r29, %r3;
    setp.ge.u32 %p1, %r4, %r27;
    mov.f32 %f188, 0f00000000;
    mov.f32 %f189, %f188;
    @%p1 bra $L__BB0_9;

    mul.lo.s32 %r7, %r27, %r1;
    shr.u32 %r8, %r3, 2;
    mul.lo.s32 %r9, %r28, %r1;
    shl.b16 %rs2, %rs44, 3;
    cvta.to.global.u64 %rd6, %rd11;
    mov.u32 %r108, %r4;

$L__BB0_2:
    add.s32 %r31, %r108, %r7;
    mul.wide.u32 %rd21, %r31, 4;
    add.s64 %rd16, %rd12, %rd21;
    // begin inline asm
    createpolicy.fractional.L2::evict_first.L2::evict_first.b64 %rd15, 1.0;
    // end inline asm
    // begin inline asm
    ld.global.L1::no_allocate.L2::cache_hint.u32 %r30, [%rd16], %rd15;
    // end inline asm
    shl.b32 %r32, %r109, 3;
    add.s32 %r13, %r32, %r8;
    add.s32 %r14, %r13, %r9;
    mul.wide.s32 %rd22, %r14, 2;
    add.s64 %rd19, %rd14, %rd22;
    // begin inline asm
    createpolicy.fractional.L2::evict_first.L2::evict_first.b64 %rd18, 1.0;
    // end inline asm
    // begin inline asm
    ld.global.L1::evict_last.L2::cache_hint.u16 %rs52, [%rd19], %rd18;
    // end inline asm
    // begin inline asm
    { cvt.f32.f16 %f33, %rs52;}
    // end inline asm
    setp.eq.s64 %p2, %rd13, 0;
    mov.u16 %rs140, %rs2;
    @%p2 bra $L__BB0_4;

    shr.u32 %r33, %r14, 31;
    add.s32 %r34, %r14, %r33;
    shr.s32 %r35, %r34, 1;
    cvt.s64.s32 %rd26, %r35;
    add.s64 %rd24, %rd13, %rd26;
    // begin inline asm
    createpolicy.fractional.L2::evict_first.L2::evict_first.b64 %rd23, 1.0;
    // end inline asm
    // begin inline asm
    ld.global.L1::evict_last.L2::cache_hint.u8 %rs54, [%rd24], %rd23;
    // end inline asm
    cvt.u32.u16 %r36, %rs54;
    and.b32 %r37, %r36, 255;
    shl.b32 %r38, %r13, 2;
    and.b32 %r39, %r38, 4;
    shr.u32 %r40, %r37, %r39;
    cvt.u16.u32 %rs55, %r40;
    and.b16 %rs140, %rs55, 15;

$L__BB0_4:
    shl.b32 %r15, %r108, 3;
    setp.ge.s32 %p3, %r15, %r25;
    @%p3 bra $L__BB0_8;

    setp.eq.s16 %p4, %rs44, 0;
    mul.wide.s32 %rd27, %r15, 2;
    add.s64 %rd28, %rd6, %rd27;
    ld.global.v4.u32 {%r41, %r42, %r43, %r44}, [%rd28];
    mul.wide.s32 %rd29, %r25, 2;
    add.s64 %rd30, %rd28, %rd29;
    ld.global.v4.u32 {%r49, %r50, %r51, %r52}, [%rd30];
    shr.u16 %rs57, %rs140, 3;
    and.b16 %rs58, %rs57, 1;
    setp.eq.b16 %p5, %rs58, 1;
    and.pred %p6, %p4, %p5;
    selp.b16 %rs59, -16, 0, %p6;
    or.b16 %rs60, %rs59, %rs140;
    cvt.s16.s8 %rs61, %rs60;
    cvt.rn.f32.s16 %f4, %rs61;
    cvt.u16.u32 %rs5, %r30;
    and.b16 %rs6, %rs5, 15;
    mov.b32 {%rs7, %rs11}, %r41;
    mov.b32 {%rs8, %rs12}, %r49;
    shr.u32 %r57, %r30, 4;
    cvt.u16.u32 %rs9, %r57;
    and.b16 %rs10, %rs9, 15;
    shr.u32 %r58, %r30, 8;
    cvt.u16.u32 %rs13, %r58;
    and.b16 %rs14, %rs13, 15;
    mov.b32 {%rs15, %rs19}, %r42;
    mov.b32 {%rs16, %rs20}, %r50;
    shr.u32 %r59, %r30, 12;
    cvt.u16.u32 %rs17, %r59;
    and.b16 %rs18, %rs17, 15;
    shr.u32 %r60, %r30, 16;
    cvt.u16.u32 %rs21, %r60;
    and.b16 %rs22, %rs21, 15;
    mov.b32 {%rs23, %rs27}, %r43;
    mov.b32 {%rs24, %rs28}, %r51;
    shr.u32 %r61, %r30, 20;
    cvt.u16.u32 %rs25, %r61;
    and.b16 %rs26, %rs25, 15;
    shr.u32 %r62, %r30, 24;
    cvt.u16.u32 %rs29, %r62;
    and.b16 %rs30, %rs29, 15;
    mov.b32 {%rs31, %rs34}, %r44;
    mov.b32 {%rs32, %rs35}, %r52;
    shr.u32 %r63, %r30, 28;
    cvt.u16.u32 %rs33, %r63;
    @%p4 bra $L__BB0_7;

    cvt.rn.f32.s16 %f50, %rs6;
    sub.ftz.f32 %f51, %f50, %f4;
    mul.ftz.f32 %f52, %f33, %f51;
    // begin inline asm
    { cvt.f32.f16 %f34, %rs7;}
    // end inline asm
    fma.rn.ftz.f32 %f53, %f52, %f34, %f188;
    // begin inline asm
    { cvt.f32.f16 %f35, %rs8;}
    // end inline asm
    fma.rn.ftz.f32 %f54, %f52, %f35, %f189;
    cvt.rn.f32.s16 %f55, %rs10;
    sub.ftz.f32 %f56, %f55, %f4;
    mul.ftz.f32 %f57, %f33, %f56;
    // begin inline asm
    { cvt.f32.f16 %f36, %rs11;}
    // end inline asm
    fma.rn.ftz.f32 %f58, %f57, %f36, %f53;
    // begin inline asm
    { cvt.f32.f16 %f37, %rs12;}
    // end inline asm
    fma.rn.ftz.f32 %f59, %f57, %f37, %f54;
    cvt.rn.f32.s16 %f60, %rs14;
    sub.ftz.f32 %f61, %f60, %f4;
    mul.ftz.f32 %f62, %f33, %f61;
    // begin inline asm
    { cvt.f32.f16 %f38, %rs15;}
    // end inline asm
    fma.rn.ftz.f32 %f63, %f62, %f38, %f58;
    // begin inline asm
    { cvt.f32.f16 %f39, %rs16;}
    // end inline asm
    fma.rn.ftz.f32 %f64, %f62, %f39, %f59;
    cvt.rn.f32.s16 %f65, %rs18;
    sub.ftz.f32 %f66, %f65, %f4;
    mul.ftz.f32 %f67, %f33, %f66;
    // begin inline asm
    { cvt.f32.f16 %f40, %rs19;}
    // end inline asm
    fma.rn.ftz.f32 %f68, %f67, %f40, %f63;
    // begin inline asm
    { cvt.f32.f16 %f41, %rs20;}
    // end inline asm
    fma.rn.ftz.f32 %f69, %f67, %f41, %f64;
    cvt.rn.f32.s16 %f70, %rs22;
    sub.ftz.f32 %f71, %f70, %f4;
    mul.ftz.f32 %f72, %f33, %f71;
    // begin inline asm
    { cvt.f32.f16 %f42, %rs23;}
    // end inline asm
    fma.rn.ftz.f32 %f73, %f72, %f42, %f68;
    // begin inline asm
    { cvt.f32.f16 %f43, %rs24;}
    // end inline asm
    fma.rn.ftz.f32 %f74, %f72, %f43, %f69;
    cvt.rn.f32.s16 %f75, %rs26;
    sub.ftz.f32 %f76, %f75, %f4;
    mul.ftz.f32 %f77, %f33, %f76;
    // begin inline asm
    { cvt.f32.f16 %f44, %rs27;}
    // end inline asm
    fma.rn.ftz.f32 %f78, %f77, %f44, %f73;
    // begin inline asm
    { cvt.f32.f16 %f45, %rs28;}
    // end inline asm
    fma.rn.ftz.f32 %f79, %f77, %f45, %f74;
    cvt.rn.f32.s16 %f80, %rs30;
    sub.ftz.f32 %f81, %f80, %f4;
    mul.ftz.f32 %f82, %f33, %f81;
    // begin inline asm
    { cvt.f32.f16 %f46, %rs31;}
    // end inline asm
    fma.rn.ftz.f32 %f83, %f82, %f46, %f78;
    // begin inline asm
    { cvt.f32.f16 %f47, %rs32;}
    // end inline asm
    fma.rn.ftz.f32 %f84, %f82, %f47, %f79;
    cvt.rn.f32.s16 %f85, %rs33;
    sub.ftz.f32 %f86, %f85, %f4;
    mul.ftz.f32 %f87, %f33, %f86;
    // begin inline asm
    { cvt.f32.f16 %f48, %rs34;}
    // end inline asm
    fma.rn.ftz.f32 %f188, %f87, %f48, %f83;
    // begin inline asm
    { cvt.f32.f16 %f49, %rs35;}
    // end inline asm
    fma.rn.ftz.f32 %f189, %f87, %f49, %f84;
    bra.uni $L__BB0_8;

$L__BB0_7:
    shl.b16 %rs94, %rs5, 4;
    cvt.s16.s8 %rs95, %rs94;
    shr.s16 %rs96, %rs95, 7;
    and.b16 %rs97, %rs96, -16;
    or.b16 %rs98, %rs97, %rs6;
    cvt.rn.f32.s16 %f104, %rs98;
    sub.ftz.f32 %f105, %f104, %f4;
    mul.ftz.f32 %f106, %f33, %f105;
    // begin inline asm
    { cvt.f32.f16 %f88, %rs7;}
    // end inline asm
    fma.rn.ftz.f32 %f107, %f106, %f88, %f188;
    // begin inline asm
    { cvt.f32.f16 %f89, %rs8;}
    // end inline asm
    fma.rn.ftz.f32 %f108, %f106, %f89, %f189;
    shl.b16 %rs99, %rs9, 4;
    cvt.s16.s8 %rs100, %rs99;
    shr.s16 %rs101, %rs100, 7;
    and.b16 %rs102, %rs101, -16;
    or.b16 %rs103, %rs102, %rs10;
    cvt.rn.f32.s16 %f109, %rs103;
    sub.ftz.f32 %f110, %f109, %f4;
    mul.ftz.f32 %f111, %f33, %f110;
    // begin inline asm
    { cvt.f32.f16 %f90, %rs11;}
    // end inline asm
    fma.rn.ftz.f32 %f112, %f111, %f90, %f107;
    // begin inline asm
    { cvt.f32.f16 %f91, %rs12;}
    // end inline asm
    fma.rn.ftz.f32 %f113, %f111, %f91, %f108;
    shl.b16 %rs104, %rs13, 4;
    cvt.s16.s8 %rs105, %rs104;
    shr.s16 %rs106, %rs105, 7;
    and.b16 %rs107, %rs106, -16;
    or.b16 %rs108, %rs107, %rs14;
    cvt.rn.f32.s16 %f114, %rs108;
    sub.ftz.f32 %f115, %f114, %f4;
    mul.ftz.f32 %f116, %f33, %f115;
    // begin inline asm
    { cvt.f32.f16 %f92, %rs15;}
    // end inline asm
    fma.rn.ftz.f32 %f117, %f116, %f92, %f112;
    // begin inline asm
    { cvt.f32.f16 %f93, %rs16;}
    // end inline asm
    fma.rn.ftz.f32 %f118, %f116, %f93, %f113;
    shl.b16 %rs109, %rs17, 4;
    cvt.s16.s8 %rs110, %rs109;
    shr.s16 %rs111, %rs110, 7;
    and.b16 %rs112, %rs111, -16;
    or.b16 %rs113, %rs112, %rs18;
    cvt.rn.f32.s16 %f119, %rs113;
    sub.ftz.f32 %f120, %f119, %f4;
    mul.ftz.f32 %f121, %f33, %f120;
    // begin inline asm
    { cvt.f32.f16 %f94, %rs19;}
    // end inline asm
    fma.rn.ftz.f32 %f122, %f121, %f94, %f117;
    // begin inline asm
    { cvt.f32.f16 %f95, %rs20;}
    // end inline asm
    fma.rn.ftz.f32 %f123, %f121, %f95, %f118;
    shl.b16 %rs114, %rs21, 4;
    cvt.s16.s8 %rs115, %rs114;
    shr.s16 %rs116, %rs115, 7;
    and.b16 %rs117, %rs116, -16;
    or.b16 %rs118, %rs117, %rs22;
    cvt.rn.f32.s16 %f124, %rs118;
    sub.ftz.f32 %f125, %f124, %f4;
    mul.ftz.f32 %f126, %f33, %f125;
    // begin inline asm
    { cvt.f32.f16 %f96, %rs23;}
    // end inline asm
    fma.rn.ftz.f32 %f127, %f126, %f96, %f122;
    // begin inline asm
    { cvt.f32.f16 %f97, %rs24;}
    // end inline asm
    fma.rn.ftz.f32 %f128, %f126, %f97, %f123;
    shl.b16 %rs119, %rs25, 4;
    cvt.s16.s8 %rs120, %rs119;
    shr.s16 %rs121, %rs120, 7;
    and.b16 %rs122, %rs121, -16;
    or.b16 %rs123, %rs122, %rs26;
    cvt.rn.f32.s16 %f129, %rs123;
    sub.ftz.f32 %f130, %f129, %f4;
    mul.ftz.f32 %f131, %f33, %f130;
    // begin inline asm
    { cvt.f32.f16 %f98, %rs27;}
    // end inline asm
    fma.rn.ftz.f32 %f132, %f131, %f98, %f127;
    // begin inline asm
    { cvt.f32.f16 %f99, %rs28;}
    // end inline asm
    fma.rn.ftz.f32 %f133, %f131, %f99, %f128;
    shl.b16 %rs124, %rs29, 4;
    cvt.s16.s8 %rs125, %rs124;
    shr.s16 %rs126, %rs125, 7;
    and.b16 %rs127, %rs126, -16;
    or.b16 %rs128, %rs127, %rs30;
    cvt.rn.f32.s16 %f134, %rs128;
    sub.ftz.f32 %f135, %f134, %f4;
    mul.ftz.f32 %f136, %f33, %f135;
    // begin inline asm
    { cvt.f32.f16 %f100, %rs31;}
    // end inline asm
    fma.rn.ftz.f32 %f137, %f136, %f100, %f132;
    // begin inline asm
    { cvt.f32.f16 %f101, %rs32;}
    // end inline asm
    fma.rn.ftz.f32 %f138, %f136, %f101, %f133;
    shl.b16 %rs129, %rs33, 4;
    cvt.s16.s8 %rs130, %rs129;
    shr.s16 %rs131, %rs130, 7;
    and.b16 %rs132, %rs131, -16;
    or.b16 %rs133, %rs132, %rs33;
    cvt.rn.f32.s16 %f139, %rs133;
    sub.ftz.f32 %f140, %f139, %f4;
    mul.ftz.f32 %f141, %f33, %f140;
    // begin inline asm
    { cvt.f32.f16 %f102, %rs34;}
    // end inline asm
    fma.rn.ftz.f32 %f188, %f141, %f102, %f137;
    // begin inline asm
    { cvt.f32.f16 %f103, %rs35;}
    // end inline asm
    fma.rn.ftz.f32 %f189, %f141, %f103, %f138;

$L__BB0_8:
    add.s32 %r109, %r109, 4;
    shl.b32 %r64, %r109, 5;
    add.s32 %r108, %r64, %r3;
    setp.lt.u32 %p7, %r108, %r27;
    @%p7 bra $L__BB0_2;

$L__BB0_9:
    shl.b32 %r65, %r4, 2;
    mov.u32 %r66, _ZZ9gemv_int4ILi4ELi32ELi2EEvP6__halfPKS0_S3_PKjPKhS3_iiiiffbE12temp_storage;
    add.s32 %r67, %r66, %r65;
    setp.lt.u32 %p8, %r4, 32;
    @%p8 bra $L__BB0_11;

    add.s32 %r103, %r67, -112;
    st.shared.f32 [%r103], %f188;

$L__BB0_11:
    setp.gt.u32 %p9, %r4, 31;
    bar.sync 0;
    mad.lo.s32 %r19, %r4, 12, %r66;
    @%p9 bra $L__BB0_13;

    mov.u32 %r82, 16;
    ld.shared.f32 %f157, [%r19+16];
    add.ftz.f32 %f158, %f188, %f157;
    ld.shared.f32 %f159, [%r19+20];
    add.ftz.f32 %f160, %f158, %f159;
    ld.shared.f32 %f161, [%r19+24];
    add.ftz.f32 %f144, %f160, %f161;
    mov.u32 %r70, 1;
    mov.u32 %r83, 31;
    mov.u32 %r84, -1;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f144, %r70, %r83, %r84; @p add.f32 r0, r0, %f144; mov.f32 %f142, r0;}
    // end inline asm
    mov.u32 %r73, 2;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f142, %r73, %r83, %r84; @p add.f32 r0, r0, %f142; mov.f32 %f145, r0;}
    // end inline asm
    mov.u32 %r76, 4;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f145, %r76, %r83, %r84; @p add.f32 r0, r0, %f145; mov.f32 %f148, r0;}
    // end inline asm
    mov.u32 %r79, 8;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f148, %r79, %r83, %r84; @p add.f32 r0, r0, %f148; mov.f32 %f151, r0;}
    // end inline asm
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f151, %r82, %r83, %r84; @p add.f32 r0, r0, %f151; mov.f32 %f188, r0;}
    // end inline asm

$L__BB0_13:
    @%p8 bra $L__BB0_15;

    add.s32 %r104, %r67, -112;
    st.shared.f32 [%r104+640], %f189;

$L__BB0_15:
    bar.sync 0;
    @%p9 bra $L__BB0_17;

    ld.shared.f32 %f177, [%r19+656];
    add.ftz.f32 %f178, %f189, %f177;
    ld.shared.f32 %f179, [%r19+660];
    add.ftz.f32 %f180, %f178, %f179;
    ld.shared.f32 %f181, [%r19+664];
    add.ftz.f32 %f164, %f180, %f181;
    mov.u32 %r86, 1;
    mov.u32 %r99, 31;
    mov.u32 %r100, -1;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f164, %r86, %r99, %r100; @p add.f32 r0, r0, %f164; mov.f32 %f162, r0;}
    // end inline asm
    mov.u32 %r89, 2;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f162, %r89, %r99, %r100; @p add.f32 r0, r0, %f162; mov.f32 %f165, r0;}
    // end inline asm
    mov.u32 %r92, 4;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f165, %r92, %r99, %r100; @p add.f32 r0, r0, %f165; mov.f32 %f168, r0;}
    // end inline asm
    mov.u32 %r95, 8;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f168, %r95, %r99, %r100; @p add.f32 r0, r0, %f168; mov.f32 %f171, r0;}
    // end inline asm
    mov.u32 %r98, 16;
    // begin inline asm
    { .reg .f32 r0; .reg .pred p; shfl.sync.down.b32 r0|p, %f171, %r98, %r99, %r100; @p add.f32 r0, r0, %f171; mov.f32 %f189, r0;}
    // end inline asm

$L__BB0_17:
    mov.u32 %r105, %tid.y;
    or.b32 %r101, %r3, %r105;
    setp.ne.s32 %p12, %r101, 0;
    @%p12 bra $L__BB0_23;

    mov.u32 %r106, %ctaid.x;
    setp.eq.s64 %p13, %rd10, 0;
    mul.ftz.f32 %f194, %f29, %f188;
    cvt.s64.s32 %rd7, %r106;
    @%p13 bra $L__BB0_20;

    shl.b64 %rd31, %rd7, 1;
    add.s64 %rd32, %rd2, %rd31;
    ld.global.u16 %rs134, [%rd32];
    // begin inline asm
    { cvt.f32.f16 %f182, %rs134;}
    // end inline asm
    fma.rn.ftz.f32 %f194, %f30, %f182, %f194;

$L__BB0_20:
    // begin inline asm
    { cvt.rn.f16.f32 %rs135, %f194;}
    // end inline asm
    // begin inline asm
    createpolicy.fractional.L2::evict_last.L2::evict_unchanged.b64 %rd33, 1.0;
    // end inline asm
    shl.b64 %rd36, %rd7, 1;
    add.s64 %rd34, %rd9, %rd36;
    // begin inline asm
    st.global.L1::evict_normal.L2::cache_hint.u16 [%rd34], %rs135, %rd33;
    // end inline asm
    mul.ftz.f32 %f195, %f29, %f189;
    @%p13 bra $L__BB0_22;

    mov.u32 %r107, %ctaid.x;
    add.s32 %r102, %r26, %r107;
    mul.wide.s32 %rd37, %r102, 2;
    add.s64 %rd38, %rd2, %rd37;
    ld.global.u16 %rs137, [%rd38];
    // begin inline asm
    { cvt.f32.f16 %f184, %rs137;}
    // end inline asm
    fma.rn.ftz.f32 %f195, %f30, %f184, %f195;

$L__BB0_22:
    mul.wide.s32 %rd42, %r26, 2;
    add.s64 %rd40, %rd34, %rd42;
    // begin inline asm
    { cvt.rn.f16.f32 %rs138, %f195;}
    // end inline asm
    // begin inline asm
    createpolicy.fractional.L2::evict_last.L2::evict_unchanged.b64 %rd39, 1.0;
    // end inline asm
    // begin inline asm
    st.global.L1::evict_normal.L2::cache_hint.u16 [%rd40], %rs138, %rd39;
    // end inline asm

$L__BB0_23:
    ret;

}
    // .globl _ZN3cub11EmptyKernelIvEEvv
.visible .entry _ZN3cub11EmptyKernelIvEEvv()
{
    ret;

}
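The PTX above is dense, so here is a minimal CUDA C++ sketch of the arithmetic that one iteration of the loop at $L__BB0_2 appears to perform: one u32 of packed int4 weights is unpacked into eight nibbles, each nibble is dequantized against a per-group fp16 scale and 4-bit zero point, and the result is FMA-accumulated against eight fp16 activations for each of the two batched input vectors. The function name and parameters are hypothetical (this is not the original gemv.cu source), and the sketch omits the vectorized ld.global.v4.u32 loads, the L1/L2 cache-hint policies, the signed-nibble path selected by the flag byte, and the shared-memory/warp-shuffle reduction that follows the loop.

#include <cuda_fp16.h>
#include <cstdint>

// Hypothetical helper, not the original gemv.cu code: dequantize eight int4
// weights and accumulate them against eight fp16 activations per batch.
__device__ void dequant_fma8(uint32_t packed,               // eight 4-bit weights in one u32
                             half scale,                    // per-group scale (group size 32)
                             int zero,                      // per-group zero point, 0..15
                             const half* __restrict__ x0,   // 8 activations, batch 0
                             const half* __restrict__ x1,   // 8 activations, batch 1
                             float& acc0, float& acc1) {
    const float s = __half2float(scale);
    const float z = static_cast<float>(zero);
#pragma unroll
    for (int i = 0; i < 8; ++i) {
        const int q = (packed >> (4 * i)) & 0xF;             // unpack nibble i
        const float w = s * (static_cast<float>(q) - z);     // dequantize: (q - zero) * scale
        acc0 = fmaf(w, __half2float(x0[i]), acc0);           // batch-0 dot-product partial
        acc1 = fmaf(w, __half2float(x1[i]), acc1);           // batch-1 dot-product partial
    }
}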