tionKernel_param_0[112] ) { .reg .pred %p<449>; .reg .b16 %rs<42>; .reg .f32 %f<2701>; .reg .b32 %r<419>; .reg .b64 %rd<148>; ld.param.v2.f32 {%f381, %f382}, [ActivationKernel_param_0+16]; ld.param.v4.u8 {%rs23, %rs24, %rs25, %rs26}, [ActivationKernel_param_0+104]; ld.param.u64 %rd78, [ActivationKernel_param_0+96]; ld.param.u32 %r32, [ActivationKernel_param_0+92]; ld.param.u8 %rs1, [ActivationKernel_param_0+88]; ld.param.u64 %rd77, [ActivationKernel_param_0+80]; ld.param.u64 %rd76, [ActivationKernel_param_0+72]; ld.param.u64 %rd75, [ActivationKernel_param_0+64]; ld.param.u64 %rd74, [ActivationKernel_param_0+56]; ld.param.u64 %rd73, [ActivationKernel_param_0+48]; ld.param.u64 %rd72, [ActivationKernel_param_0+40]; ld.param.u64 %rd71, [ActivationKernel_param_0+32]; ld.param.u64 %rd70, [ActivationKernel_param_0+24]; ld.param.u64 %rd69, [ActivationKernel_param_0+8]; ld.param.u64 %rd68, [ActivationKernel_param_0]; cvta.to.global.u64 %rd1, %rd68; setp.eq.s16 %p1, %rs23, 0; mov.u32 %r33, %ctaid.x; shl.b32 %r34, %r33, 8; mov.u32 %r35, %tid.x; add.s32 %r1, %r34, %r35; @%p1 bra $L__BB0_442; cvt.u64.u32 %rd4, %r1; shr.u64 %rd5, %rd78, 2; setp.lt.u64 %p2, %rd5, %rd4; @%p2 bra $L__BB0_586; setp.gt.u64 %p3, %rd5, %rd4; @%p3 bra $L__BB0_95; bra.uni $L__BB0_3; $L__BB0_95: setp.eq.s16 %p75, %rs1, 0; @%p75 bra $L__BB0_269; shl.b64 %rd85, %rd4, 3; add.s64 %rd14, %rd1, %rd85; ld.global.v2.f32 {%f754, %f755}, [%rd14]; mov.b32 %r91, %f754; mov.b32 {%rs31, %rs3}, %r91; // begin inline asm { cvt.f32.f16 %f2673, %rs31;} // end inline asm setp.gt.s64 %p76, %rd69, 9; @%p76 bra $L__BB0_106; setp.gt.s64 %p86, %rd69, 4; @%p86 bra $L__BB0_101; bra.uni $L__BB0_98; $L__BB0_101: setp.gt.s64 %p87, %rd69, 7; @%p87 bra $L__BB0_104; setp.eq.s64 %p90, %rd69, 5; @%p90 bra $L__BB0_134; setp.eq.s64 %p91, %rd69, 7; @%p91 bra $L__BB0_135; bra.uni $L__BB0_139; $L__BB0_442: shl.b32 %r301, %r1, 2; cvt.u64.u32 %rd20, %r301; mul.lo.s64 %rd87, %rd70, %rd71; mul.lo.s64 %rd88, %rd87, %rd72; mul.lo.s64 %rd89, %rd88, %rd73; setp.le.u64 %p348, %rd89, %rd20; @%p348 bra $L__BB0_586; setp.eq.s32 %p349, %r32, 1; @%p349 bra $L__BB0_454; setp.ne.s32 %p350, %r32, 0; @%p350 bra $L__BB0_463; and.b64 %rd90, %rd73, -4294967296; setp.eq.s64 %p351, %rd90, 0; @%p351 bra $L__BB0_447; div.u64 %rd141, %rd20, %rd73; bra.uni $L__BB0_448; $L__BB0_3: cvt.u32.u64 %r36, %rd78; and.b32 %r409, %r36, 3; setp.eq.s32 %p4, %r409, 0; @%p4 bra $L__BB0_586; mul.f32 %f3, %f382, %f381; and.b32 %r38, %r36, 3; mul.wide.u32 %rd79, %r38, 4; shl.b64 %rd80, %rd78, 2; sub.s64 %rd81, %rd80, %rd79; add.s64 %rd133, %rd1, %rd81; mul.wide.u32 %rd82, %r38, 2; shl.b64 %rd83, %rd78, 1; sub.s64 %rd84, %rd83, %rd82; add.s64 %rd132, %rd1, %rd84; $L__BB0_5: add.s32 %r409, %r409, -1; setp.eq.s16 %p5, %rs1, 0; @%p5 bra $L__BB0_50; ld.global.u16 %rs28, [%rd132]; // begin inline asm { cvt.f32.f16 %f2667, %rs28;} // end inline asm setp.gt.s64 %p6, %rd69, 9; @%p6 bra $L__BB0_16; setp.gt.s64 %p16, %rd69, 4; @%p16 bra $L__BB0_11; bra.uni $L__BB0_8; $L__BB0_11: setp.gt.s64 %p17, %rd69, 7; @%p17 bra $L__BB0_14; setp.eq.s64 %p20, %rd69, 5; @%p20 bra $L__BB0_44; setp.eq.s64 %p21, %rd69, 7; @%p21 bra $L__BB0_45; bra.uni $L__BB0_49; $L__BB0_50: ld.global.f32 %f2670, [%rd133]; setp.gt.s64 %p40, %rd69, 9; @%p40 bra $L__BB0_60; setp.gt.s64 %p50, %rd69, 4; @%p50 bra $L__BB0_55; bra.uni $L__BB0_52; $L__BB0_55: setp.gt.s64 %p51, %rd69, 7; @%p51 bra $L__BB0_58; setp.eq.s64 %p54, %rd69, 5; @%p54 bra $L__BB0_88; setp.eq.s64 %p55, %rd69, 7; @%p55 bra $L__BB0_89; bra.uni $L__BB0_93; $L__BB0_16: setp.gt.s64 %p7, %rd69, 13; @%p7 bra $L__BB0_20; bra.uni $L__BB0_17; $L__BB0_20: setp.gt.s64 %p8, %rd69, 15; @%p8 bra $L__BB0_23; setp.eq.s64 %p11, %rd69, 14; @%p11 bra $L__BB0_30; setp.eq.s64 %p12, %rd69, 15; @%p12 bra $L__BB0_29; bra.uni $L__BB0_49; $L__BB0_29: abs.f32 %f402, %f2667; add.f32 %f403, %f402, 0f3F800000; div.rn.f32 %f2667, %f2667, %f403; bra.uni $L__BB0_49; $L__BB0_60: setp.gt.s64 %p41, %rd69, 13; @%p41 bra $L__BB0_64; bra.uni $L__BB0_61; $L__BB0_64: setp.gt.s64 %p42, %rd69, 15; @%p42 bra $L__BB0_67; setp.eq.s64 %p45, %rd69, 14; @%p45 bra $L__BB0_74; setp.eq.s64 %p46, %rd69, 15; @%p46 bra $L__BB0_73; bra.uni $L__BB0_93; $L__BB0_73: abs.f32 %f587, %f2670; add.f32 %f588, %f587, 0f3F800000; div.rn.f32 %f2670, %f2670, %f588; bra.uni $L__BB0_93; $L__BB0_8: setp.eq.s64 %p22, %rd69, 0; @%p22 bra $L__BB0_47; setp.eq.s64 %p23, %rd69, 2; @%p23 bra $L__BB0_46; setp.eq.s64 %p24, %rd69, 4; @%p24 bra $L__BB0_45; bra.uni $L__BB0_49; $L__BB0_45: setp.lt.f32 %p38, %f2667, 0f00000000; mul.f32 %f547, %f381, %f2667; selp.f32 %f2667, %f547, %f2667, %p38; bra.uni $L__BB0_49; $L__BB0_17: setp.eq.s64 %p13, %rd69, 10; @%p13 bra $L__BB0_37; setp.eq.s64 %p14, %rd69, 11; @%p14 bra $L__BB0_34; setp.eq.s64 %p15, %rd69, 12; @%p15 bra $L__BB0_33; bra.uni $L__BB0_49; $L__BB0_33: neg.f32 %f450, %f2667; mov.f32 %f451, 0f3F000000; mov.f32 %f452, 0f3BBB989D; fma.rn.f32 %f453, %f450, %f452, %f451; mov.f32 %f454, 0f3FB8AA3B; mov.f32 %f455, 0f437C0000; cvt.sat.f32.f32 %f456, %f453; mov.f32 %f457, 0f4B400001; fma.rm.f32 %f458, %f456, %f455, %f457; add.f32 %f459, %f458, 0fCB40007F; neg.f32 %f460, %f459; fma.rn.f32 %f461, %f450, %f454, %f460; mov.f32 %f462, 0f32A57060; fma.rn.f32 %f463, %f450, %f462, %f461; mov.b32 %r49, %f458; shl.b32 %r50, %r49, 23; mov.b32 %f464, %r50; ex2.approx.ftz.f32 %f465, %f463; fma.rn.f32 %f466, %f465, %f464, 0f3F800000; rcp.rn.f32 %f2667, %f466; bra.uni $L__BB0_49; $L__BB0_14: setp.eq.s64 %p18, %rd69, 8; @%p18 bra $L__BB0_41; setp.eq.s64 %p19, %rd69, 9; @%p19 bra $L__BB0_40; bra.uni $L__BB0_49; $L__BB0_40: setp.lt.f32 %p34, %f2667, 0f00000000; selp.f32 %f2667, 0f00000000, %f2667, %p34; bra.uni $L__BB0_49; $L__BB0_23: setp.eq.s64 %p9, %rd69, 16; @%p9 bra $L__BB0_26; setp.ne.s64 %p10, %rd69, 17; @%p10 bra $L__BB0_49; setp.ge.f32 %p25, %f381, %f2667; selp.f32 %f2667, 0f00000000, %f2667, %p25; bra.uni $L__BB0_49; $L__BB0_52: setp.eq.s64 %p56, %rd69, 0; @%p56 bra $L__BB0_91; setp.eq.s64 %p57, %rd69, 2; @%p57 bra $L__BB0_90; setp.eq.s64 %p58, %rd69, 4; @%p58 bra $L__BB0_89; bra.uni $L__BB0_93; $L__BB0_89: setp.lt.f32 %p72, %f2670, 0f00000000; mul.f32 %f732, %f381, %f2670; selp.f32 %f2670, %f732, %f2670, %p72; bra.uni $L__BB0_93; $L__BB0_61: setp.eq.s64 %p47, %rd69, 10; @%p47 bra $L__BB0_81; setp.eq.s64 %p48, %rd69, 11; @%p48 bra $L__BB0_78; setp.eq.s64 %p49, %rd69, 12; @%p49 bra $L__BB0_77; bra.uni $L__BB0_93; $L__BB0_77: neg.f32 %f635, %f2670; mov.f32 %f636, 0f3F000000; mov.f32 %f637, 0f3BBB989D; fma.rn.f32 %f638, %f635, %f637, %f636; mov.f32 %f639, 0f3FB8AA3B; mov.f32 %f640, 0f437C0000; cvt.sat.f32.f32 %f641, %f638; mov.f32 %f642, 0f4B400001; fma.rm.f32 %f643, %f641, %f640, %f642; add.f32 %f644, %f643, 0fCB40007F; neg.f32 %f645, %f644; fma.rn.f32 %f646, %f635, %f639, %f645; mov.f32 %f647, 0f32A57060; fma.rn.f32 %f648, %f635, %f647, %f646; mov.b32 %r75, %f643; shl.b32 %r76, %r75, 23; mov.b32 %f649, %r76; ex2.approx.ftz.f32 %f650, %f648; fma.rn.f32 %f651, %f650, %f649, 0f3F800000; rcp.rn.f32 %f2670, %f651; bra.uni $L__BB0_93; $L__BB0_58: setp.eq.s64 %p52, %rd69, 8; @%p52 bra $L__BB0_85; setp.eq.s64 %p53, %rd69, 9; @%p53 bra $L__BB0_84; bra.uni $L__BB0_93; $L__BB0_84: setp.lt.f32 %p68, %f2670, 0f00000000; selp.f32 %f2670, 0f00000000, %f2670, %p68; bra.uni $L__BB0_93; $L__BB0_67: setp.eq.s64 %p43, %rd69, 16; @%p43 bra $L__BB0_70; setp.ne.s64 %p44, %rd69, 17; @%p44 bra $L__BB0_93; setp.ge.f32 %p59, %f381, %f2670; selp.f32 %f2670, 0f00000000, %f2670, %p59; bra.uni $L__BB0_93; $L__BB0_44: fma.rn.f32 %f2667, %f381, %f2667, %f382; bra.uni $L__BB0_49; $L__BB0_30: mul.f32 %f404, %f381, %f2667; mov.f32 %f405, 0f3F000000; mov.f32 %f406, 0f3BBB989D; fma.rn.f32 %f407, %f404, %f406, %f405; mov.f32 %f408, 0f3FB8AA3B; mov.f32 %f409, 0f437C0000; cvt.sat.f32.f32 %f410, %f407; mov.f32 %f411, 0f4B400001; fma.rm.f32 %f412, %f410, %f409, %f411; add.f32 %f413, %f412, 0fCB40007F; neg.f32 %f414, %f413; fma.rn.f32 %f415, %f404, %f408, %f414; mov.f32 %f416, 0f32A57060; fma.rn.f32 %f417, %f404, %f416, %f415; mov.b32 %r43, %f412; shl.b32 %r44, %r43, 23; mov.b32 %f418, %r44; ex2.approx.ftz.f32 %f419, %f417; fma.rn.f32 %f420, %f419, %f418, 0f3F800000; setp.lt.f32 %p28, %f420, 0f00800000; mul.f32 %f421, %f420, 0f4B000000; selp.f32 %f10, %f421, %f420, %p28; selp.f32 %f422, 0fC1B80000, 0f00000000, %p28; mov.b32 %r45, %f10; add.s32 %r46, %r45, -1059760811; and.b32 %r47, %r46, -8388608; sub.s32 %r48, %r45, %r47; mov.b32 %f423, %r48; cvt.rn.f32.s32 %f424, %r47; mov.f32 %f425, 0f34000000; fma.rn.f32 %f426, %f424, %f425, %f422; add.f32 %f427, %f423, 0fBF800000; mov.f32 %f428, 0f3E1039F6; mov.f32 %f429, 0fBE055027; fma.rn.f32 %f430, %f429, %f427, %f428; mov.f32 %f431, 0fBDF8CDCC; fma.rn.f32 %f432, %f430, %f427, %f431; mov.f32 %f433, 0f3E0F2955; fma.rn.f32 %f434, %f432, %f427, %f433; mov.f32 %f435, 0fBE2AD8B9; fma.rn.f32 %f436, %f434, %f427, %f435; mov.f32 %f437, 0f3E4CED0B; fma.rn.f32 %f438, %f436, %f427, %f437; mov.f32 %f439, 0fBE7FFF22; fma.rn.f32 %f440, %f438, %f427, %f439; mov.f32 %f441, 0f3EAAAA78; fma.rn.f32 %f442, %f440, %f427, %f441; mov.f32 %f443, 0fBF000000; fma.rn.f32 %f444, %f442, %f427, %f443; mul.f32 %f445, %f444, %f427; fma.rn.f32 %f446, %f445, %f427, %f427; mov.f32 %f447, 0f3F317218; fma.rn.f32 %f2665, %f426, %f447, %f446; setp.lt.u32 %p29, %r45, 2139095040; @%p29 bra $L__BB0_32; mov.f32 %f448, 0f7F800000; fma.rn.f32 %f2665, %f10, %f448, %f448; $L__BB0_32: setp.eq.f32 %p30, %f10, 0f00000000; selp.f32 %f449, 0fFF800000, %f2665, %p30; div.rn.f32 %f2667, %f449, %f381; bra.uni $L__BB0_49; $L__BB0_47: setp.geu.f32 %p39, %f2667, 0f00000000; @%p39 bra $L__BB0_49; mov.f32 %f552, 0f3F000000; mov.f32 %f553, 0f3BBB989D; fma.rn.f32 %f554, %f2667, %f553, %f552; mov.f32 %f555, 0f3FB8AA3B; mov.f32 %f556, 0f437C0000; cvt.sat.f32.f32 %f557, %f554; mov.f32 %f558, 0f4B400001; fma.rm.f32 %f559, %f557, %f556, %f558; add.f32 %f560, %f559, 0fCB40007F; neg.f32 %f561, %f560; fma.rn.f32 %f562, %f2667, %f555, %f561; mov.f32 %f563, 0f32A57060; fma.rn.f32 %f564, %f2667, %f563, %f562; mov.b32 %r63, %f559; shl.b32 %r64, %r63, 23; mov.b32 %f565, %r64; ex2.approx.ftz.f32 %f566, %f564; fma.rn.f32 %f567, %f566, %f565, 0fBF800000; mul.f32 %f2667, %f381, %f567; bra.uni $L__BB0_49; $L__BB0_46: fma.rn.f32 %f548, %f381, %f2667, %f382; mov.f32 %f549, 0f3F800000; min.f32 %f550, %f549, %f548; mov.f32 %f551, 0f00000000; max.f32 %f2667, %f551, %f550; bra.uni $L__BB0_49; $L__BB0_37: setp.gt.f32 %p33, %f2667, 0f00000000; @%p33 bra $L__BB0_39; bra.uni $L__BB0_38; $L__BB0_39: mul.f32 %f2667, %f382, %f2667; bra.uni $L__BB0_49; $L__BB0_34: mul.f32 %f16, %f382, %f2667; abs.f32 %f17, %f16; setp.ltu.f32 %p31, %f17, 0f3F19999A; @%p31 bra $L__BB0_36; bra.uni $L__BB0_35; $L__BB0_36: mul.f32 %f475, %f16, %f16; mov.f32 %f476, 0fBD563CAE; mov.f32 %f477, 0f3C80F082; fma.rn.f32 %f478, %f477, %f475, %f476; mov.f32 %f479, 0f3E085941; fma.rn.f32 %f480, %f478, %f475, %f479; mov.f32 %f481, 0fBEAAA9ED; fma.rn.f32 %f482, %f480, %f475, %f481; mov.f32 %f483, 0f00000000; fma.rn.f32 %f484, %f482, %f475, %f483; fma.rn.f32 %f19, %f484, %f16, %f16; mul.f32 %f2667, %f381, %f19; bra.uni $L__BB0_49; $L__BB0_41: mul.f32 %f501, %f382, %f2667; mov.f32 %f502, 0f3F000000; mov.f32 %f503, 0f3BBB989D; fma.rn.f32 %f504, %f501, %f503, %f502; mov.f32 %f505, 0f3FB8AA3B; mov.f32 %f506, 0f437C0000; cvt.sat.f32.f32 %f507, %f504; mov.f32 %f508, 0f4B400001; fma.rm.f32 %f509, %f507, %f506, %f508; add.f32 %f510, %f509, 0fCB40007F; neg.f32 %f511, %f510; fma.rn.f32 %f512, %f501, %f505, %f511; mov.f32 %f513, 0f32A57060; fma.rn.f32 %f514, %f501, %f513, %f512; mov.b32 %r57, %f509; shl.b32 %r58, %r57, 23; mov.b32 %f515, %r58; ex2.approx.ftz.f32 %f516, %f514; fma.rn.f32 %f517, %f516, %f515, 0f3F800000; setp.lt.f32 %p35, %f517, 0f00800000; mul.f32 %f518, %f517, 0f4B000000; selp.f32 %f25, %f518, %f517, %p35; selp.f32 %f519, 0fC1B80000, 0f00000000, %p35; mov.b32 %r59, %f25; add.s32 %r60, %r59, -1059760811; and.b32 %r61, %r60, -8388608; sub.s32 %r62, %r59, %r61; mov.b32 %f520, %r62; cvt.rn.f32.s32 %f521, %r61; mov.f32 %f522, 0f34000000; fma.rn.f32 %f523, %f521, %f522, %f519; add.f32 %f524, %f520, 0fBF800000; mov.f32 %f525, 0f3E1039F6; mov.f32 %f526, 0fBE055027; fma.rn.f32 %f527, %f526, %f524, %f525; mov.f32 %f528, 0fBDF8CDCC; fma.rn.f32 %f529, %f527, %f524, %f528; mov.f32 %f530, 0f3E0F2955; fma.rn.f32 %f531, %f529, %f524, %f530; mov.f32 %f532, 0fBE2AD8B9; fma.rn.f32 %f533, %f531, %f524, %f532; mov.f32 %f534, 0f3E4CED0B; fma.rn.f32 %f535, %f533, %f524, %f534; mov.f32 %f536, 0fBE7FFF22; fma.rn.f32 %f537, %f535, %f524, %f536; mov.f32 %f538, 0f3EAAAA78; fma.rn.f32 %f539, %f537, %f524, %f538; mov.f32 %f540, 0fBF000000; fma.rn.f32 %f541, %f539, %f524, %f540; mul.f32 %f542, %f541, %f524; fma.rn.f32 %f543, %f542, %f524, %f524; mov.f32 %f544, 0f3F317218; fma.rn.f32 %f2666, %f523, %f544, %f543; setp.lt.u32 %p36, %r59, 2139095040; @%p36 bra $L__BB0_43; mov.f32 %f545, 0f7F800000; fma.rn.f32 %f2666, %f25, %f545, %f545; $L__BB0_43: setp.eq.f32 %p37, %f25, 0f00000000; selp.f32 %f546, 0fFF800000, %f2666, %p37; mul.f32 %f2667, %f381, %f546; bra.uni $L__BB0_49; $L__BB0_26: abs.f32 %f6, %f2667; setp.ltu.f32 %p26, %f6, 0f3F19999A; @%p26 bra $L__BB0_28; bra.uni $L__BB0_27; $L__BB0_28: mul.f32 %f392, %f2667, %f2667; mov.f32 %f393, 0fBD563CAE; mov.f32 %f394, 0f3C80F082; fma.rn.f32 %f395, %f394, %f392, %f393; mov.f32 %f396, 0f3E085941; fma.rn.f32 %f397, %f395, %f392, %f396; mov.f32 %f398, 0fBEAAA9ED; fma.rn.f32 %f399, %f397, %f392, %f398; mov.f32 %f400, 0f00000000; fma.rn.f32 %f401, %f399, %f392, %f400; fma.rn.f32 %f2667, %f401, %f2667, %f2667; bra.uni $L__BB0_49; $L__BB0_88: fma.rn.f32 %f2670, %f381, %f2670, %f382; bra.uni $L__BB0_93; $L__BB0_74: mul.f32 %f589, %f381, %f2670; mov.f32 %f590, 0f3F000000; mov.f32 %f591, 0f3BBB989D; fma.rn.f32 %f592, %f589, %f591, %f590; mov.f32 %f593, 0f3FB8AA3B; mov.f32 %f594, 0f437C0000; cvt.sat.f32.f32 %f595, %f592; mov.f32 %f596, 0f4B400001; fma.rm.f32 %f597, %f595, %f594, %f596; add.f32 %f598, %f597, 0fCB40007F; neg.f32 %f599, %f598; fma.rn.f32 %f600, %f589, %f593, %f599; mov.f32 %f601, 0f32A57060; fma.rn.f32 %f602, %f589, %f601, %f600; mov.b32 %r69, %f597; shl.b32 %r70, %r69, 23; mov.b32 %f603, %r70; ex2.approx.ftz.f32 %f604, %f602; fma.rn.f32 %f605, %f604, %f603, 0f3F800000; setp.lt.f32 %p62, %f605, 0f00800000; mul.f32 %f606, %f605, 0f4B000000; selp.f32 %f41, %f606, %f605, %p62; selp.f32 %f607, 0fC1B80000, 0f00000000, %p62; mov.b32 %r71, %f41; add.s32 %r72, %r71, -1059760811; and.b32 %r73, %r72, -8388608; sub.s32 %r74, %r71, %r73; mov.b32 %f608, %r74; cvt.rn.f32.s32 %f609, %r73; mov.f32 %f610, 0f34000000; fma.rn.f32 %f611, %f609, %f610, %f607; add.f32 %f612, %f608, 0fBF800000; mov.f32 %f613, 0f3E1039F6; mov.f32 %f614, 0fBE055027; fma.rn.f32 %f615, %f614, %f612, %f613; mov.f32 %f616, 0fBDF8CDCC; fma.rn.f32 %f617, %f615, %f612, %f616; mov.f32 %f618, 0f3E0F2955; fma.rn.f32 %f619, %f617, %f612, %f618; mov.f32 %f620, 0fBE2AD8B9; fma.rn.f32 %f621, %f619, %f612, %f620; mov.f32 %f622, 0f3E4CED0B; fma.rn.f32 %f623, %f621, %f612, %f622; mov.f32 %f624, 0fBE7FFF22; fma.rn.f32 %f625, %f623, %f612, %f624; mov.f32 %f626, 0f3EAAAA78; fma.rn.f32 %f627, %f625, %f612, %f626; mov.f32 %f628, 0fBF000000; fma.rn.f32 %f629, %f627, %f612, %f628; mul.f32 %f630, %f629, %f612; fma.rn.f32 %f631, %f630, %f612, %f612; mov.f32 %f632, 0f3F317218; fma.rn.f32 %f2668, %f611, %f632, %f631; setp.lt.u32 %p63, %r71, 2139095040; @%p63 bra $L__BB0_76; mov.f32 %f633, 0f7F800000; fma.rn.f32 %f2668, %f41, %f633, %f633; $L__BB0_76: setp.eq.f32 %p64, %f41, 0f00000000; selp.f32 %f634, 0fFF800000, %f2668, %p64; div.rn.f32 %f2670, %f634, %f381; bra.uni $L__BB0_93; $L__BB0_91: setp.geu.f32 %p73, %f2670, 0f00000000; @%p73 bra $L__BB0_93; mov.f32 %f737, 0f3F000000; mov.f32 %f738, 0f3BBB989D; fma.rn.f32 %f739, %f2670, %f738, %f737; mov.f32 %f740, 0f3FB8AA3B; mov.f32 %f741, 0f437C0000; cvt.sat.f32.f32 %f742, %f739; mov.f32 %f743, 0f4B400001; fma.rm.f32 %f744, %f742, %f741, %f743; add.f32 %f745, %f744, 0fCB40007F; neg.f32 %f746, %f745; fma.rn.f32 %f747, %f2670, %f740, %f746; mov.f32 %f748, 0f32A57060; fma.rn.f32 %f749, %f2670, %f748, %f747; mov.b32 %r89, %f744; shl.b32 %r90, %r89, 23; mov.b32 %f750, %r90; ex2.approx.ftz.f32 %f751, %f749; fma.rn.f32 %f752, %f751, %f750, 0fBF800000; mul.f32 %f2670, %f381, %f752; bra.uni $L__BB0_93; $L__BB0_81: setp.gt.f32 %p67, %f2670, 0f00000000; @%p67 bra $L__BB0_83; bra.uni $L__BB0_82; $L__BB0_83: mul.f32 %f2670, %f382, %f2670; bra.uni $L__BB0_93; $L__BB0_85: mul.f32 %f686, %f382, %f2670; mov.f32 %f687, 0f3F000000; mov.f32 %f688, 0f3BBB989D; fma.rn.f32 %f689, %f686, %f688, %f687; mov.f32 %f690, 0f3FB8AA3B; mov.f32 %f691, 0f437C0000; cvt.sat.f32.f32 %f692, %f689; mov.f32 %f693, 0f4B400001; fma.rm.f32 %f694, %f692, %f691, %f693; add.f32 %f695, %f694, 0fCB40007F; neg.f32 %f696, %f695; fma.rn.f32 %f697, %f686, %f690, %f696; mov.f32 %f698, 0f32A57060; fma.rn.f32 %f699, %f686, %f698, %f697; mov.b32 %r83, %f694; shl.b32 %r84, %r83, 23; mov.b32 %f700, %r84; ex2.approx.ftz.f32 %f701, %f699; fma.rn.f32 %f702, %f701, %f700, 0f3F800000; setp.lt.f32 %p69, %f702, 0f00800000; mul.f32 %f703, %f702, 0f4B000000; selp.f32 %f56, %f703, %f702, %p69; selp.f32 %f704, 0fC1B80000, 0f00000000, %p69; mov.b32 %r85, %f56; add.s32 %r86, %r85, -1059760811; and.b32 %r87, %r86, -8388608; sub.s32 %r88, %r85, %r87; mov.b32 %f705, %r88; cvt.rn.f32.s32 %f706, %r87; mov.f32 %f707, 0f34000000; fma.rn.f32 %f708, %f706, %f707, %f704; add.f32 %f709, %f705, 0fBF800000; mov.f32 %f710, 0f3E1039F6; mov.f32 %f711, 0fBE055027; fma.rn.f32 %f712, %f711, %f709, %f710; mov.f32 %f713, 0fBDF8CDCC; fma.rn.f32 %f714, %f712, %f709, %f713; mov.f32 %f715, 0f3E0F2955; fma.rn.f32 %f716, %f714, %f709, %f715; mov.f32 %f717, 0fBE2AD8B9; fma.rn.f32 %f718, %f716, %f709, %f717; mov.f32 %f719, 0f3E4CED0B; fma.rn.f32 %f720, %f718, %f709, %f719; mov.f32 %f721, 0fBE7FFF22; fma.rn.f32 %f722, %f720, %f709, %f721; mov.f32 %f723, 0f3EAAAA78; fma.rn.f32 %f724, %f722, %f709, %f723; mov.f32 %f725, 0fBF000000; fma.rn.f32 %f726, %f724, %f709, %f725; mul.f32 %f727, %f726, %f709; fma.rn.f32 %f728, %f727, %f709, %f709; mov.f32 %f729, 0f3F317218; fma.rn.f32 %f2669, %f708, %f729, %f728; setp.lt.u32 %p70, %r85, 2139095040; @%p70 bra $L__BB0_87; mov.f32 %f730, 0f7F800000; fma.rn.f32 %f2669, %f56, %f730, %f730; $L__BB0_87: setp.eq.f32 %p71, %f56, 0f00000000; selp.f32 %f731, 0fFF800000, %f2669, %p71; mul.f32 %f2670, %f381, %f731; bra.uni $L__BB0_93; $L__BB0_70: abs.f32 %f37, %f2670; setp.ltu.f32 %p60, %f37, 0f3F19999A; @%p60 bra $L__BB0_72; bra.uni $L__BB0_71; $L__BB0_72: mul.f32 %f577, %f2670, %f2670; mov.f32 %f578, 0fBD563CAE; mov.f32 %f579, 0f3C80F082; fma.rn.f32 %f580, %f579, %f577, %f578; mov.f32 %f581, 0f3E085941; fma.rn.f32 %f582, %f580, %f577, %f581; mov.f32 %f583, 0fBEAAA9ED; fma.rn.f32 %f584, %f582, %f577, %f583; mov.f32 %f585, 0f00000000; fma.rn.f32 %f586, %f584, %f577, %f585; fma.rn.f32 %f2670, %f586, %f2670, %f2670; bra.uni $L__BB0_93; $L__BB0_90: fma.rn.f32 %f733, %f381, %f2670, %f382; mov.f32 %f734, 0f3F800000; min.f32 %f735, %f734, %f733; mov.f32 %f736, 0f00000000; max.f32 %f2670, %f736, %f735; bra.uni $L__BB0_93; $L__BB0_78: mul.f32 %f47, %f382, %f2670; abs.f32 %f48, %f47; setp.ltu.f32 %p65, %f48, 0f3F19999A; @%p65 bra $L__BB0_80; bra.uni $L__BB0_79; $L__BB0_80: mul.f32 %f660, %f47, %f47; mov.f32 %f661, 0fBD563CAE; mov.f32 %f662, 0f3C80F082; fma.rn.f32 %f663, %f662, %f660, %f661; mov.f32 %f664, 0f3E085941; fma.rn.f32 %f665, %f663, %f660, %f664; mov.f32 %f666, 0fBEAAA9ED; fma.rn.f32 %f667, %f665, %f660, %f666; mov.f32 %f668, 0f00000000; fma.rn.f32 %f669, %f667, %f660, %f668; fma.rn.f32 %f50, %f669, %f47, %f47; mul.f32 %f2670, %f381, %f50; bra.uni $L__BB0_93; $L__BB0_38: mov.f32 %f485, 0f3F000000; mov.f32 %f486, 0f3BBB989D; fma.rn.f32 %f487, %f2667, %f486, %f485; mov.f32 %f488, 0f3FB8AA3B; mov.f32 %f489, 0f437C0000; cvt.sat.f32.f32 %f490, %f487; mov.f32 %f491, 0f4B400001; fma.rm.f32 %f492, %f490, %f489, %f491; add.f32 %f493, %f492, 0fCB40007F; neg.f32 %f494, %f493; fma.rn.f32 %f495, %f2667, %f488, %f494; mov.f32 %f496, 0f32A57060; fma.rn.f32 %f497, %f2667, %f496, %f495; mov.b32 %r55, %f492; shl.b32 %r56, %r55, 23; mov.b32 %f498, %r56; ex2.approx.ftz.f32 %f499, %f497; fma.rn.f32 %f500, %f499, %f498, 0fBF800000; mul.f32 %f2667, %f3, %f500; bra.uni $L__BB0_49; $L__BB0_35: mul.f32 %f467, %f17, 0f4038AA3B; ex2.approx.ftz.f32 %f468, %f467; add.f32 %f469, %f468, 0f3F800000; mov.f32 %f470, 0f3F800000; rcp.approx.ftz.f32 %f471, %f469; mov.f32 %f472, 0fC0000000; fma.rn.f32 %f473, %f471, %f472, %f470; setp.ge.f32 %p32, %f17, 0f41102CB4; selp.f32 %f474, 0f3F800000, %f473, %p32; mov.b32 %r51, %f474; mov.b32 %r52, %f16; and.b32 %r53, %r52, -2147483648; or.b32 %r54, %r53, %r51; mov.b32 %f18, %r54; mul.f32 %f2667, %f381, %f18; bra.uni $L__BB0_49; $L__BB0_27: mul.f32 %f384, %f6, 0f4038AA3B; ex2.approx.ftz.f32 %f385, %f384; add.f32 %f386, %f385, 0f3F800000; mov.f32 %f387, 0f3F800000; rcp.approx.ftz.f32 %f388, %f386; mov.f32 %f389, 0fC0000000; fma.rn.f32 %f390, %f388, %f389, %f387; setp.ge.f32 %p27, %f6, 0f41102CB4; selp.f32 %f391, 0f3F800000, %f390, %p27; mov.b32 %r39, %f391; mov.b32 %r40, %f2667; and.b32 %r41, %r40, -2147483648; or.b32 %r42, %r41, %r39; mov.b32 %f2667, %r42; $L__BB0_49: // begin inline asm { cvt.rn.f16.f32 %rs29, %f2667;} // end inline asm st.global.u16 [%rd132], %rs29; bra.uni $L__BB0_94; $L__BB0_82: mov.f32 %f670, 0f3F000000; mov.f32 %f671, 0f3BBB989D; fma.rn.f32 %f672, %f2670, %f671, %f670; mov.f32 %f673, 0f3FB8AA3B; mov.f32 %f674, 0f437C0000; cvt.sat.f32.f32 %f675, %f672; mov.f32 %f676, 0f4B400001; fma.rm.f32 %f677, %f675, %f674, %f676; add.f32 %f678, %f677, 0fCB40007F; neg.f32 %f679, %f678; fma.rn.f32 %f680, %f2670, %f673, %f679; mov.f32 %f681, 0f32A57060; fma.rn.f32 %f682, %f2670, %f681, %f680; mov.b32 %r81, %f677; shl.b32 %r82, %r81, 23; mov.b32 %f683, %r82; ex2.approx.ftz.f32 %f684, %f682; fma.rn.f32 %f685, %f684, %f683, 0fBF800000; mul.f32 %f2670, %f3, %f685; bra.uni $L__BB0_93; $L__BB0_71: mul.f32 %f569, %f37, 0f4038AA3B; ex2.approx.ftz.f32 %f570, %f569; add.f32 %f571, %f570, 0f3F800000; mov.f32 %f572, 0f3F800000; rcp.approx.ftz.f32 %f573, %f571; mov.f32 %f574, 0fC0000000; fma.rn.f32 %f575, %f573, %f574, %f572; setp.ge.f32 %p61, %f37, 0f41102CB4; selp.f32 %f576, 0f3F800000, %f575, %p61; mov.b32 %r65, %f576; mov.b32 %r66, %f2670; and.b32 %r67, %r66, -2147483648; or.b32 %r68, %r67, %r65; mov.b32 %f2670, %r68; bra.uni $L__BB0_93; $L__BB0_79: mul.f32 %f652, %f48, 0f4038AA3B; ex2.approx.ftz.f32 %f653, %f652; add.f32 %f654, %f653, 0f3F800000; mov.f32 %f655, 0f3F800000; rcp.approx.ftz.f32 %f656, %f654; mov.f32 %f657, 0fC0000000; fma.rn.f32 %f658, %f656, %f657, %f655; setp.ge.f32 %p66, %f48, 0f41102CB4; selp.f32 %f659, 0f3F800000, %f658, %p66; mov.b32 %r77, %f659; mov.b32 %r78, %f47; and.b32 %r79, %r78, -2147483648; or.b32 %r80, %r79, %r77; mov.b32 %f49, %r80; mul.f32 %f2670, %f381, %f49; $L__BB0_93: st.global.f32 [%rd133], %f2670; $L__BB0_94: add.s64 %rd133, %rd133, 4; add.s64 %rd132, %rd132, 2; setp.eq.s32 %p74, %r409, 0; @%p74 bra $L__BB0_586; bra.uni $L__BB0_5; $L__BB0_454: and.b64 %rd93, %rd71, -4294967296; setp.eq.s64 %p354, %rd93, 0; @%p354 bra $L__BB0_456; div.u64 %rd140, %rd20, %rd71; bra.uni $L__BB0_457; $L__BB0_463: and.b64 %rd96, %rd72, -4294967296; setp.eq.s64 %p357, %rd96, 0; @%p357 bra $L__BB0_465; div.u64 %rd140, %rd20, %rd72; bra.uni $L__BB0_466; $L__BB0_269: shl.b64 %rd86, %rd4, 4; add.s64 %rd15, %rd1, %rd86; ld.global.v4.f32 {%f2685, %f2688, %f2691, %f2694}, [%rd15]; setp.gt.s64 %p212, %rd69, 9; @%p212 bra $L__BB0_279; setp.gt.s64 %p222, %rd69, 4; @%p222 bra $L__BB0_274; bra.uni $L__BB0_271; $L__BB0_274: setp.gt.s64 %p223, %rd69, 7; @%p223 bra $L__BB0_277; setp.eq.s64 %p226, %rd69, 5; @%p226 bra $L__BB0_307; setp.eq.s64 %p227, %rd69, 7; @%p227 bra $L__BB0_308; bra.uni $L__BB0_312; $L__BB0_106: setp.gt.s64 %p77, %rd69, 13; @%p77 bra $L__BB0_110; bra.uni $L__BB0_107; $L__BB0_110: setp.gt.s64 %p78, %rd69, 15; @%p78 bra $L__BB0_113; setp.eq.s64 %p81, %rd69, 14; @%p81 bra $L__BB0_120; setp.eq.s64 %p82, %rd69, 15; @%p82 bra $L__BB0_119; bra.uni $L__BB0_139; $L__BB0_119: abs.f32 %f774, %f2673; add.f32 %f775, %f774, 0f3F800000; div.rn.f32 %f2673, %f2673, %f775; bra.uni $L__BB0_139; $L__BB0_279: setp.gt.s64 %p213, %rd69, 13; @%p213 bra $L__BB0_283; bra.uni $L__BB0_280; $L__BB0_283: setp.gt.s64 %p214, %rd69, 15; @%p214 bra $L__BB0_286; setp.eq.s64 %p217, %rd69, 14; @%p217 bra $L__BB0_293; setp.eq.s64 %p218, %rd69, 15; @%p218 bra $L__BB0_292; bra.uni $L__BB0_312; $L__BB0_292: abs.f32 %f1525, %f2685; add.f32 %f1526, %f1525, 0f3F800000; div.rn.f32 %f2685, %f2685, %f1526; bra.uni $L__BB0_312; $L__BB0_447: cvt.u32.u64 %r302, %rd73; cvt.u32.u64 %r303, %rd20; div.u32 %r304, %r303, %r302; cvt.u64.u32 %rd141, %r304; $L__BB0_448: and.b64 %rd91, %rd72, -4294967296; setp.eq.s64 %p352, %rd91, 0; @%p352 bra $L__BB0_450; div.u64 %rd142, %rd141, %rd72; bra.uni $L__BB0_451; $L__BB0_456: cvt.u32.u64 %r311, %rd71; cvt.u32.u64 %r312, %rd20; div.u32 %r313, %r312, %r311; cvt.u64.u32 %rd140, %r313; $L__BB0_457: and.b64 %rd94, %rd73, -4294967296; setp.eq.s64 %p355, %rd94, 0; @%p355 bra $L__BB0_459; div.u64 %rd141, %rd140, %rd73; bra.uni $L__BB0_460; $L__BB0_450: cvt.u32.u64 %r305, %rd72; cvt.u32.u64 %r306, %rd141; div.u32 %r307, %r306, %r305; cvt.u64.u32 %rd142, %r307; $L__BB0_451: and.b64 %rd92, %rd71, -4294967296; setp.eq.s64 %p353, %rd92, 0; @%p353 bra $L__BB0_453; div.u64 %rd143, %rd142, %rd71; mov.u64 %rd140, %rd20; bra.uni $L__BB0_472; $L__BB0_459: cvt.u32.u64 %r314, %rd73; cvt.u32.u64 %r315, %rd140; div.u32 %r316, %r315, %r314; cvt.u64.u32 %rd141, %r316; $L__BB0_460: and.b64 %rd95, %rd72, -4294967296; setp.eq.s64 %p356, %rd95, 0; @%p356 bra $L__BB0_462; div.u64 %rd143, %rd141, %rd72; mov.u64 %rd142, %rd20; bra.uni $L__BB0_472; $L__BB0_453: cvt.u32.u64 %r308, %rd71; cvt.u32.u64 %r309, %rd142; div.u32 %r310, %r309, %r308; cvt.u64.u32 %rd143, %r310; mov.u64 %rd140, %rd20; bra.uni $L__BB0_472; $L__BB0_462: cvt.u32.u64 %r317, %rd72; cvt.u32.u64 %r318, %rd141; div.u32 %r319, %r318, %r317; cvt.u64.u32 %rd143, %r319; mov.u64 %rd142, %rd20; bra.uni $L__BB0_472; $L__BB0_465: cvt.u32.u64 %r320, %rd72; cvt.u32.u64 %r321, %rd20; div.u32 %r322, %r321, %r320; cvt.u64.u32 %rd140, %r322; $L__BB0_466: and.b64 %rd97, %rd73, -4294967296; setp.eq.s64 %p358, %rd97, 0; @%p358 bra $L__BB0_468; div.u64 %rd142, %rd140, %rd73; bra.uni $L__BB0_469; $L__BB0_468: cvt.u32.u64 %r323, %rd73; cvt.u32.u64 %r324, %rd140; div.u32 %r325, %r324, %r323; cvt.u64.u32 %rd142, %r325; $L__BB0_469: and.b64 %rd98, %rd71, -4294967296; setp.eq.s64 %p359, %rd98, 0; @%p359 bra $L__BB0_471; div.u64 %rd143, %rd142, %rd71; mov.u64 %rd141, %rd20; bra.uni $L__BB0_472; $L__BB0_471: cvt.u32.u64 %r326, %rd71; cvt.u32.u64 %r327, %rd142; div.u32 %r328, %r327, %r326; cvt.u64.u32 %rd143, %r328; mov.u64 %rd141, %rd20; $L__BB0_472: and.b64 %rd99, %rd71, -4294967296; setp.eq.s64 %p360, %rd99, 0; @%p360 bra $L__BB0_474; rem.u64 %rd144, %rd142, %rd71; bra.uni $L__BB0_475; $L__BB0_474: cvt.u32.u64 %r329, %rd71; cvt.u32.u64 %r330, %rd142; rem.u32 %r331, %r330, %r329; cvt.u64.u32 %rd144, %r331; $L__BB0_475: and.b64 %rd100, %rd72, -4294967296; setp.eq.s64 %p361, %rd100, 0; @%p361 bra $L__BB0_477; rem.u64 %rd145, %rd141, %rd72; bra.uni $L__BB0_478; $L__BB0_477: cvt.u32.u64 %r332, %rd72; cvt.u32.u64 %r333, %rd141; rem.u32 %r334, %r333, %r332; cvt.u64.u32 %rd145, %r334; $L__BB0_478: and.b64 %rd101, %rd73, -4294967296; setp.eq.s64 %p362, %rd101, 0; @%p362 bra $L__BB0_480; rem.u64 %rd146, %rd140, %rd73; bra.uni $L__BB0_481; $L__BB0_480: cvt.u32.u64 %r335, %rd73; cvt.u32.u64 %r336, %rd140; rem.u32 %r337, %r336, %r335; cvt.u64.u32 %rd146, %r337; $L__BB0_481: cvt.u32.u64 %r418, %rd143; cvt.u32.u64 %r417, %rd144; cvt.u32.u64 %r416, %rd145; cvt.u32.u64 %r415, %rd146; mul.lo.s64 %rd102, %rd143, %rd74; mul.lo.s64 %rd103, %rd144, %rd75; add.s64 %rd104, %rd103, %rd102; mul.lo.s64 %rd105, %rd145, %rd76; add.s64 %rd106, %rd104, %rd105; mul.lo.s64 %rd107, %rd146, %rd77; add.s64 %rd147, %rd106, %rd107; mul.f32 %f316, %f382, %f381; mov.u32 %r410, 0; $L__BB0_482: and.b64 %rd64, %rd147, 4294967295; setp.eq.s16 %p363, %rs1, 0; @%p363 bra $L__BB0_527; shl.b64 %rd108, %rd64, 1; add.s64 %rd65, %rd1, %rd108; ld.global.u16 %rs40, [%rd65]; // begin inline asm { cvt.f32.f16 %f2697, %rs40;} // end inline asm setp.gt.s64 %p364, %rd69, 9; @%p364 bra $L__BB0_493; setp.gt.s64 %p374, %rd69, 4; @%p374 bra $L__BB0_488; bra.uni $L__BB0_485; $L__BB0_488: setp.gt.s64 %p375, %rd69, 7; @%p375 bra $L__BB0_491; setp.eq.s64 %p378, %rd69, 5; @%p378 bra $L__BB0_521; setp.eq.s64 %p379, %rd69, 7; @%p379 bra $L__BB0_522; bra.uni $L__BB0_526; $L__BB0_527: shl.b64 %rd109, %rd64, 2; add.s64 %rd66, %rd1, %rd109; ld.global.f32 %f2700, [%rd66]; setp.gt.s64 %p398, %rd69, 9; @%p398 bra $L__BB0_537; setp.gt.s64 %p408, %rd69, 4; @%p408 bra $L__BB0_532; bra.uni $L__BB0_529; $L__BB0_532: setp.gt.s64 %p409, %rd69, 7; @%p409 bra $L__BB0_535; setp.eq.s64 %p412, %rd69, 5; @%p412 bra $L__BB0_565; setp.eq.s64 %p413, %rd69, 7; @%p413 bra $L__BB0_566; bra.uni $L__BB0_570; $L__BB0_493: setp.gt.s64 %p365, %rd69, 13; @%p365 bra $L__BB0_497; bra.uni $L__BB0_494; $L__BB0_497: setp.gt.s64 %p366, %rd69, 15; @%p366 bra $L__BB0_500; setp.eq.s64 %p369, %rd69, 14; @%p369 bra $L__BB0_507; setp.eq.s64 %p370, %rd69, 15; @%p370 bra $L__BB0_506; bra.uni $L__BB0_526; $L__BB0_506: abs.f32 %f2266, %f2697; add.f32 %f2267, %f2266, 0f3F800000; div.rn.f32 %f2697, %f2697, %f2267; bra.uni $L__BB0_526; $L__BB0_537: setp.gt.s64 %p399, %rd69, 13; @%p399 bra $L__BB0_541; bra.uni $L__BB0_538; $L__BB0_541: setp.gt.s64 %p400, %rd69, 15; @%p400 bra $L__BB0_544; setp.eq.s64 %p403, %rd69, 14; @%p403 bra $L__BB0_551; setp.eq.s64 %p404, %rd69, 15; @%p404 bra $L__BB0_550; bra.uni $L__BB0_570; $L__BB0_550: abs.f32 %f2451, %f2700; add.f32 %f2452, %f2451, 0f3F800000; div.rn.f32 %f2700, %f2700, %f2452; bra.uni $L__BB0_570; $L__BB0_485: setp.eq.s64 %p380, %rd69, 0; @%p380 bra $L__BB0_524; setp.eq.s64 %p381, %rd69, 2; @%p381 bra $L__BB0_523; setp.eq.s64 %p382, %rd69, 4; @%p382 bra $L__BB0_522; bra.uni $L__BB0_526; $L__BB0_522: setp.lt.f32 %p396, %f2697, 0f00000000; mul.f32 %f2411, %f381, %f2697; selp.f32 %f2697, %f2411, %f2697, %p396; bra.uni $L__BB0_526; $L__BB0_494: setp.eq.s64 %p371, %rd69, 10; @%p371 bra $L__BB0_514; setp.eq.s64 %p372, %rd69, 11; @%p372 bra $L__BB0_511; setp.eq.s64 %p373, %rd69, 12; @%p373 bra $L__BB0_510; bra.uni $L__BB0_526; $L__BB0_510: neg.f32 %f2314, %f2697; mov.f32 %f2315, 0f3F000000; mov.f32 %f2316, 0f3BBB989D; fma.rn.f32 %f2317, %f2314, %f2316, %f2315; mov.f32 %f2318, 0f3FB8AA3B; mov.f32 %f2319, 0f437C0000; cvt.sat.f32.f32 %f2320, %f2317; mov.f32 %f2321, 0f4B400001; fma.rm.f32 %f2322, %f2320, %f2319, %f2321; add.f32 %f2323, %f2322, 0fCB40007F; neg.f32 %f2324, %f2323; fma.rn.f32 %f2325, %f2314, %f2318, %f2324; mov.f32 %f2326, 0f32A57060; fma.rn.f32 %f2327, %f2314, %f2326, %f2325; mov.b32 %r349, %f2322; shl.b32 %r350, %r349, 23; mov.b32 %f2328, %r350; ex2.approx.ftz.f32 %f2329, %f2327; fma.rn.f32 %f2330, %f2329, %f2328, 0f3F800000; rcp.rn.f32 %f2697, %f2330; bra.uni $L__BB0_526; $L__BB0_491: setp.eq.s64 %p376, %rd69, 8; @%p376 bra $L__BB0_518; setp.eq.s64 %p377, %rd69, 9; @%p377 bra $L__BB0_517; bra.uni $L__BB0_526; $L__BB0_517: setp.lt.f32 %p392, %f2697, 0f00000000; selp.f32 %f2697, 0f00000000, %f2697, %p392; bra.uni $L__BB0_526; $L__BB0_500: setp.eq.s64 %p367, %rd69, 16; @%p367 bra $L__BB0_503; setp.ne.s64 %p368, %rd69, 17; @%p368 bra $L__BB0_526; setp.ge.f32 %p383, %f381, %f2697; selp.f32 %f2697, 0f00000000, %f2697, %p383; bra.uni $L__BB0_526; $L__BB0_529: setp.eq.s64 %p414, %rd69, 0; @%p414 bra $L__BB0_568; setp.eq.s64 %p415, %rd69, 2; @%p415 bra $L__BB0_567; setp.eq.s64 %p416, %rd69, 4; @%p416 bra $L__BB0_566; bra.uni $L__BB0_570; $L__BB0_566: setp.lt.f32 %p430, %f2700, 0f00000000; mul.f32 %f2596, %f381, %f2700; selp.f32 %f2700, %f2596, %f2700, %p430; bra.uni $L__BB0_570; $L__BB0_538: setp.eq.s64 %p405, %rd69, 10; @%p405 bra $L__BB0_558; setp.eq.s64 %p406, %rd69, 11; @%p406 bra $L__BB0_555; setp.eq.s64 %p407, %rd69, 12; @%p407 bra $L__BB0_554; bra.uni $L__BB0_570; $L__BB0_554: neg.f32 %f2499, %f2700; mov.f32 %f2500, 0f3F000000; mov.f32 %f2501, 0f3BBB989D; fma.rn.f32 %f2502, %f2499, %f2501, %f2500; mov.f32 %f2503, 0f3FB8AA3B; mov.f32 %f2504, 0f437C0000; cvt.sat.f32.f32 %f2505, %f2502; mov.f32 %f2506, 0f4B400001; fma.rm.f32 %f2507, %f2505, %f2504, %f2506; add.f32 %f2508, %f2507, 0fCB40007F; neg.f32 %f2509, %f2508; fma.rn.f32 %f2510, %f2499, %f2503, %f2509; mov.f32 %f2511, 0f32A57060; fma.rn.f32 %f2512, %f2499, %f2511, %f2510; mov.b32 %r375, %f2507; shl.b32 %r376, %r375, 23; mov.b32 %f2513, %r376; ex2.approx.ftz.f32 %f2514, %f2512; fma.rn.f32 %f2515, %f2514, %f2513, 0f3F800000; rcp.rn.f32 %f2700, %f2515; bra.uni $L__BB0_570; $L__BB0_535: setp.eq.s64 %p410, %rd69, 8; @%p410 bra $L__BB0_562; setp.eq.s64 %p411, %rd69, 9; @%p411 bra $L__BB0_561; bra.uni $L__BB0_570; $L__BB0_561: setp.lt.f32 %p426, %f2700, 0f00000000; selp.f32 %f2700, 0f00000000, %f2700, %p426; bra.uni $L__BB0_570; $L__BB0_544: setp.eq.s64 %p401, %rd69, 16; @%p401 bra $L__BB0_547; setp.ne.s64 %p402, %rd69, 17; @%p402 bra $L__BB0_570; setp.ge.f32 %p417, %f381, %f2700; selp.f32 %f2700, 0f00000000, %f2700, %p417; bra.uni $L__BB0_570; $L__BB0_521: fma.rn.f32 %f2697, %f381, %f2697, %f382; bra.uni $L__BB0_526; $L__BB0_507: mul.f32 %f2268, %f381, %f2697; mov.f32 %f2269, 0f3F000000; mov.f32 %f2270, 0f3BBB989D; fma.rn.f32 %f2271, %f2268, %f2270, %f2269; mov.f32 %f2272, 0f3FB8AA3B; mov.f32 %f2273, 0f437C0000; cvt.sat.f32.f32 %f2274, %f2271; mov.f32 %f2275, 0f4B400001; fma.rm.f32 %f2276, %f2274, %f2273, %f2275; add.f32 %f2277, %f2276, 0fCB40007F; neg.f32 %f2278, %f2277; fma.rn.f32 %f2279, %f2268, %f2272, %f2278; mov.f32 %f2280, 0f32A57060; fma.rn.f32 %f2281, %f2268, %f2280, %f2279; mov.b32 %r343, %f2276; shl.b32 %r344, %r343, 23; mov.b32 %f2282, %r344; ex2.approx.ftz.f32 %f2283, %f2281; fma.rn.f32 %f2284, %f2283, %f2282, 0f3F800000; setp.lt.f32 %p386, %f2284, 0f00800000; mul.f32 %f2285, %f2284, 0f4B000000; selp.f32 %f323, %f2285, %f2284, %p386; selp.f32 %f2286, 0fC1B80000, 0f00000000, %p386; mov.b32 %r345, %f323; add.s32 %r346, %r345, -1059760811; and.b32 %r347, %r346, -8388608; sub.s32 %r348, %r345, %r347; mov.b32 %f2287, %r348; cvt.rn.f32.s32 %f2288, %r347; mov.f32 %f2289, 0f34000000; fma.rn.f32 %f2290, %f2288, %f2289, %f2286; add.f32 %f2291, %f2287, 0fBF800000; mov.f32 %f2292, 0f3E1039F6; mov.f32 %f2293, 0fBE055027; fma.rn.f32 %f2294, %f2293, %f2291, %f2292; mov.f32 %f2295, 0fBDF8CDCC; fma.rn.f32 %f2296, %f2294, %f2291, %f2295; mov.f32 %f2297, 0f3E0F2955; fma.rn.f32 %f2298, %f2296, %f2291, %f2297; mov.f32 %f2299, 0fBE2AD8B9; fma.rn.f32 %f2300, %f2298, %f2291, %f2299; mov.f32 %f2301, 0f3E4CED0B; fma.rn.f32 %f2302, %f2300, %f2291, %f2301; mov.f32 %f2303, 0fBE7FFF22; fma.rn.f32 %f2304, %f2302, %f2291, %f2303; mov.f32 %f2305, 0f3EAAAA78; fma.rn.f32 %f2306, %f2304, %f2291, %f2305; mov.f32 %f2307, 0fBF000000; fma.rn.f32 %f2308, %f2306, %f2291, %f2307; mul.f32 %f2309, %f2308, %f2291; fma.rn.f32 %f2310, %f2309, %f2291, %f2291; mov.f32 %f2311, 0f3F317218; fma.rn.f32 %f2695, %f2290, %f2311, %f2310; setp.lt.u32 %p387, %r345, 2139095040; @%p387 bra $L__BB0_509; mov.f32 %f2312, 0f7F800000; fma.rn.f32 %f2695, %f323, %f2312, %f2312; $L__BB0_509: setp.eq.f32 %p388, %f323, 0f00000000; selp.f32 %f2313, 0fFF800000, %f2695, %p388; div.rn.f32 %f2697, %f2313, %f381; bra.uni $L__BB0_526; $L__BB0_524: setp.geu.f32 %p397, %f2697, 0f00000000; @%p397 bra $L__BB0_526; mov.f32 %f2416, 0f3F000000; mov.f32 %f2417, 0f3BBB989D; fma.rn.f32 %f2418, %f2697, %f2417, %f2416; mov.f32 %f2419, 0f3FB8AA3B; mov.f32 %f2420, 0f437C0000; cvt.sat.f32.f32 %f2421, %f2418; mov.f32 %f2422, 0f4B400001; fma.rm.f32 %f2423, %f2421, %f2420, %f2422; add.f32 %f2424, %f2423, 0fCB40007F; neg.f32 %f2425, %f2424; fma.rn.f32 %f2426, %f2697, %f2419, %f2425; mov.f32 %f2427, 0f32A57060; fma.rn.f32 %f2428, %f2697, %f2427, %f2426; mov.b32 %r363, %f2423; shl.b32 %r364, %r363, 23; mov.b32 %f2429, %r364; ex2.approx.ftz.f32 %f2430, %f2428; fma.rn.f32 %f2431, %f2430, %f2429, 0fBF800000; mul.f32 %f2697, %f381, %f2431; bra.uni $L__BB0_526; $L__BB0_523: fma.rn.f32 %f2412, %f381, %f2697, %f382; mov.f32 %f2413, 0f3F800000; min.f32 %f2414, %f2413, %f2412; mov.f32 %f2415, 0f00000000; max.f32 %f2697, %f2415, %f2414; bra.uni $L__BB0_526; $L__BB0_514: setp.gt.f32 %p391, %f2697, 0f00000000; @%p391 bra $L__BB0_516; bra.uni $L__BB0_515; $L__BB0_516: mul.f32 %f2697, %f382, %f2697; bra.uni $L__BB0_526; $L__BB0_511: mul.f32 %f329, %f382, %f2697; abs.f32 %f330, %f329; setp.ltu.f32 %p389, %f330, 0f3F19999A; @%p389 bra $L__BB0_513; bra.uni $L__BB0_512; $L__BB0_513: mul.f32 %f2339, %f329, %f329; mov.f32 %f2340, 0fBD563CAE; mov.f32 %f2341, 0f3C80F082; fma.rn.f32 %f2342, %f2341, %f2339, %f2340; mov.f32 %f2343, 0f3E085941; fma.rn.f32 %f2344, %f2342, %f2339, %f2343; mov.f32 %f2345, 0fBEAAA9ED; fma.rn.f32 %f2346, %f2344, %f2339, %f2345; mov.f32 %f2347, 0f00000000; fma.rn.f32 %f2348, %f2346, %f2339, %f2347; fma.rn.f32 %f332, %f2348, %f329, %f329; mul.f32 %f2697, %f381, %f332; bra.uni $L__BB0_526; $L__BB0_518: mul.f32 %f2365, %f382, %f2697; mov.f32 %f2366, 0f3F000000; mov.f32 %f2367, 0f3BBB989D; fma.rn.f32 %f2368, %f2365, %f2367, %f2366; mov.f32 %f2369, 0f3FB8AA3B; mov.f32 %f2370, 0f437C0000; cvt.sat.f32.f32 %f2371, %f2368; mov.f32 %f2372, 0f4B400001; fma.rm.f32 %f2373, %f2371, %f2370, %f2372; add.f32 %f2374, %f2373, 0fCB40007F; neg.f32 %f2375, %f2374; fma.rn.f32 %f2376, %f2365, %f2369, %f2375; mov.f32 %f2377, 0f32A57060; fma.rn.f32 %f2378, %f2365, %f2377, %f2376; mov.b32 %r357, %f2373; shl.b32 %r358, %r357, 23; mov.b32 %f2379, %r358; ex2.approx.ftz.f32 %f2380, %f2378; fma.rn.f32 %f2381, %f2380, %f2379, 0f3F800000; setp.lt.f32 %p393, %f2381, 0f00800000; mul.f32 %f2382, %f2381, 0f4B000000; selp.f32 %f338, %f2382, %f2381, %p393; selp.f32 %f2383, 0fC1B80000, 0f00000000, %p393; mov.b32 %r359, %f338; add.s32 %r360, %r359, -1059760811; and.b32 %r361, %r360, -8388608; sub.s32 %r362, %r359, %r361; mov.b32 %f2384, %r362; cvt.rn.f32.s32 %f2385, %r361; mov.f32 %f2386, 0f34000000; fma.rn.f32 %f2387, %f2385, %f2386, %f2383; add.f32 %f2388, %f2384, 0fBF800000; mov.f32 %f2389, 0f3E1039F6; mov.f32 %f2390, 0fBE055027; fma.rn.f32 %f2391, %f2390, %f2388, %f2389; mov.f32 %f2392, 0fBDF8CDCC; fma.rn.f32 %f2393, %f2391, %f2388, %f2392; mov.f32 %f2394, 0f3E0F2955; fma.rn.f32 %f2395, %f2393, %f2388, %f2394; mov.f32 %f2396, 0fBE2AD8B9; fma.rn.f32 %f2397, %f2395, %f2388, %f2396; mov.f32 %f2398, 0f3E4CED0B; fma.rn.f32 %f2399, %f2397, %f2388, %f2398; mov.f32 %f2400, 0fBE7FFF22; fma.rn.f32 %f2401, %f2399, %f2388, %f2400; mov.f32 %f2402, 0f3EAAAA78; fma.rn.f32 %f2403, %f2401, %f2388, %f2402; mov.f32 %f2404, 0fBF000000; fma.rn.f32 %f2405, %f2403, %f2388, %f2404; mul.f32 %f2406, %f2405, %f2388; fma.rn.f32 %f2407, %f2406, %f2388, %f2388; mov.f32 %f2408, 0f3F317218; fma.rn.f32 %f2696, %f2387, %f2408, %f2407; setp.lt.u32 %p394, %r359, 2139095040; @%p394 bra $L__BB0_520; mov.f32 %f2409, 0f7F800000; fma.rn.f32 %f2696, %f338, %f2409, %f2409; $L__BB0_520: setp.eq.f32 %p395, %f338, 0f00000000; selp.f32 %f2410, 0fFF800000, %f2696, %p395; mul.f32 %f2697, %f381, %f2410; bra.uni $L__BB0_526; $L__BB0_503: abs.f32 %f319, %f2697; setp.ltu.f32 %p384, %f319, 0f3F19999A; @%p384 bra $L__BB0_505; bra.uni $L__BB0_504; $L__BB0_505: mul.f32 %f2256, %f2697, %f2697; mov.f32 %f2257, 0fBD563CAE; mov.f32 %f2258, 0f3C80F082; fma.rn.f32 %f2259, %f2258, %f2256, %f2257; mov.f32 %f2260, 0f3E085941; fma.rn.f32 %f2261, %f2259, %f2256, %f2260; mov.f32 %f2262, 0fBEAAA9ED; fma.rn.f32 %f2263, %f2261, %f2256, %f2262; mov.f32 %f2264, 0f00000000; fma.rn.f32 %f2265, %f2263, %f2256, %f2264; fma.rn.f32 %f2697, %f2265, %f2697, %f2697; bra.uni $L__BB0_526; $L__BB0_565: fma.rn.f32 %f2700, %f381, %f2700, %f382; bra.uni $L__BB0_570; $L__BB0_551: mul.f32 %f2453, %f381, %f2700; mov.f32 %f2454, 0f3F000000; mov.f32 %f2455, 0f3BBB989D; fma.rn.f32 %f2456, %f2453, %f2455, %f2454; mov.f32 %f2457, 0f3FB8AA3B; mov.f32 %f2458, 0f437C0000; cvt.sat.f32.f32 %f2459, %f2456; mov.f32 %f2460, 0f4B400001; fma.rm.f32 %f2461, %f2459, %f2458, %f2460; add.f32 %f2462, %f2461, 0fCB40007F; neg.f32 %f2463, %f2462; fma.rn.f32 %f2464, %f2453, %f2457, %f2463; mov.f32 %f2465, 0f32A57060; fma.rn.f32 %f2466, %f2453, %f2465, %f2464; mov.b32 %r369, %f2461; shl.b32 %r370, %r369, 23; mov.b32 %f2467, %r370; ex2.approx.ftz.f32 %f2468, %f2466; fma.rn.f32 %f2469, %f2468, %f2467, 0f3F800000; setp.lt.f32 %p420, %f2469, 0f00800000; mul.f32 %f2470, %f2469, 0f4B000000; selp.f32 %f354, %f2470, %f2469, %p420; selp.f32 %f2471, 0fC1B80000, 0f00000000, %p420; mov.b32 %r371, %f354; add.s32 %r372, %r371, -1059760811; and.b32 %r373, %r372, -8388608; sub.s32 %r374, %r371, %r373; mov.b32 %f2472, %r374; cvt.rn.f32.s32 %f2473, %r373; mov.f32 %f2474, 0f34000000; fma.rn.f32 %f2475, %f2473, %f2474, %f2471; add.f32 %f2476, %f2472, 0fBF800000; mov.f32 %f2477, 0f3E1039F6; mov.f32 %f2478, 0fBE055027; fma.rn.f32 %f2479, %f2478, %f2476, %f2477; mov.f32 %f2480, 0fBDF8CDCC; fma.rn.f32 %f2481, %f2479, %f2476, %f2480; mov.f32 %f2482, 0f3E0F2955; fma.rn.f32 %f2483, %f2481, %f2476, %f2482; mov.f32 %f2484, 0fBE2AD8B9; fma.rn.f32 %f2485, %f2483, %f2476, %f2484; mov.f32 %f2486, 0f3E4CED0B; fma.rn.f32 %f2487, %f2485, %f2476, %f2486; mov.f32 %f2488, 0fBE7FFF22; fma.rn.f32 %f2489, %f2487, %f2476, %f2488; mov.f32 %f2490, 0f3EAAAA78; fma.rn.f32 %f2491, %f2489, %f2476, %f2490; mov.f32 %f2492, 0fBF000000; fma.rn.f32 %f2493, %f2491, %f2476, %f2492; mul.f32 %f2494, %f2493, %f2476; fma.rn.f32 %f2495, %f2494, %f2476, %f2476; mov.f32 %f2496, 0f3F317218; fma.rn.f32 %f2698, %f2475, %f2496, %f2495; setp.lt.u32 %p421, %r371, 2139095040; @%p421 bra $L__BB0_553; mov.f32 %f2497, 0f7F800000; fma.rn.f32 %f2698, %f354, %f2497, %f2497; $L__BB0_553: setp.eq.f32 %p422, %f354, 0f00000000; selp.f32 %f2498, 0fFF800000, %f2698, %p422; div.rn.f32 %f2700, %f2498, %f381; bra.uni $L__BB0_570; $L__BB0_568: setp.geu.f32 %p431, %f2700, 0f00000000; @%p431 bra $L__BB0_570; mov.f32 %f2601, 0f3F000000; mov.f32 %f2602, 0f3BBB989D; fma.rn.f32 %f2603, %f2700, %f2602, %f2601; mov.f32 %f2604, 0f3FB8AA3B; mov.f32 %f2605, 0f437C0000; cvt.sat.f32.f32 %f2606, %f2603; mov.f32 %f2607, 0f4B400001; fma.rm.f32 %f2608, %f2606, %f2605, %f2607; add.f32 %f2609, %f2608, 0fCB40007F; neg.f32 %f2610, %f2609; fma.rn.f32 %f2611, %f2700, %f2604, %f2610; mov.f32 %f2612, 0f32A57060; fma.rn.f32 %f2613, %f2700, %f2612, %f2611; mov.b32 %r389, %f2608; shl.b32 %r390, %r389, 23; mov.b32 %f2614, %r390; ex2.approx.ftz.f32 %f2615, %f2613; fma.rn.f32 %f2616, %f2615, %f2614, 0fBF800000; mul.f32 %f2700, %f381, %f2616; bra.uni $L__BB0_570; $L__BB0_567: fma.rn.f32 %f2597, %f381, %f2700, %f382; mov.f32 %f2598, 0f3F800000; min.f32 %f2599, %f2598, %f2597; mov.f32 %f2600, 0f00000000; max.f32 %f2700, %f2600, %f2599; bra.uni $L__BB0_570; $L__BB0_558: setp.gt.f32 %p425, %f2700, 0f00000000; @%p425 bra $L__BB0_560; bra.uni $L__BB0_559; $L__BB0_560: mul.f32 %f2700, %f382, %f2700; bra.uni $L__BB0_570; $L__BB0_555: mul.f32 %f360, %f382, %f2700; abs.f32 %f361, %f360; setp.ltu.f32 %p423, %f361, 0f3F19999A; @%p423 bra $L__BB0_557; bra.uni $L__BB0_556; $L__BB0_557: mul.f32 %f2524, %f360, %f360; mov.f32 %f2525, 0fBD563CAE; mov.f32 %f2526, 0f3C80F082; fma.rn.f32 %f2527, %f2526, %f2524, %f2525; mov.f32 %f2528, 0f3E085941; fma.rn.f32 %f2529, %f2527, %f2524, %f2528; mov.f32 %f2530, 0fBEAAA9ED; fma.rn.f32 %f2531, %f2529, %f2524, %f2530; mov.f32 %f2532, 0f00000000; fma.rn.f32 %f2533, %f2531, %f2524, %f2532; fma.rn.f32 %f363, %f2533, %f360, %f360; mul.f32 %f2700, %f381, %f363; bra.uni $L__BB0_570; $L__BB0_562: mul.f32 %f2550, %f382, %f2700; mov.f32 %f2551, 0f3F000000; mov.f32 %f2552, 0f3BBB989D; fma.rn.f32 %f2553, %f2550, %f2552, %f2551; mov.f32 %f2554, 0f3FB8AA3B; mov.f32 %f2555, 0f437C0000; cvt.sat.f32.f32 %f2556, %f2553; mov.f32 %f2557, 0f4B400001; fma.rm.f32 %f2558, %f2556, %f2555, %f2557; add.f32 %f2559, %f2558, 0fCB40007F; neg.f32 %f2560, %f2559; fma.rn.f32 %f2561, %f2550, %f2554, %f2560; mov.f32 %f2562, 0f32A57060; fma.rn.f32 %f2563, %f2550, %f2562, %f2561; mov.b32 %r383, %f2558; shl.b32 %r384, %r383, 23; mov.b32 %f2564, %r384; ex2.approx.ftz.f32 %f2565, %f2563; fma.rn.f32 %f2566, %f2565, %f2564, 0f3F800000; setp.lt.f32 %p427, %f2566, 0f00800000; mul.f32 %f2567, %f2566, 0f4B000000; selp.f32 %f369, %f2567, %f2566, %p427; selp.f32 %f2568, 0fC1B80000, 0f00000000, %p427; mov.b32 %r385, %f369; add.s32 %r386, %r385, -1059760811; and.b32 %r387, %r386, -8388608; sub.s32 %r388, %r385, %r387; mov.b32 %f2569, %r388; cvt.rn.f32.s32 %f2570, %r387; mov.f32 %f2571, 0f34000000; fma.rn.f32 %f2572, %f2570, %f2571, %f2568; add.f32 %f2573, %f2569, 0fBF800000; mov.f32 %f2574, 0f3E1039F6; mov.f32 %f2575, 0fBE055027; fma.rn.f32 %f2576, %f2575, %f2573, %f2574; mov.f32 %f2577, 0fBDF8CDCC; fma.rn.f32 %f2578, %f2576, %f2573, %f2577; mov.f32 %f2579, 0f3E0F2955; fma.rn.f32 %f2580, %f2578, %f2573, %f2579; mov.f32 %f2581, 0fBE2AD8B9; fma.rn.f32 %f2582, %f2580, %f2573, %f2581; mov.f32 %f2583, 0f3E4CED0B; fma.rn.f32 %f2584, %f2582, %f2573, %f2583; mov.f32 %f2585, 0fBE7FFF22; fma.rn.f32 %f2586, %f2584, %f2573, %f2585; mov.f32 %f2587, 0f3EAAAA78; fma.rn.f32 %f2588, %f2586, %f2573, %f2587; mov.f32 %f2589, 0fBF000000; fma.rn.f32 %f2590, %f2588, %f2573, %f2589; mul.f32 %f2591, %f2590, %f2573; fma.rn.f32 %f2592, %f2591, %f2573, %f2573; mov.f32 %f2593, 0f3F317218; fma.rn.f32 %f2699, %f2572, %f2593, %f2592; setp.lt.u32 %p428, %r385, 2139095040; @%p428 bra $L__BB0_564; mov.f32 %f2594, 0f7F800000; fma.rn.f32 %f2699, %f369, %f2594, %f2594; $L__BB0_564: setp.eq.f32 %p429, %f369, 0f00000000; selp.f32 %f2595, 0fFF800000, %f2699, %p429; mul.f32 %f2700, %f381, %f2595; bra.uni $L__BB0_570; $L__BB0_547: abs.f32 %f350, %f2700; setp.ltu.f32 %p418, %f350, 0f3F19999A; @%p418 bra $L__BB0_549; bra.uni $L__BB0_548; $L__BB0_549: mul.f32 %f2441, %f2700, %f2700; mov.f32 %f2442, 0fBD563CAE; mov.f32 %f2443, 0f3C80F082; fma.rn.f32 %f2444, %f2443, %f2441, %f2442; mov.f32 %f2445, 0f3E085941; fma.rn.f32 %f2446, %f2444, %f2441, %f2445; mov.f32 %f2447, 0fBEAAA9ED; fma.rn.f32 %f2448, %f2446, %f2441, %f2447; mov.f32 %f2449, 0f00000000; fma.rn.f32 %f2450, %f2448, %f2441, %f2449; fma.rn.f32 %f2700, %f2450, %f2700, %f2700; bra.uni $L__BB0_570; $L__BB0_515: mov.f32 %f2349, 0f3F000000; mov.f32 %f2350, 0f3BBB989D; fma.rn.f32 %f2351, %f2697, %f2350, %f2349; mov.f32 %f2352, 0f3FB8AA3B; mov.f32 %f2353, 0f437C0000; cvt.sat.f32.f32 %f2354, %f2351; mov.f32 %f2355, 0f4B400001; fma.rm.f32 %f2356, %f2354, %f2353, %f2355; add.f32 %f2357, %f2356, 0fCB40007F; neg.f32 %f2358, %f2357; fma.rn.f32 %f2359, %f2697, %f2352, %f2358; mov.f32 %f2360, 0f32A57060; fma.rn.f32 %f2361, %f2697, %f2360, %f2359; mov.b32 %r355, %f2356; shl.b32 %r356, %r355, 23; mov.b32 %f2362, %r356; ex2.approx.ftz.f32 %f2363, %f2361; fma.rn.f32 %f2364, %f2363, %f2362, 0fBF800000; mul.f32 %f2697, %f316, %f2364; bra.uni $L__BB0_526; $L__BB0_512: mul.f32 %f2331, %f330, 0f4038AA3B; ex2.approx.ftz.f32 %f2332, %f2331; add.f32 %f2333, %f2332, 0f3F800000; mov.f32 %f2334, 0f3F800000; rcp.approx.ftz.f32 %f2335, %f2333; mov.f32 %f2336, 0fC0000000; fma.rn.f32 %f2337, %f2335, %f2336, %f2334; setp.ge.f32 %p390, %f330, 0f41102CB4; selp.f32 %f2338, 0f3F800000, %f2337, %p390; mov.b32 %r351, %f2338; mov.b32 %r352, %f329; and.b32 %r353, %r352, -2147483648; or.b32 %r354, %r353, %r351; mov.b32 %f331, %r354; mul.f32 %f2697, %f381, %f331; bra.uni $L__BB0_526; $L__BB0_504: mul.f32 %f2248, %f319, 0f4038AA3B; ex2.approx.ftz.f32 %f2249, %f2248; add.f32 %f2250, %f2249, 0f3F800000; mov.f32 %f2251, 0f3F800000; rcp.approx.ftz.f32 %f2252, %f2250; mov.f32 %f2253, 0fC0000000; fma.rn.f32 %f2254, %f2252, %f2253, %f2251; setp.ge.f32 %p385, %f319, 0f41102CB4; selp.f32 %f2255, 0f3F800000, %f2254, %p385; mov.b32 %r339, %f2255; mov.b32 %r340, %f2697; and.b32 %r341, %r340, -2147483648; or.b32 %r342, %r341, %r339; mov.b32 %f2697, %r342; $L__BB0_526: // begin inline asm { cvt.rn.f16.f32 %rs41, %f2697;} // end inline asm st.global.u16 [%rd65], %rs41; bra.uni $L__BB0_571; $L__BB0_559: mov.f32 %f2534, 0f3F000000; mov.f32 %f2535, 0f3BBB989D; fma.rn.f32 %f2536, %f2700, %f2535, %f2534; mov.f32 %f2537, 0f3FB8AA3B; mov.f32 %f2538, 0f437C0000; cvt.sat.f32.f32 %f2539, %f2536; mov.f32 %f2540, 0f4B400001; fma.rm.f32 %f2541, %f2539, %f2538, %f2540; add.f32 %f2542, %f2541, 0fCB40007F; neg.f32 %f2543, %f2542; fma.rn.f32 %f2544, %f2700, %f2537, %f2543; mov.f32 %f2545, 0f32A57060; fma.rn.f32 %f2546, %f2700, %f2545, %f2544; mov.b32 %r381, %f2541; shl.b32 %r382, %r381, 23; mov.b32 %f2547, %r382; ex2.approx.ftz.f32 %f2548, %f2546; fma.rn.f32 %f2549, %f2548, %f2547, 0fBF800000; mul.f32 %f2700, %f316, %f2549; bra.uni $L__BB0_570; $L__BB0_556: mul.f32 %f2516, %f361, 0f4038AA3B; ex2.approx.ftz.f32 %f2517, %f2516; add.f32 %f2518, %f2517, 0f3F800000; mov.f32 %f2519, 0f3F800000; rcp.approx.ftz.f32 %f2520, %f2518; mov.f32 %f2521, 0fC0000000; fma.rn.f32 %f2522, %f2520, %f2521, %f2519; setp.ge.f32 %p424, %f361, 0f41102CB4; selp.f32 %f2523, 0f3F800000, %f2522, %p424; mov.b32 %r377, %f2523; mov.b32 %r378, %f360; and.b32 %r379, %r378, -2147483648; or.b32 %r380, %r379, %r377; mov.b32 %f362, %r380; mul.f32 %f2700, %f381, %f362; bra.uni $L__BB0_570; $L__BB0_548: mul.f32 %f2433, %f350, 0f4038AA3B; ex2.approx.ftz.f32 %f2434, %f2433; add.f32 %f2435, %f2434, 0f3F800000; mov.f32 %f2436, 0f3F800000; rcp.approx.ftz.f32 %f2437, %f2435; mov.f32 %f2438, 0fC0000000; fma.rn.f32 %f2439, %f2437, %f2438, %f2436; setp.ge.f32 %p419, %f350, 0f41102CB4; selp.f32 %f2440, 0f3F800000, %f2439, %p419; mov.b32 %r365, %f2440; mov.b32 %r366, %f2700; and.b32 %r367, %r366, -2147483648; or.b32 %r368, %r367, %r365; mov.b32 %f2700, %r368; $L__BB0_570: st.global.f32 [%rd66], %f2700; $L__BB0_571: @%p349 bra $L__BB0_577; setp.ne.s32 %p433, %r32, 0; @%p433 bra $L__BB0_581; add.s32 %r415, %r415, 1; cvt.u64.u32 %rd110, %r415; setp.gt.u64 %p434, %rd73, %rd110; @%p434 bra $L__BB0_585; add.s32 %r416, %r416, 1; cvt.u64.u32 %rd111, %r416; setp.gt.u64 %p435, %rd72, %rd111; mov.u32 %r415, 0; @%p435 bra $L__BB0_585; add.s32 %r417, %r417, 1; cvt.u64.u32 %rd112, %r417; setp.gt.u64 %p436, %rd71, %rd112; mov.u32 %r416, %r415; @%p436 bra $L__BB0_585; add.s32 %r418, %r418, 1; cvt.u64.u32 %rd113, %r418; setp.gt.u64 %p437, %rd70, %rd113; mov.u32 %r415, 0; mov.u32 %r416, %r415; mov.u32 %r417, %r415; @%p437 bra $L__BB0_585; bra.uni $L__BB0_586; $L__BB0_577: add.s32 %r417, %r417, 1; cvt.u64.u32 %rd114, %r417; setp.gt.u64 %p438, %rd71, %rd114; @%p438 bra $L__BB0_585; add.s32 %r415, %r415, 1; cvt.u64.u32 %rd115, %r415; setp.gt.u64 %p439, %rd73, %rd115; mov.u32 %r417, 0; @%p439 bra $L__BB0_585; setp.ne.s64 %p440, %rd72, 0; add.s32 %r416, %r416, 1; mov.u32 %r415, %r417; @%p440 bra $L__BB0_585; add.s32 %r418, %r418, 1; cvt.u64.u32 %rd116, %r418; setp.gt.u64 %p441, %rd70, %rd116; mov.u32 %r415, 0; mov.u32 %r416, %r415; mov.u32 %r417, %r415; @%p441 bra $L__BB0_585; bra.uni $L__BB0_586; $L__BB0_581: add.s32 %r416, %r416, 1; cvt.u64.u32 %rd117, %r416; setp.gt.u64 %p442, %rd72, %rd117; @%p442 bra $L__BB0_585; add.s32 %r415, %r415, 1; cvt.u64.u32 %rd118, %r415; setp.gt.u64 %p443, %rd73, %rd118; mov.u32 %r416, 0; @%p443 bra $L__BB0_585; add.s32 %r417, %r417, 1; cvt.u64.u32 %rd119, %r417; setp.gt.u64 %p444, %rd71, %rd119; mov.u32 %r415, %r416; @%p444 bra $L__BB0_585; add.s32 %r418, %r418, 1; cvt.u64.u32 %rd120, %r418; setp.le.u64 %p445, %rd70, %rd120; mov.u32 %r415, 0; mov.u32 %r416, %r415; mov.u32 %r417, %r415; @%p445 bra $L__BB0_586; $L__BB0_585: cvt.u64.u32 %rd121, %r418; mul.lo.s64 %rd122, %rd74, %rd121; cvt.u64.u32 %rd123, %r417; mul.lo.s64 %rd124, %rd75, %rd123; cvt.u64.u32 %rd125, %r416; mul.lo.s64 %rd126, %rd76, %rd125; cvt.u64.u32 %rd127, %r415; mul.lo.s64 %rd128, %rd77, %rd127; add.s64 %rd129, %rd126, %rd128; add.s64 %rd130, %rd129, %rd124; add.s64 %rd147, %rd130, %rd122; and.b64 %rd131, %rd147, 4294967295; setp.ge.u64 %p446, %rd78, %rd131; add.s32 %r410, %r410, 1; setp.lt.u32 %p447, %r410, 4; and.pred %p448, %p446, %p447; @%p448 bra $L__BB0_482; bra.uni $L__BB0_586; $L__BB0_98: setp.eq.s64 %p92, %rd69, 0; @%p92 bra $L__BB0_137; setp.eq.s64 %p93, %rd69, 2; @%p93 bra $L__BB0_136; setp.eq.s64 %p94, %rd69, 4; @%p94 bra $L__BB0_135; bra.uni $L__BB0_139; $L__BB0_135: setp.lt.f32 %p108, %f2673, 0f00000000; mul.f32 %f920, %f381, %f2673; selp.f32 %f2673, %f920, %f2673, %p108; bra.uni $L__BB0_139; $L__BB0_107: setp.eq.s64 %p83, %rd69, 10; @%p83 bra $L__BB0_127; setp.eq.s64 %p84, %rd69, 11; @%p84 bra $L__BB0_124; setp.eq.s64 %p85, %rd69, 12; @%p85 bra $L__BB0_123; bra.uni $L__BB0_139; $L__BB0_123: neg.f32 %f822, %f2673; mov.f32 %f823, 0f3F000000; mov.f32 %f824, 0f3BBB989D; fma.rn.f32 %f825, %f822, %f824, %f823; mov.f32 %f826, 0f3FB8AA3B; mov.f32 %f827, 0f437C0000; cvt.sat.f32.f32 %f828, %f825; mov.f32 %f829, 0f4B400001; fma.rm.f32 %f830, %f828, %f827, %f829; add.f32 %f831, %f830, 0fCB40007F; neg.f32 %f832, %f831; fma.rn.f32 %f833, %f822, %f826, %f832; mov.f32 %f834, 0f32A57060; fma.rn.f32 %f835, %f822, %f834, %f833; mov.b32 %r102, %f830; shl.b32 %r103, %r102, 23; mov.b32 %f836, %r103; ex2.approx.ftz.f32 %f837, %f835; fma.rn.f32 %f838, %f837, %f836, 0f3F800000; rcp.rn.f32 %f2673, %f838; bra.uni $L__BB0_139; $L__BB0_104: setp.eq.s64 %p88, %rd69, 8; @%p88 bra $L__BB0_131; setp.eq.s64 %p89, %rd69, 9; @%p89 bra $L__BB0_130; bra.uni $L__BB0_139; $L__BB0_130: setp.lt.f32 %p104, %f2673, 0f00000000; selp.f32 %f2673, 0f00000000, %f2673, %p104; bra.uni $L__BB0_139; $L__BB0_113: setp.eq.s64 %p79, %rd69, 16; @%p79 bra $L__BB0_116; setp.ne.s64 %p80, %rd69, 17; @%p80 bra $L__BB0_139; setp.ge.f32 %p95, %f381, %f2673; selp.f32 %f2673, 0f00000000, %f2673, %p95; bra.uni $L__BB0_139; $L__BB0_271: setp.eq.s64 %p228, %rd69, 0; @%p228 bra $L__BB0_310; setp.eq.s64 %p229, %rd69, 2; @%p229 bra $L__BB0_309; setp.eq.s64 %p230, %rd69, 4; @%p230 bra $L__BB0_308; bra.uni $L__BB0_312; $L__BB0_308: setp.lt.f32 %p244, %f2685, 0f00000000; mul.f32 %f1671, %f381, %f2685; selp.f32 %f2685, %f1671, %f2685, %p244; bra.uni $L__BB0_312; $L__BB0_280: setp.eq.s64 %p219, %rd69, 10; @%p219 bra $L__BB0_300; setp.eq.s64 %p220, %rd69, 11; @%p220 bra $L__BB0_297; setp.eq.s64 %p221, %rd69, 12; @%p221 bra $L__BB0_296; bra.uni $L__BB0_312; $L__BB0_296: neg.f32 %f1573, %f2685; mov.f32 %f1574, 0f3F000000; mov.f32 %f1575, 0f3BBB989D; fma.rn.f32 %f1576, %f1573, %f1575, %f1574; mov.f32 %f1577, 0f3FB8AA3B; mov.f32 %f1578, 0f437C0000; cvt.sat.f32.f32 %f1579, %f1576; mov.f32 %f1580, 0f4B400001; fma.rm.f32 %f1581, %f1579, %f1578, %f1580; add.f32 %f1582, %f1581, 0fCB40007F; neg.f32 %f1583, %f1582; fma.rn.f32 %f1584, %f1573, %f1577, %f1583; mov.f32 %f1585, 0f32A57060; fma.rn.f32 %f1586, %f1573, %f1585, %f1584; mov.b32 %r207, %f1581; shl.b32 %r208, %r207, 23; mov.b32 %f1587, %r208; ex2.approx.ftz.f32 %f1588, %f1586; fma.rn.f32 %f1589, %f1588, %f1587, 0f3F800000; rcp.rn.f32 %f2685, %f1589; bra.uni $L__BB0_312; $L__BB0_277: setp.eq.s64 %p224, %rd69, 8; @%p224 bra $L__BB0_304; setp.eq.s64 %p225, %rd69, 9; @%p225 bra $L__BB0_303; bra.uni $L__BB0_312; $L__BB0_303: setp.lt.f32 %p240, %f2685, 0f00000000; selp.f32 %f2685, 0f00000000, %f2685, %p240; bra.uni $L__BB0_312; $L__BB0_286: setp.eq.s64 %p215, %rd69, 16; @%p215 bra $L__BB0_289; setp.ne.s64 %p216, %rd69, 17; @%p216 bra $L__BB0_312; setp.ge.f32 %p231, %f381, %f2685; selp.f32 %f2685, 0f00000000, %f2685, %p231; bra.uni $L__BB0_312; $L__BB0_134: fma.rn.f32 %f2673, %f381, %f2673, %f382; bra.uni $L__BB0_139; $L__BB0_120: mul.f32 %f776, %f381, %f2673; mov.f32 %f777, 0f3F000000; mov.f32 %f778, 0f3BBB989D; fma.rn.f32 %f779, %f776, %f778, %f777; mov.f32 %f780, 0f3FB8AA3B; mov.f32 %f781, 0f437C0000; cvt.sat.f32.f32 %f782, %f779; mov.f32 %f783, 0f4B400001; fma.rm.f32 %f784, %f782, %f781, %f783; add.f32 %f785, %f784, 0fCB40007F; neg.f32 %f786, %f785; fma.rn.f32 %f787, %f776, %f780, %f786; mov.f32 %f788, 0f32A57060; fma.rn.f32 %f789, %f776, %f788, %f787; mov.b32 %r96, %f784; shl.b32 %r97, %r96, 23; mov.b32 %f790, %r97; ex2.approx.ftz.f32 %f791, %f789; fma.rn.f32 %f792, %f791, %f790, 0f3F800000; setp.lt.f32 %p98, %f792, 0f00800000; mul.f32 %f793, %f792, 0f4B000000; selp.f32 %f74, %f793, %f792, %p98; selp.f32 %f794, 0fC1B80000, 0f00000000, %p98; mov.b32 %r98, %f74; add.s32 %r99, %r98, -1059760811; and.b32 %r100, %r99, -8388608; sub.s32 %r101, %r98, %r100; mov.b32 %f795, %r101; cvt.rn.f32.s32 %f796, %r100; mov.f32 %f797, 0f34000000; fma.rn.f32 %f798, %f796, %f797, %f794; add.f32 %f799, %f795, 0fBF800000; mov.f32 %f800, 0f3E1039F6; mov.f32 %f801, 0fBE055027; fma.rn.f32 %f802, %f801, %f799, %f800; mov.f32 %f803, 0fBDF8CDCC; fma.rn.f32 %f804, %f802, %f799, %f803; mov.f32 %f805, 0f3E0F2955; fma.rn.f32 %f806, %f804, %f799, %f805; mov.f32 %f807, 0fBE2AD8B9; fma.rn.f32 %f808, %f806, %f799, %f807; mov.f32 %f809, 0f3E4CED0B; fma.rn.f32 %f810, %f808, %f799, %f809; mov.f32 %f811, 0fBE7FFF22; fma.rn.f32 %f812, %f810, %f799, %f811; mov.f32 %f813, 0f3EAAAA78; fma.rn.f32 %f814, %f812, %f799, %f813; mov.f32 %f815, 0fBF000000; fma.rn.f32 %f816, %f814, %f799, %f815; mul.f32 %f817, %f816, %f799; fma.rn.f32 %f818, %f817, %f799, %f799; mov.f32 %f819, 0f3F317218; fma.rn.f32 %f2671, %f798, %f819, %f818; setp.lt.u32 %p99, %r98, 2139095040; @%p99 bra $L__BB0_122; mov.f32 %f820, 0f7F800000; fma.rn.f32 %f2671, %f74, %f820, %f820; $L__BB0_122: setp.eq.f32 %p100, %f74, 0f00000000; selp.f32 %f821, 0fFF800000, %f2671, %p100; div.rn.f32 %f2673, %f821, %f381; bra.uni $L__BB0_139; $L__BB0_137: setp.geu.f32 %p109, %f2673, 0f00000000; @%p109 bra $L__BB0_139; mov.f32 %f925, 0f3F000000; mov.f32 %f926, 0f3BBB989D; fma.rn.f32 %f927, %f2673, %f926, %f925; mov.f32 %f928, 0f3FB8AA3B; mov.f32 %f929, 0f437C0000; cvt.sat.f32.f32 %f930, %f927; mov.f32 %f931, 0f4B400001; fma.rm.f32 %f932, %f930, %f929, %f931; add.f32 %f933, %f932, 0fCB40007F; neg.f32 %f934, %f933; fma.rn.f32 %f935, %f2673, %f928, %f934; mov.f32 %f936, 0f32A57060; fma.rn.f32 %f937, %f2673, %f936, %f935; mov.b32 %r116, %f932; shl.b32 %r117, %r116, 23; mov.b32 %f938, %r117; ex2.approx.ftz.f32 %f939, %f937; fma.rn.f32 %f940, %f939, %f938, 0fBF800000; mul.f32 %f2673, %f381, %f940; bra.uni $L__BB0_139; $L__BB0_136: fma.rn.f32 %f921, %f381, %f2673, %f382; mov.f32 %f922, 0f3F800000; min.f32 %f923, %f922, %f921; mov.f32 %f924, 0f00000000; max.f32 %f2673, %f924, %f923; bra.uni $L__BB0_139; $L__BB0_127: setp.gt.f32 %p103, %f2673, 0f00000000; @%p103 bra $L__BB0_129; bra.uni $L__BB0_128; $L__BB0_129: mul.f32 %f2673, %f382, %f2673; bra.uni $L__BB0_139; $L__BB0_124: mul.f32 %f80, %f382, %f2673; abs.f32 %f81, %f80; setp.ltu.f32 %p101, %f81, 0f3F19999A; @%p101 bra $L__BB0_126; bra.uni $L__BB0_125; $L__BB0_126: mul.f32 %f847, %f80, %f80; mov.f32 %f848, 0fBD563CAE; mov.f32 %f849, 0f3C80F082; fma.rn.f32 %f850, %f849, %f847, %f848; mov.f32 %f851, 0f3E085941; fma.rn.f32 %f852, %f850, %f847, %f851; mov.f32 %f853, 0fBEAAA9ED; fma.rn.f32 %f854, %f852, %f847, %f853; mov.f32 %f855, 0f00000000; fma.rn.f32 %f856, %f854, %f847, %f855; fma.rn.f32 %f83, %f856, %f80, %f80; mul.f32 %f2673, %f381, %f83; bra.uni $L__BB0_139; $L__BB0_131: mul.f32 %f874, %f382, %f2673; mov.f32 %f875, 0f3F000000; mov.f32 %f876, 0f3BBB989D; fma.rn.f32 %f877, %f874, %f876, %f875; mov.f32 %f878, 0f3FB8AA3B; mov.f32 %f879, 0f437C0000; cvt.sat.f32.f32 %f880, %f877; mov.f32 %f881, 0f4B400001; fma.rm.f32 %f882, %f880, %f879, %f881; add.f32 %f883, %f882, 0fCB40007F; neg.f32 %f884, %f883; fma.rn.f32 %f885, %f874, %f878, %f884; mov.f32 %f886, 0f32A57060; fma.rn.f32 %f887, %f874, %f886, %f885; mov.b32 %r110, %f882; shl.b32 %r111, %r110, 23; mov.b32 %f888, %r111; ex2.approx.ftz.f32 %f889, %f887; fma.rn.f32 %f890, %f889, %f888, 0f3F800000; setp.lt.f32 %p105, %f890, 0f00800000; mul.f32 %f891, %f890, 0f4B000000; selp.f32 %f89, %f891, %f890, %p105; selp.f32 %f892, 0fC1B80000, 0f00000000, %p105; mov.b32 %r112, %f89; add.s32 %r113, %r112, -1059760811; and.b32 %r114, %r113, -8388608; sub.s32 %r115, %r112, %r114; mov.b32 %f893, %r115; cvt.rn.f32.s32 %f894, %r114; mov.f32 %f895, 0f34000000; fma.rn.f32 %f896, %f894, %f895, %f892; add.f32 %f897, %f893, 0fBF800000; mov.f32 %f898, 0f3E1039F6; mov.f32 %f899, 0fBE055027; fma.rn.f32 %f900, %f899, %f897, %f898; mov.f32 %f901, 0fBDF8CDCC; fma.rn.f32 %f902, %f900, %f897, %f901; mov.f32 %f903, 0f3E0F2955; fma.rn.f32 %f904, %f902, %f897, %f903; mov.f32 %f905, 0fBE2AD8B9; fma.rn.f32 %f906, %f904, %f897, %f905; mov.f32 %f907, 0f3E4CED0B; fma.rn.f32 %f908, %f906, %f897, %f907; mov.f32 %f909, 0fBE7FFF22; fma.rn.f32 %f910, %f908, %f897, %f909; mov.f32 %f911, 0f3EAAAA78; fma.rn.f32 %f912, %f910, %f897, %f911; mov.f32 %f913, 0fBF000000; fma.rn.f32 %f914, %f912, %f897, %f913; mul.f32 %f915, %f914, %f897; fma.rn.f32 %f916, %f915, %f897, %f897; mov.f32 %f917, 0f3F317218; fma.rn.f32 %f2672, %f896, %f917, %f916; setp.lt.u32 %p106, %r112, 2139095040; @%p106 bra $L__BB0_133; mov.f32 %f918, 0f7F800000; fma.rn.f32 %f2672, %f89, %f918, %f918; $L__BB0_133: setp.eq.f32 %p107, %f89, 0f00000000; selp.f32 %f919, 0fFF800000, %f2672, %p107; mul.f32 %f2673, %f381, %f919; bra.uni $L__BB0_139; $L__BB0_116: abs.f32 %f70, %f2673; setp.ltu.f32 %p96, %f70, 0f3F19999A; @%p96 bra $L__BB0_118; bra.uni $L__BB0_117; $L__BB0_118: mul.f32 %f764, %f2673, %f2673; mov.f32 %f765, 0fBD563CAE; mov.f32 %f766, 0f3C80F082; fma.rn.f32 %f767, %f766, %f764, %f765; mov.f32 %f768, 0f3E085941; fma.rn.f32 %f769, %f767, %f764, %f768; mov.f32 %f770, 0fBEAAA9ED; fma.rn.f32 %f771, %f769, %f764, %f770; mov.f32 %f772, 0f00000000; fma.rn.f32 %f773, %f771, %f764, %f772; fma.rn.f32 %f2673, %f773, %f2673, %f2673; bra.uni $L__BB0_139; $L__BB0_307: fma.rn.f32 %f2685, %f381, %f2685, %f382; bra.uni $L__BB0_312; $L__BB0_293: mul.f32 %f1527, %f381, %f2685; mov.f32 %f1528, 0f3F000000; mov.f32 %f1529, 0f3BBB989D; fma.rn.f32 %f1530, %f1527, %f1529, %f1528; mov.f32 %f1531, 0f3FB8AA3B; mov.f32 %f1532, 0f437C0000; cvt.sat.f32.f32 %f1533, %f1530; mov.f32 %f1534, 0f4B400001; fma.rm.f32 %f1535, %f1533, %f1532, %f1534; add.f32 %f1536, %f1535, 0fCB40007F; neg.f32 %f1537, %f1536; fma.rn.f32 %f1538, %f1527, %f1531, %f1537; mov.f32 %f1539, 0f32A57060; fma.rn.f32 %f1540, %f1527, %f1539, %f1538; mov.b32 %r201, %f1535; shl.b32 %r202, %r201, 23; mov.b32 %f1541, %r202; ex2.approx.ftz.f32 %f1542, %f1540; fma.rn.f32 %f1543, %f1542, %f1541, 0f3F800000; setp.lt.f32 %p234, %f1543, 0f00800000; mul.f32 %f1544, %f1543, 0f4B000000; selp.f32 %f201, %f1544, %f1543, %p234; selp.f32 %f1545, 0fC1B80000, 0f00000000, %p234; mov.b32 %r203, %f201; add.s32 %r204, %r203, -1059760811; and.b32 %r205, %r204, -8388608; sub.s32 %r206, %r203, %r205; mov.b32 %f1546, %r206; cvt.rn.f32.s32 %f1547, %r205; mov.f32 %f1548, 0f34000000; fma.rn.f32 %f1549, %f1547, %f1548, %f1545; add.f32 %f1550, %f1546, 0fBF800000; mov.f32 %f1551, 0f3E1039F6; mov.f32 %f1552, 0fBE055027; fma.rn.f32 %f1553, %f1552, %f1550, %f1551; mov.f32 %f1554, 0fBDF8CDCC; fma.rn.f32 %f1555, %f1553, %f1550, %f1554; mov.f32 %f1556, 0f3E0F2955; fma.rn.f32 %f1557, %f1555, %f1550, %f1556; mov.f32 %f1558, 0fBE2AD8B9; fma.rn.f32 %f1559, %f1557, %f1550, %f1558; mov.f32 %f1560, 0f3E4CED0B; fma.rn.f32 %f1561, %f1559, %f1550, %f1560; mov.f32 %f1562, 0fBE7FFF22; fma.rn.f32 %f1563, %f1561, %f1550, %f1562; mov.f32 %f1564, 0f3EAAAA78; fma.rn.f32 %f1565, %f1563, %f1550, %f1564; mov.f32 %f1566, 0fBF000000; fma.rn.f32 %f1567, %f1565, %f1550, %f1566; mul.f32 %f1568, %f1567, %f1550; fma.rn.f32 %f1569, %f1568, %f1550, %f1550; mov.f32 %f1570, 0f3F317218; fma.rn.f32 %f2683, %f1549, %f1570, %f1569; setp.lt.u32 %p235, %r203, 2139095040; @%p235 bra $L__BB0_295; mov.f32 %f1571, 0f7F800000; fma.rn.f32 %f2683, %f201, %f1571, %f1571; $L__BB0_295: setp.eq.f32 %p236, %f201, 0f00000000; selp.f32 %f1572, 0fFF800000, %f2683, %p236; div.rn.f32 %f2685, %f1572, %f381; bra.uni $L__BB0_312; $L__BB0_310: setp.geu.f32 %p245, %f2685, 0f00000000; @%p245 bra $L__BB0_312; mov.f32 %f1676, 0f3F000000; mov.f32 %f1677, 0f3BBB989D; fma.rn.f32 %f1678, %f2685, %f1677, %f1676; mov.f32 %f1679, 0f3FB8AA3B; mov.f32 %f1680, 0f437C0000; cvt.sat.f32.f32 %f1681, %f1678; mov.f32 %f1682, 0f4B400001; fma.rm.f32 %f1683, %f1681, %f1680, %f1682; add.f32 %f1684, %f1683, 0fCB40007F; neg.f32 %f1685, %f1684; fma.rn.f32 %f1686, %f2685, %f1679, %f1685; mov.f32 %f1687, 0f32A57060; fma.rn.f32 %f1688, %f2685, %f1687, %f1686; mov.b32 %r221, %f1683; shl.b32 %r222, %r221, 23; mov.b32 %f1689, %r222; ex2.approx.ftz.f32 %f1690, %f1688; fma.rn.f32 %f1691, %f1690, %f1689, 0fBF800000; mul.f32 %f2685, %f381, %f1691; bra.uni $L__BB0_312; $L__BB0_300: setp.gt.f32 %p239, %f2685, 0f00000000; @%p239 bra $L__BB0_302; bra.uni $L__BB0_301; $L__BB0_302: mul.f32 %f2685, %f382, %f2685; bra.uni $L__BB0_312; $L__BB0_304: mul.f32 %f1625, %f382, %f2685; mov.f32 %f1626, 0f3F000000; mov.f32 %f1627, 0f3BBB989D; fma.rn.f32 %f1628, %f1625, %f1627, %f1626; mov.f32 %f1629, 0f3FB8AA3B; mov.f32 %f1630, 0f437C0000; cvt.sat.f32.f32 %f1631, %f1628; mov.f32 %f1632, 0f4B400001; fma.rm.f32 %f1633, %f1631, %f1630, %f1632; add.f32 %f1634, %f1633, 0fCB40007F; neg.f32 %f1635, %f1634; fma.rn.f32 %f1636, %f1625, %f1629, %f1635; mov.f32 %f1637, 0f32A57060; fma.rn.f32 %f1638, %f1625, %f1637, %f1636; mov.b32 %r215, %f1633; shl.b32 %r216, %r215, 23; mov.b32 %f1639, %r216; ex2.approx.ftz.f32 %f1640, %f1638; fma.rn.f32 %f1641, %f1640, %f1639, 0f3F800000; setp.lt.f32 %p241, %f1641, 0f00800000; mul.f32 %f1642, %f1641, 0f4B000000; selp.f32 %f216, %f1642, %f1641, %p241; selp.f32 %f1643, 0fC1B80000, 0f00000000, %p241; mov.b32 %r217, %f216; add.s32 %r218, %r217, -1059760811; and.b32 %r219, %r218, -8388608; sub.s32 %r220, %r217, %r219; mov.b32 %f1644, %r220; cvt.rn.f32.s32 %f1645, %r219; mov.f32 %f1646, 0f34000000; fma.rn.f32 %f1647, %f1645, %f1646, %f1643; add.f32 %f1648, %f1644, 0fBF800000; mov.f32 %f1649, 0f3E1039F6; mov.f32 %f1650, 0fBE055027; fma.rn.f32 %f1651, %f1650, %f1648, %f1649; mov.f32 %f1652, 0fBDF8CDCC; fma.rn.f32 %f1653, %f1651, %f1648, %f1652; mov.f32 %f1654, 0f3E0F2955; fma.rn.f32 %f1655, %f1653, %f1648, %f1654; mov.f32 %f1656, 0fBE2AD8B9; fma.rn.f32 %f1657, %f1655, %f1648, %f1656; mov.f32 %f1658, 0f3E4CED0B; fma.rn.f32 %f1659, %f1657, %f1648, %f1658; mov.f32 %f1660, 0fBE7FFF22; fma.rn.f32 %f1661, %f1659, %f1648, %f1660; mov.f32 %f1662, 0f3EAAAA78; fma.rn.f32 %f1663, %f1661, %f1648, %f1662; mov.f32 %f1664, 0fBF000000; fma.rn.f32 %f1665, %f1663, %f1648, %f1664; mul.f32 %f1666, %f1665, %f1648; fma.rn.f32 %f1667, %f1666, %f1648, %f1648; mov.f32 %f1668, 0f3F317218; fma.rn.f32 %f2684, %f1647, %f1668, %f1667; setp.lt.u32 %p242, %r217, 2139095040; @%p242 bra $L__BB0_306; mov.f32 %f1669, 0f7F800000; fma.rn.f32 %f2684, %f216, %f1669, %f1669; $L__BB0_306: setp.eq.f32 %p243, %f216, 0f00000000; selp.f32 %f1670, 0fFF800000, %f2684, %p243; mul.f32 %f2685, %f381, %f1670; bra.uni $L__BB0_312; $L__BB0_289: abs.f32 %f197, %f2685; setp.ltu.f32 %p232, %f197, 0f3F19999A; @%p232 bra $L__BB0_291; bra.uni $L__BB0_290; $L__BB0_291: mul.f32 %f1515, %f2685, %f2685; mov.f32 %f1516, 0fBD563CAE; mov.f32 %f1517, 0f3C80F082; fma.rn.f32 %f1518, %f1517, %f1515, %f1516; mov.f32 %f1519, 0f3E085941; fma.rn.f32 %f1520, %f1518, %f1515, %f1519; mov.f32 %f1521, 0fBEAAA9ED; fma.rn.f32 %f1522, %f1520, %f1515, %f1521; mov.f32 %f1523, 0f00000000; fma.rn.f32 %f1524, %f1522, %f1515, %f1523; fma.rn.f32 %f2685, %f1524, %f2685, %f2685; bra.uni $L__BB0_312; $L__BB0_309: fma.rn.f32 %f1672, %f381, %f2685, %f382; mov.f32 %f1673, 0f3F800000; min.f32 %f1674, %f1673, %f1672; mov.f32 %f1675, 0f00000000; max.f32 %f2685, %f1675, %f1674; bra.uni $L__BB0_312; $L__BB0_297: mul.f32 %f207, %f382, %f2685; abs.f32 %f208, %f207; setp.ltu.f32 %p237, %f208, 0f3F19999A; @%p237 bra $L__BB0_299; bra.uni $L__BB0_298; $L__BB0_299: mul.f32 %f1598, %f207, %f207; mov.f32 %f1599, 0fBD563CAE; mov.f32 %f1600, 0f3C80F082; fma.rn.f32 %f1601, %f1600, %f1598, %f1599; mov.f32 %f1602, 0f3E085941; fma.rn.f32 %f1603, %f1601, %f1598, %f1602; mov.f32 %f1604, 0fBEAAA9ED; fma.rn.f32 %f1605, %f1603, %f1598, %f1604; mov.f32 %f1606, 0f00000000; fma.rn.f32 %f1607, %f1605, %f1598, %f1606; fma.rn.f32 %f210, %f1607, %f207, %f207; mul.f32 %f2685, %f381, %f210; bra.uni $L__BB0_312; $L__BB0_128: mul.f32 %f857, %f382, %f381; mov.f32 %f858, 0f3F000000; mov.f32 %f859, 0f3BBB989D; fma.rn.f32 %f860, %f2673, %f859, %f858; mov.f32 %f861, 0f3FB8AA3B; mov.f32 %f862, 0f437C0000; cvt.sat.f32.f32 %f863, %f860; mov.f32 %f864, 0f4B400001; fma.rm.f32 %f865, %f863, %f862, %f864; add.f32 %f866, %f865, 0fCB40007F; neg.f32 %f867, %f866; fma.rn.f32 %f868, %f2673, %f861, %f867; mov.f32 %f869, 0f32A57060; fma.rn.f32 %f870, %f2673, %f869, %f868; mov.b32 %r108, %f865; shl.b32 %r109, %r108, 23; mov.b32 %f871, %r109; ex2.approx.ftz.f32 %f872, %f870; fma.rn.f32 %f873, %f872, %f871, 0fBF800000; mul.f32 %f2673, %f857, %f873; bra.uni $L__BB0_139; $L__BB0_125: mul.f32 %f839, %f81, 0f4038AA3B; ex2.approx.ftz.f32 %f840, %f839; add.f32 %f841, %f840, 0f3F800000; mov.f32 %f842, 0f3F800000; rcp.approx.ftz.f32 %f843, %f841; mov.f32 %f844, 0fC0000000; fma.rn.f32 %f845, %f843, %f844, %f842; setp.ge.f32 %p102, %f81, 0f41102CB4; selp.f32 %f846, 0f3F800000, %f845, %p102; mov.b32 %r104, %f846; mov.b32 %r105, %f80; and.b32 %r106, %r105, -2147483648; or.b32 %r107, %r106, %r104; mov.b32 %f82, %r107; mul.f32 %f2673, %f381, %f82; bra.uni $L__BB0_139; $L__BB0_117: mul.f32 %f756, %f70, 0f4038AA3B; ex2.approx.ftz.f32 %f757, %f756; add.f32 %f758, %f757, 0f3F800000; mov.f32 %f759, 0f3F800000; rcp.approx.ftz.f32 %f760, %f758; mov.f32 %f761, 0fC0000000; fma.rn.f32 %f762, %f760, %f761, %f759; setp.ge.f32 %p97, %f70, 0f41102CB4; selp.f32 %f763, 0f3F800000, %f762, %p97; mov.b32 %r92, %f763; mov.b32 %r93, %f2673; and.b32 %r94, %r93, -2147483648; or.b32 %r95, %r94, %r92; mov.b32 %f2673, %r95; $L__BB0_139: // begin inline asm { cvt.rn.f16.f32 %rs32, %f2673;} // end inline asm // begin inline asm { cvt.f32.f16 %f2676, %rs3;} // end inline asm @%p76 bra $L__BB0_149; setp.gt.s64 %p120, %rd69, 4; @%p120 bra $L__BB0_144; bra.uni $L__BB0_141; $L__BB0_144: setp.gt.s64 %p121, %rd69, 7; @%p121 bra $L__BB0_147; setp.eq.s64 %p124, %rd69, 5; @%p124 bra $L__BB0_177; setp.eq.s64 %p125, %rd69, 7; @%p125 bra $L__BB0_178; bra.uni $L__BB0_182; $L__BB0_149: setp.gt.s64 %p111, %rd69, 13; @%p111 bra $L__BB0_153; bra.uni $L__BB0_150; $L__BB0_153: setp.gt.s64 %p112, %rd69, 15; @%p112 bra $L__BB0_156; setp.eq.s64 %p115, %rd69, 14; @%p115 bra $L__BB0_163; setp.eq.s64 %p116, %rd69, 15; @%p116 bra $L__BB0_162; bra.uni $L__BB0_182; $L__BB0_162: abs.f32 %f961, %f2676; add.f32 %f962, %f961, 0f3F800000; div.rn.f32 %f2676, %f2676, %f962; bra.uni $L__BB0_182; $L__BB0_141: setp.eq.s64 %p126, %rd69, 0; @%p126 bra $L__BB0_180; setp.eq.s64 %p127, %rd69, 2; @%p127 bra $L__BB0_179; setp.eq.s64 %p128, %rd69, 4; @%p128 bra $L__BB0_178; bra.uni $L__BB0_182; $L__BB0_178: setp.lt.f32 %p142, %f2676, 0f00000000; mul.f32 %f1107, %f381, %f2676; selp.f32 %f2676, %f1107, %f2676, %p142; bra.uni $L__BB0_182; $L__BB0_150: setp.eq.s64 %p117, %rd69, 10; @%p117 bra $L__BB0_170; setp.eq.s64 %p118, %rd69, 11; @%p118 bra $L__BB0_167; setp.eq.s64 %p119, %rd69, 12; @%p119 bra $L__BB0_166; bra.uni $L__BB0_182; $L__BB0_166: neg.f32 %f1009, %f2676; mov.f32 %f1010, 0f3F000000; mov.f32 %f1011, 0f3BBB989D; fma.rn.f32 %f1012, %f1009, %f1011, %f1010; mov.f32 %f1013, 0f3FB8AA3B; mov.f32 %f1014, 0f437C0000; cvt.sat.f32.f32 %f1015, %f1012; mov.f32 %f1016, 0f4B400001; fma.rm.f32 %f1017, %f1015, %f1014, %f1016; add.f32 %f1018, %f1017, 0fCB40007F; neg.f32 %f1019, %f1018; fma.rn.f32 %f1020, %f1009, %f1013, %f1019; mov.f32 %f1021, 0f32A57060; fma.rn.f32 %f1022, %f1009, %f1021, %f1020; mov.b32 %r128, %f1017; shl.b32 %r129, %r128, 23; mov.b32 %f1023, %r129; ex2.approx.ftz.f32 %f1024, %f1022; fma.rn.f32 %f1025, %f1024, %f1023, 0f3F800000; rcp.rn.f32 %f2676, %f1025; bra.uni $L__BB0_182; $L__BB0_147: setp.eq.s64 %p122, %rd69, 8; @%p122 bra $L__BB0_174; setp.eq.s64 %p123, %rd69, 9; @%p123 bra $L__BB0_173; bra.uni $L__BB0_182; $L__BB0_173: setp.lt.f32 %p138, %f2676, 0f00000000; selp.f32 %f2676, 0f00000000, %f2676, %p138; bra.uni $L__BB0_182; $L__BB0_156: setp.eq.s64 %p113, %rd69, 16; @%p113 bra $L__BB0_159; setp.ne.s64 %p114, %rd69, 17; @%p114 bra $L__BB0_182; setp.ge.f32 %p129, %f381, %f2676; selp.f32 %f2676, 0f00000000, %f2676, %p129; bra.uni $L__BB0_182; $L__BB0_177: fma.rn.f32 %f2676, %f381, %f2676, %f382; bra.uni $L__BB0_182; $L__BB0_163: mul.f32 %f963, %f381, %f2676; mov.f32 %f964, 0f3F000000; mov.f32 %f965, 0f3BBB989D; fma.rn.f32 %f966, %f963, %f965, %f964; mov.f32 %f967, 0f3FB8AA3B; mov.f32 %f968, 0f437C0000; cvt.sat.f32.f32 %f969, %f966; mov.f32 %f970, 0f4B400001; fma.rm.f32 %f971, %f969, %f968, %f970; add.f32 %f972, %f971, 0fCB40007F; neg.f32 %f973, %f972; fma.rn.f32 %f974, %f963, %f967, %f973; mov.f32 %f975, 0f32A57060; fma.rn.f32 %f976, %f963, %f975, %f974; mov.b32 %r122, %f971; shl.b32 %r123, %r122, 23; mov.b32 %f977, %r123; ex2.approx.ftz.f32 %f978, %f976; fma.rn.f32 %f979, %f978, %f977, 0f3F800000; setp.lt.f32 %p132, %f979, 0f00800000; mul.f32 %f980, %f979, 0f4B000000; selp.f32 %f105, %f980, %f979, %p132; selp.f32 %f981, 0fC1B80000, 0f00000000, %p132; mov.b32 %r124, %f105; add.s32 %r125, %r124, -1059760811; and.b32 %r126, %r125, -8388608; sub.s32 %r127, %r124, %r126; mov.b32 %f982, %r127; cvt.rn.f32.s32 %f983, %r126; mov.f32 %f984, 0f34000000; fma.rn.f32 %f985, %f983, %f984, %f981; add.f32 %f986, %f982, 0fBF800000; mov.f32 %f987, 0f3E1039F6; mov.f32 %f988, 0fBE055027; fma.rn.f32 %f989, %f988, %f986, %f987; mov.f32 %f990, 0fBDF8CDCC; fma.rn.f32 %f991, %f989, %f986, %f990; mov.f32 %f992, 0f3E0F2955; fma.rn.f32 %f993, %f991, %f986, %f992; mov.f32 %f994, 0fBE2AD8B9; fma.rn.f32 %f995, %f993, %f986, %f994; mov.f32 %f996, 0f3E4CED0B; fma.rn.f32 %f997, %f995, %f986, %f996; mov.f32 %f998, 0fBE7FFF22; fma.rn.f32 %f999, %f997, %f986, %f998; mov.f32 %f1000, 0f3EAAAA78; fma.rn.f32 %f1001, %f999, %f986, %f1000; mov.f32 %f1002, 0fBF000000; fma.rn.f32 %f1003, %f1001, %f986, %f1002; mul.f32 %f1004, %f1003, %f986; fma.rn.f32 %f1005, %f1004, %f986, %f986; mov.f32 %f1006, 0f3F317218; fma.rn.f32 %f2674, %f985, %f1006, %f1005; setp.lt.u32 %p133, %r124, 2139095040; @%p133 bra $L__BB0_165; mov.f32 %f1007, 0f7F800000; fma.rn.f32 %f2674, %f105, %f1007, %f1007; $L__BB0_165: setp.eq.f32 %p134, %f105, 0f00000000; selp.f32 %f1008, 0fFF800000, %f2674, %p134; div.rn.f32 %f2676, %f1008, %f381; bra.uni $L__BB0_182; $L__BB0_180: setp.geu.f32 %p143, %f2676, 0f00000000; @%p143 bra $L__BB0_182; mov.f32 %f1112, 0f3F000000; mov.f32 %f1113, 0f3BBB989D; fma.rn.f32 %f1114, %f2676, %f1113, %f1112; mov.f32 %f1115, 0f3FB8AA3B; mov.f32 %f1116, 0f437C0000; cvt.sat.f32.f32 %f1117, %f1114; mov.f32 %f1118, 0f4B400001; fma.rm.f32 %f1119, %f1117, %f1116, %f1118; add.f32 %f1120, %f1119, 0fCB40007F; neg.f32 %f1121, %f1120; fma.rn.f32 %f1122, %f2676, %f1115, %f1121; mov.f32 %f1123, 0f32A57060; fma.rn.f32 %f1124, %f2676, %f1123, %f1122; mov.b32 %r142, %f1119; shl.b32 %r143, %r142, 23; mov.b32 %f1125, %r143; ex2.approx.ftz.f32 %f1126, %f1124; fma.rn.f32 %f1127, %f1126, %f1125, 0fBF800000; mul.f32 %f2676, %f381, %f1127; bra.uni $L__BB0_182; $L__BB0_179: fma.rn.f32 %f1108, %f381, %f2676, %f382; mov.f32 %f1109, 0f3F800000; min.f32 %f1110, %f1109, %f1108; mov.f32 %f1111, 0f00000000; max.f32 %f2676, %f1111, %f1110; bra.uni $L__BB0_182; $L__BB0_170: setp.gt.f32 %p137, %f2676, 0f00000000; @%p137 bra $L__BB0_172; bra.uni $L__BB0_171; $L__BB0_172: mul.f32 %f2676, %f382, %f2676; bra.uni $L__BB0_182; $L__BB0_167: mul.f32 %f111, %f382, %f2676; abs.f32 %f112, %f111; setp.ltu.f32 %p135, %f112, 0f3F19999A; @%p135 bra $L__BB0_169; bra.uni $L__BB0_168; $L__BB0_169: mul.f32 %f1034, %f111, %f111; mov.f32 %f1035, 0fBD563CAE; mov.f32 %f1036, 0f3C80F082; fma.rn.f32 %f1037, %f1036, %f1034, %f1035; mov.f32 %f1038, 0f3E085941; fma.rn.f32 %f1039, %f1037, %f1034, %f1038; mov.f32 %f1040, 0fBEAAA9ED; fma.rn.f32 %f1041, %f1039, %f1034, %f1040; mov.f32 %f1042, 0f00000000; fma.rn.f32 %f1043, %f1041, %f1034, %f1042; fma.rn.f32 %f114, %f1043, %f111, %f111; mul.f32 %f2676, %f381, %f114; bra.uni $L__BB0_182; $L__BB0_174: mul.f32 %f1061, %f382, %f2676; mov.f32 %f1062, 0f3F000000; mov.f32 %f1063, 0f3BBB989D; fma.rn.f32 %f1064, %f1061, %f1063, %f1062; mov.f32 %f1065, 0f3FB8AA3B; mov.f32 %f1066, 0f437C0000; cvt.sat.f32.f32 %f1067, %f1064; mov.f32 %f1068, 0f4B400001; fma.rm.f32 %f1069, %f1067, %f1066, %f1068; add.f32 %f1070, %f1069, 0fCB40007F; neg.f32 %f1071, %f1070; fma.rn.f32 %f1072, %f1061, %f1065, %f1071; mov.f32 %f1073, 0f32A57060; fma.rn.f32 %f1074, %f1061, %f1073, %f1072; mov.b32 %r136, %f1069; shl.b32 %r137, %r136, 23; mov.b32 %f1075, %r137; ex2.approx.ftz.f32 %f1076, %f1074; fma.rn.f32 %f1077, %f1076, %f1075, 0f3F800000; setp.lt.f32 %p139, %f1077, 0f00800000; mul.f32 %f1078, %f1077, 0f4B000000; selp.f32 %f120, %f1078, %f1077, %p139; selp.f32 %f1079, 0fC1B80000, 0f00000000, %p139; mov.b32 %r138, %f120; add.s32 %r139, %r138, -1059760811; and.b32 %r140, %r139, -8388608; sub.s32 %r141, %r138, %r140; mov.b32 %f1080, %r141; cvt.rn.f32.s32 %f1081, %r140; mov.f32 %f1082, 0f34000000; fma.rn.f32 %f1083, %f1081, %f1082, %f1079; add.f32 %f1084, %f1080, 0fBF800000; mov.f32 %f1085, 0f3E1039F6; mov.f32 %f1086, 0fBE055027; fma.rn.f32 %f1087, %f1086, %f1084, %f1085; mov.f32 %f1088, 0fBDF8CDCC; fma.rn.f32 %f1089, %f1087, %f1084, %f1088; mov.f32 %f1090, 0f3E0F2955; fma.rn.f32 %f1091, %f1089, %f1084, %f1090; mov.f32 %f1092, 0fBE2AD8B9; fma.rn.f32 %f1093, %f1091, %f1084, %f1092; mov.f32 %f1094, 0f3E4CED0B; fma.rn.f32 %f1095, %f1093, %f1084, %f1094; mov.f32 %f1096, 0fBE7FFF22; fma.rn.f32 %f1097, %f1095, %f1084, %f1096; mov.f32 %f1098, 0f3EAAAA78; fma.rn.f32 %f1099, %f1097, %f1084, %f1098; mov.f32 %f1100, 0fBF000000; fma.rn.f32 %f1101, %f1099, %f1084, %f1100; mul.f32 %f1102, %f1101, %f1084; fma.rn.f32 %f1103, %f1102, %f1084, %f1084; mov.f32 %f1104, 0f3F317218; fma.rn.f32 %f2675, %f1083, %f1104, %f1103; setp.lt.u32 %p140, %r138, 2139095040; @%p140 bra $L__BB0_176; mov.f32 %f1105, 0f7F800000; fma.rn.f32 %f2675, %f120, %f1105, %f1105; $L__BB0_176: setp.eq.f32 %p141, %f120, 0f00000000; selp.f32 %f1106, 0fFF800000, %f2675, %p141; mul.f32 %f2676, %f381, %f1106; bra.uni $L__BB0_182; $L__BB0_159: abs.f32 %f101, %f2676; setp.ltu.f32 %p130, %f101, 0f3F19999A; @%p130 bra $L__BB0_161; bra.uni $L__BB0_160; $L__BB0_161: mul.f32 %f951, %f2676, %f2676; mov.f32 %f952, 0fBD563CAE; mov.f32 %f953, 0f3C80F082; fma.rn.f32 %f954, %f953, %f951, %f952; mov.f32 %f955, 0f3E085941; fma.rn.f32 %f956, %f954, %f951, %f955; mov.f32 %f957, 0fBEAAA9ED; fma.rn.f32 %f958, %f956, %f951, %f957; mov.f32 %f959, 0f00000000; fma.rn.f32 %f960, %f958, %f951, %f959; fma.rn.f32 %f2676, %f960, %f2676, %f2676; bra.uni $L__BB0_182; $L__BB0_171: mul.f32 %f1044, %f382, %f381; mov.f32 %f1045, 0f3F000000; mov.f32 %f1046, 0f3BBB989D; fma.rn.f32 %f1047, %f2676, %f1046, %f1045; mov.f32 %f1048, 0f3FB8AA3B; mov.f32 %f1049, 0f437C0000; cvt.sat.f32.f32 %f1050, %f1047; mov.f32 %f1051, 0f4B400001; fma.rm.f32 %f1052, %f1050, %f1049, %f1051; add.f32 %f1053, %f1052, 0fCB40007F; neg.f32 %f1054, %f1053; fma.rn.f32 %f1055, %f2676, %f1048, %f1054; mov.f32 %f1056, 0f32A57060; fma.rn.f32 %f1057, %f2676, %f1056, %f1055; mov.b32 %r134, %f1052; shl.b32 %r135, %r134, 23; mov.b32 %f1058, %r135; ex2.approx.ftz.f32 %f1059, %f1057; fma.rn.f32 %f1060, %f1059, %f1058, 0fBF800000; mul.f32 %f2676, %f1044, %f1060; bra.uni $L__BB0_182; $L__BB0_168: mul.f32 %f1026, %f112, 0f4038AA3B; ex2.approx.ftz.f32 %f1027, %f1026; add.f32 %f1028, %f1027, 0f3F800000; mov.f32 %f1029, 0f3F800000; rcp.approx.ftz.f32 %f1030, %f1028; mov.f32 %f1031, 0fC0000000; fma.rn.f32 %f1032, %f1030, %f1031, %f1029; setp.ge.f32 %p136, %f112, 0f41102CB4; selp.f32 %f1033, 0f3F800000, %f1032, %p136; mov.b32 %r130, %f1033; mov.b32 %r131, %f111; and.b32 %r132, %r131, -2147483648; or.b32 %r133, %r132, %r130; mov.b32 %f113, %r133; mul.f32 %f2676, %f381, %f113; bra.uni $L__BB0_182; $L__BB0_160: mul.f32 %f943, %f101, 0f4038AA3B; ex2.approx.ftz.f32 %f944, %f943; add.f32 %f945, %f944, 0f3F800000; mov.f32 %f946, 0f3F800000; rcp.approx.ftz.f32 %f947, %f945; mov.f32 %f948, 0fC0000000; fma.rn.f32 %f949, %f947, %f948, %f946; setp.ge.f32 %p131, %f101, 0f41102CB4; selp.f32 %f950, 0f3F800000, %f949, %p131; mov.b32 %r118, %f950; mov.b32 %r119, %f2676; and.b32 %r120, %r119, -2147483648; or.b32 %r121, %r120, %r118; mov.b32 %f2676, %r121; $L__BB0_182: // begin inline asm { cvt.rn.f16.f32 %rs34, %f2676;} // end inline asm mov.b32 %r144, %f755; mov.b32 {%rs35, %rs8}, %r144; // begin inline asm { cvt.f32.f16 %f2679, %rs35;} // end inline asm @%p76 bra $L__BB0_192; setp.gt.s64 %p154, %rd69, 4; @%p154 bra $L__BB0_187; bra.uni $L__BB0_184; $L__BB0_187: setp.gt.s64 %p155, %rd69, 7; @%p155 bra $L__BB0_190; setp.eq.s64 %p158, %rd69, 5; @%p158 bra $L__BB0_220; setp.eq.s64 %p159, %rd69, 7; @%p159 bra $L__BB0_221; bra.uni $L__BB0_225; $L__BB0_192: setp.gt.s64 %p145, %rd69, 13; @%p145 bra $L__BB0_196; bra.uni $L__BB0_193; $L__BB0_196: setp.gt.s64 %p146, %rd69, 15; @%p146 bra $L__BB0_199; setp.eq.s64 %p149, %rd69, 14; @%p149 bra $L__BB0_206; setp.eq.s64 %p150, %rd69, 15; @%p150 bra $L__BB0_205; bra.uni $L__BB0_225; $L__BB0_205: abs.f32 %f1148, %f2679; add.f32 %f1149, %f1148, 0f3F800000; div.rn.f32 %f2679, %f2679, %f1149; bra.uni $L__BB0_225; $L__BB0_184: setp.eq.s64 %p160, %rd69, 0; @%p160 bra $L__BB0_223; setp.eq.s64 %p161, %rd69, 2; @%p161 bra $L__BB0_222; setp.eq.s64 %p162, %rd69, 4; @%p162 bra $L__BB0_221; bra.uni $L__BB0_225; $L__BB0_221: setp.lt.f32 %p176, %f2679, 0f00000000; mul.f32 %f1294, %f381, %f2679; selp.f32 %f2679, %f1294, %f2679, %p176; bra.uni $L__BB0_225; $L__BB0_193: setp.eq.s64 %p151, %rd69, 10; @%p151 bra $L__BB0_213; setp.eq.s64 %p152, %rd69, 11; @%p152 bra $L__BB0_210; setp.eq.s64 %p153, %rd69, 12; @%p153 bra $L__BB0_209; bra.uni $L__BB0_225; $L__BB0_209: neg.f32 %f1196, %f2679; mov.f32 %f1197, 0f3F000000; mov.f32 %f1198, 0f3BBB989D; fma.rn.f32 %f1199, %f1196, %f1198, %f1197; mov.f32 %f1200, 0f3FB8AA3B; mov.f32 %f1201, 0f437C0000; cvt.sat.f32.f32 %f1202, %f1199; mov.f32 %f1203, 0f4B400001; fma.rm.f32 %f1204, %f1202, %f1201, %f1203; add.f32 %f1205, %f1204, 0fCB40007F; neg.f32 %f1206, %f1205; fma.rn.f32 %f1207, %f1196, %f1200, %f1206; mov.f32 %f1208, 0f32A57060; fma.rn.f32 %f1209, %f1196, %f1208, %f1207; mov.b32 %r155, %f1204; shl.b32 %r156, %r155, 23; mov.b32 %f1210, %r156; ex2.approx.ftz.f32 %f1211, %f1209; fma.rn.f32 %f1212, %f1211, %f1210, 0f3F800000; rcp.rn.f32 %f2679, %f1212; bra.uni $L__BB0_225; $L__BB0_190: setp.eq.s64 %p156, %rd69, 8; @%p156 bra $L__BB0_217; setp.eq.s64 %p157, %rd69, 9; @%p157 bra $L__BB0_216; bra.uni $L__BB0_225; $L__BB0_216: setp.lt.f32 %p172, %f2679, 0f00000000; selp.f32 %f2679, 0f00000000, %f2679, %p172; bra.uni $L__BB0_225; $L__BB0_199: setp.eq.s64 %p147, %rd69, 16; @%p147 bra $L__BB0_202; setp.ne.s64 %p148, %rd69, 17; @%p148 bra $L__BB0_225; setp.ge.f32 %p163, %f381, %f2679; selp.f32 %f2679, 0f00000000, %f2679, %p163; bra.uni $L__BB0_225; $L__BB0_220: fma.rn.f32 %f2679, %f381, %f2679, %f382; bra.uni $L__BB0_225; $L__BB0_206: mul.f32 %f1150, %f381, %f2679; mov.f32 %f1151, 0f3F000000; mov.f32 %f1152, 0f3BBB989D; fma.rn.f32 %f1153, %f1150, %f1152, %f1151; mov.f32 %f1154, 0f3FB8AA3B; mov.f32 %f1155, 0f437C0000; cvt.sat.f32.f32 %f1156, %f1153; mov.f32 %f1157, 0f4B400001; fma.rm.f32 %f1158, %f1156, %f1155, %f1157; add.f32 %f1159, %f1158, 0fCB40007F; neg.f32 %f1160, %f1159; fma.rn.f32 %f1161, %f1150, %f1154, %f1160; mov.f32 %f1162, 0f32A57060; fma.rn.f32 %f1163, %f1150, %f1162, %f1161; mov.b32 %r149, %f1158; shl.b32 %r150, %r149, 23; mov.b32 %f1164, %r150; ex2.approx.ftz.f32 %f1165, %f1163; fma.rn.f32 %f1166, %f1165, %f1164, 0f3F800000; setp.lt.f32 %p166, %f1166, 0f00800000; mul.f32 %f1167, %f1166, 0f4B000000; selp.f32 %f136, %f1167, %f1166, %p166; selp.f32 %f1168, 0fC1B80000, 0f00000000, %p166; mov.b32 %r151, %f136; add.s32 %r152, %r151, -1059760811; and.b32 %r153, %r152, -8388608; sub.s32 %r154, %r151, %r153; mov.b32 %f1169, %r154; cvt.rn.f32.s32 %f1170, %r153; mov.f32 %f1171, 0f34000000; fma.rn.f32 %f1172, %f1170, %f1171, %f1168; add.f32 %f1173, %f1169, 0fBF800000; mov.f32 %f1174, 0f3E1039F6; mov.f32 %f1175, 0fBE055027; fma.rn.f32 %f1176, %f1175, %f1173, %f1174; mov.f32 %f1177, 0fBDF8CDCC; fma.rn.f32 %f1178, %f1176, %f1173, %f1177; mov.f32 %f1179, 0f3E0F2955; fma.rn.f32 %f1180, %f1178, %f1173, %f1179; mov.f32 %f1181, 0fBE2AD8B9; fma.rn.f32 %f1182, %f1180, %f1173, %f1181; mov.f32 %f1183, 0f3E4CED0B; fma.rn.f32 %f1184, %f1182, %f1173, %f1183; mov.f32 %f1185, 0fBE7FFF22; fma.rn.f32 %f1186, %f1184, %f1173, %f1185; mov.f32 %f1187, 0f3EAAAA78; fma.rn.f32 %f1188, %f1186, %f1173, %f1187; mov.f32 %f1189, 0fBF000000; fma.rn.f32 %f1190, %f1188, %f1173, %f1189; mul.f32 %f1191, %f1190, %f1173; fma.rn.f32 %f1192, %f1191, %f1173, %f1173; mov.f32 %f1193, 0f3F317218; fma.rn.f32 %f2677, %f1172, %f1193, %f1192; setp.lt.u32 %p167, %r151, 2139095040; @%p167 bra $L__BB0_208; mov.f32 %f1194, 0f7F800000; fma.rn.f32 %f2677, %f136, %f1194, %f1194; $L__BB0_208: setp.eq.f32 %p168, %f136, 0f00000000; selp.f32 %f1195, 0fFF800000, %f2677, %p168; div.rn.f32 %f2679, %f1195, %f381; bra.uni $L__BB0_225; $L__BB0_223: setp.geu.f32 %p177, %f2679, 0f00000000; @%p177 bra $L__BB0_225; mov.f32 %f1299, 0f3F000000; mov.f32 %f1300, 0f3BBB989D; fma.rn.f32 %f1301, %f2679, %f1300, %f1299; mov.f32 %f1302, 0f3FB8AA3B; mov.f32 %f1303, 0f437C0000; cvt.sat.f32.f32 %f1304, %f1301; mov.f32 %f1305, 0f4B400001; fma.rm.f32 %f1306, %f1304, %f1303, %f1305; add.f32 %f1307, %f1306, 0fCB40007F; neg.f32 %f1308, %f1307; fma.rn.f32 %f1309, %f2679, %f1302, %f1308; mov.f32 %f1310, 0f32A57060; fma.rn.f32 %f1311, %f2679, %f1310, %f1309; mov.b32 %r169, %f1306; shl.b32 %r170, %r169, 23; mov.b32 %f1312, %r170; ex2.approx.ftz.f32 %f1313, %f1311; fma.rn.f32 %f1314, %f1313, %f1312, 0fBF800000; mul.f32 %f2679, %f381, %f1314; bra.uni $L__BB0_225; $L__BB0_222: fma.rn.f32 %f1295, %f381, %f2679, %f382; mov.f32 %f1296, 0f3F800000; min.f32 %f1297, %f1296, %f1295; mov.f32 %f1298, 0f00000000; max.f32 %f2679, %f1298, %f1297; bra.uni $L__BB0_225; $L__BB0_213: setp.gt.f32 %p171, %f2679, 0f00000000; @%p171 bra $L__BB0_215; bra.uni $L__BB0_214; $L__BB0_215: mul.f32 %f2679, %f382, %f2679; bra.uni $L__BB0_225; $L__BB0_210: mul.f32 %f142, %f382, %f2679; abs.f32 %f143, %f142; setp.ltu.f32 %p169, %f143, 0f3F19999A; @%p169 bra $L__BB0_212; bra.uni $L__BB0_211; $L__BB0_212: mul.f32 %f1221, %f142, %f142; mov.f32 %f1222, 0fBD563CAE; mov.f32 %f1223, 0f3C80F082; fma.rn.f32 %f1224, %f1223, %f1221, %f1222; mov.f32 %f1225, 0f3E085941; fma.rn.f32 %f1226, %f1224, %f1221, %f1225; mov.f32 %f1227, 0fBEAAA9ED; fma.rn.f32 %f1228, %f1226, %f1221, %f1227; mov.f32 %f1229, 0f00000000; fma.rn.f32 %f1230, %f1228, %f1221, %f1229; fma.rn.f32 %f145, %f1230, %f142, %f142; mul.f32 %f2679, %f381, %f145; bra.uni $L__BB0_225; $L__BB0_217: mul.f32 %f1248, %f382, %f2679; mov.f32 %f1249, 0f3F000000; mov.f32 %f1250, 0f3BBB989D; fma.rn.f32 %f1251, %f1248, %f1250, %f1249; mov.f32 %f1252, 0f3FB8AA3B; mov.f32 %f1253, 0f437C0000; cvt.sat.f32.f32 %f1254, %f1251; mov.f32 %f1255, 0f4B400001; fma.rm.f32 %f1256, %f1254, %f1253, %f1255; add.f32 %f1257, %f1256, 0fCB40007F; neg.f32 %f1258, %f1257; fma.rn.f32 %f1259, %f1248, %f1252, %f1258; mov.f32 %f1260, 0f32A57060; fma.rn.f32 %f1261, %f1248, %f1260, %f1259; mov.b32 %r163, %f1256; shl.b32 %r164, %r163, 23; mov.b32 %f1262, %r164; ex2.approx.ftz.f32 %f1263, %f1261; fma.rn.f32 %f1264, %f1263, %f1262, 0f3F800000; setp.lt.f32 %p173, %f1264, 0f00800000; mul.f32 %f1265, %f1264, 0f4B000000; selp.f32 %f151, %f1265, %f1264, %p173; selp.f32 %f1266, 0fC1B80000, 0f00000000, %p173; mov.b32 %r165, %f151; add.s32 %r166, %r165, -1059760811; and.b32 %r167, %r166, -8388608; sub.s32 %r168, %r165, %r167; mov.b32 %f1267, %r168; cvt.rn.f32.s32 %f1268, %r167; mov.f32 %f1269, 0f34000000; fma.rn.f32 %f1270, %f1268, %f1269, %f1266; add.f32 %f1271, %f1267, 0fBF800000; mov.f32 %f1272, 0f3E1039F6; mov.f32 %f1273, 0fBE055027; fma.rn.f32 %f1274, %f1273, %f1271, %f1272; mov.f32 %f1275, 0fBDF8CDCC; fma.rn.f32 %f1276, %f1274, %f1271, %f1275; mov.f32 %f1277, 0f3E0F2955; fma.rn.f32 %f1278, %f1276, %f1271, %f1277; mov.f32 %f1279, 0fBE2AD8B9; fma.rn.f32 %f1280, %f1278, %f1271, %f1279; mov.f32 %f1281, 0f3E4CED0B; fma.rn.f32 %f1282, %f1280, %f1271, %f1281; mov.f32 %f1283, 0fBE7FFF22; fma.rn.f32 %f1284, %f1282, %f1271, %f1283; mov.f32 %f1285, 0f3EAAAA78; fma.rn.f32 %f1286, %f1284, %f1271, %f1285; mov.f32 %f1287, 0fBF000000; fma.rn.f32 %f1288, %f1286, %f1271, %f1287; mul.f32 %f1289, %f1288, %f1271; fma.rn.f32 %f1290, %f1289, %f1271, %f1271; mov.f32 %f1291, 0f3F317218; fma.rn.f32 %f2678, %f1270, %f1291, %f1290; setp.lt.u32 %p174, %r165, 2139095040; @%p174 bra $L__BB0_219; mov.f32 %f1292, 0f7F800000; fma.rn.f32 %f2678, %f151, %f1292, %f1292; $L__BB0_219: setp.eq.f32 %p175, %f151, 0f00000000; selp.f32 %f1293, 0fFF800000, %f2678, %p175; mul.f32 %f2679, %f381, %f1293; bra.uni $L__BB0_225; $L__BB0_202: abs.f32 %f132, %f2679; setp.ltu.f32 %p164, %f132, 0f3F19999A; @%p164 bra $L__BB0_204; bra.uni $L__BB0_203; $L__BB0_204: mul.f32 %f1138, %f2679, %f2679; mov.f32 %f1139, 0fBD563CAE; mov.f32 %f1140, 0f3C80F082; fma.rn.f32 %f1141, %f1140, %f1138, %f1139; mov.f32 %f1142, 0f3E085941; fma.rn.f32 %f1143, %f1141, %f1138, %f1142; mov.f32 %f1144, 0fBEAAA9ED; fma.rn.f32 %f1145, %f1143, %f1138, %f1144; mov.f32 %f1146, 0f00000000; fma.rn.f32 %f1147, %f1145, %f1138, %f1146; fma.rn.f32 %f2679, %f1147, %f2679, %f2679; bra.uni $L__BB0_225; $L__BB0_214: mul.f32 %f1231, %f382, %f381; mov.f32 %f1232, 0f3F000000; mov.f32 %f1233, 0f3BBB989D; fma.rn.f32 %f1234, %f2679, %f1233, %f1232; mov.f32 %f1235, 0f3FB8AA3B; mov.f32 %f1236, 0f437C0000; cvt.sat.f32.f32 %f1237, %f1234; mov.f32 %f1238, 0f4B400001; fma.rm.f32 %f1239, %f1237, %f1236, %f1238; add.f32 %f1240, %f1239, 0fCB40007F; neg.f32 %f1241, %f1240; fma.rn.f32 %f1242, %f2679, %f1235, %f1241; mov.f32 %f1243, 0f32A57060; fma.rn.f32 %f1244, %f2679, %f1243, %f1242; mov.b32 %r161, %f1239; shl.b32 %r162, %r161, 23; mov.b32 %f1245, %r162; ex2.approx.ftz.f32 %f1246, %f1244; fma.rn.f32 %f1247, %f1246, %f1245, 0fBF800000; mul.f32 %f2679, %f1231, %f1247; bra.uni $L__BB0_225; $L__BB0_211: mul.f32 %f1213, %f143, 0f4038AA3B; ex2.approx.ftz.f32 %f1214, %f1213; add.f32 %f1215, %f1214, 0f3F800000; mov.f32 %f1216, 0f3F800000; rcp.approx.ftz.f32 %f1217, %f1215; mov.f32 %f1218, 0fC0000000; fma.rn.f32 %f1219, %f1217, %f1218, %f1216; setp.ge.f32 %p170, %f143, 0f41102CB4; selp.f32 %f1220, 0f3F800000, %f1219, %p170; mov.b32 %r157, %f1220; mov.b32 %r158, %f142; and.b32 %r159, %r158, -2147483648; or.b32 %r160, %r159, %r157; mov.b32 %f144, %r160; mul.f32 %f2679, %f381, %f144; bra.uni $L__BB0_225; $L__BB0_203: mul.f32 %f1130, %f132, 0f4038AA3B; ex2.approx.ftz.f32 %f1131, %f1130; add.f32 %f1132, %f1131, 0f3F800000; mov.f32 %f1133, 0f3F800000; rcp.approx.ftz.f32 %f1134, %f1132; mov.f32 %f1135, 0fC0000000; fma.rn.f32 %f1136, %f1134, %f1135, %f1133; setp.ge.f32 %p165, %f132, 0f41102CB4; selp.f32 %f1137, 0f3F800000, %f1136, %p165; mov.b32 %r145, %f1137; mov.b32 %r146, %f2679; and.b32 %r147, %r146, -2147483648; or.b32 %r148, %r147, %r145; mov.b32 %f2679, %r148; $L__BB0_225: // begin inline asm { cvt.rn.f16.f32 %rs36, %f2679;} // end inline asm // begin inline asm { cvt.f32.f16 %f2682, %rs8;} // end inline asm @%p76 bra $L__BB0_235; setp.gt.s64 %p188, %rd69, 4; @%p188 bra $L__BB0_230; bra.uni $L__BB0_227; $L__BB0_230: setp.gt.s64 %p189, %rd69, 7; @%p189 bra $L__BB0_233; setp.eq.s64 %p192, %rd69, 5; @%p192 bra $L__BB0_263; setp.eq.s64 %p193, %rd69, 7; @%p193 bra $L__BB0_264; bra.uni $L__BB0_268; $L__BB0_235: setp.gt.s64 %p179, %rd69, 13; @%p179 bra $L__BB0_239; bra.uni $L__BB0_236; $L__BB0_239: setp.gt.s64 %p180, %rd69, 15; @%p180 bra $L__BB0_242; setp.eq.s64 %p183, %rd69, 14; @%p183 bra $L__BB0_249; setp.eq.s64 %p184, %rd69, 15; @%p184 bra $L__BB0_248; bra.uni $L__BB0_268; $L__BB0_248: abs.f32 %f1335, %f2682; add.f32 %f1336, %f1335, 0f3F800000; div.rn.f32 %f2682, %f2682, %f1336; bra.uni $L__BB0_268; $L__BB0_227: setp.eq.s64 %p194, %rd69, 0; @%p194 bra $L__BB0_266; setp.eq.s64 %p195, %rd69, 2; @%p195 bra $L__BB0_265; setp.eq.s64 %p196, %rd69, 4; @%p196 bra $L__BB0_264; bra.uni $L__BB0_268; $L__BB0_264: setp.lt.f32 %p210, %f2682, 0f00000000; mul.f32 %f1481, %f381, %f2682; selp.f32 %f2682, %f1481, %f2682, %p210; bra.uni $L__BB0_268; $L__BB0_236: setp.eq.s64 %p185, %rd69, 10; @%p185 bra $L__BB0_256; setp.eq.s64 %p186, %rd69, 11; @%p186 bra $L__BB0_253; setp.eq.s64 %p187, %rd69, 12; @%p187 bra $L__BB0_252; bra.uni $L__BB0_268; $L__BB0_252: neg.f32 %f1383, %f2682; mov.f32 %f1384, 0f3F000000; mov.f32 %f1385, 0f3BBB989D; fma.rn.f32 %f1386, %f1383, %f1385, %f1384; mov.f32 %f1387, 0f3FB8AA3B; mov.f32 %f1388, 0f437C0000; cvt.sat.f32.f32 %f1389, %f1386; mov.f32 %f1390, 0f4B400001; fma.rm.f32 %f1391, %f1389, %f1388, %f1390; add.f32 %f1392, %f1391, 0fCB40007F; neg.f32 %f1393, %f1392; fma.rn.f32 %f1394, %f1383, %f1387, %f1393; mov.f32 %f1395, 0f32A57060; fma.rn.f32 %f1396, %f1383, %f1395, %f1394; mov.b32 %r181, %f1391; shl.b32 %r182, %r181, 23; mov.b32 %f1397, %r182; ex2.approx.ftz.f32 %f1398, %f1396; fma.rn.f32 %f1399, %f1398, %f1397, 0f3F800000; rcp.rn.f32 %f2682, %f1399; bra.uni $L__BB0_268; $L__BB0_233: setp.eq.s64 %p190, %rd69, 8; @%p190 bra $L__BB0_260; setp.eq.s64 %p191, %rd69, 9; @%p191 bra $L__BB0_259; bra.uni $L__BB0_268; $L__BB0_259: setp.lt.f32 %p206, %f2682, 0f00000000; selp.f32 %f2682, 0f00000000, %f2682, %p206; bra.uni $L__BB0_268; $L__BB0_242: setp.eq.s64 %p181, %rd69, 16; @%p181 bra $L__BB0_245; setp.ne.s64 %p182, %rd69, 17; @%p182 bra $L__BB0_268; setp.ge.f32 %p197, %f381, %f2682; selp.f32 %f2682, 0f00000000, %f2682, %p197; bra.uni $L__BB0_268; $L__BB0_263: fma.rn.f32 %f2682, %f381, %f2682, %f382; bra.uni $L__BB0_268; $L__BB0_249: mul.f32 %f1337, %f381, %f2682; mov.f32 %f1338, 0f3F000000; mov.f32 %f1339, 0f3BBB989D; fma.rn.f32 %f1340, %f1337, %f1339, %f1338; mov.f32 %f1341, 0f3FB8AA3B; mov.f32 %f1342, 0f437C0000; cvt.sat.f32.f32 %f1343, %f1340; mov.f32 %f1344, 0f4B400001; fma.rm.f32 %f1345, %f1343, %f1342, %f1344; add.f32 %f1346, %f1345, 0fCB40007F; neg.f32 %f1347, %f1346; fma.rn.f32 %f1348, %f1337, %f1341, %f1347; mov.f32 %f1349, 0f32A57060; fma.rn.f32 %f1350, %f1337, %f1349, %f1348; mov.b32 %r175, %f1345; shl.b32 %r176, %r175, 23; mov.b32 %f1351, %r176; ex2.approx.ftz.f32 %f1352, %f1350; fma.rn.f32 %f1353, %f1352, %f1351, 0f3F800000; setp.lt.f32 %p200, %f1353, 0f00800000; mul.f32 %f1354, %f1353, 0f4B000000; selp.f32 %f167, %f1354, %f1353, %p200; selp.f32 %f1355, 0fC1B80000, 0f00000000, %p200; mov.b32 %r177, %f167; add.s32 %r178, %r177, -1059760811; and.b32 %r179, %r178, -8388608; sub.s32 %r180, %r177, %r179; mov.b32 %f1356, %r180; cvt.rn.f32.s32 %f1357, %r179; mov.f32 %f1358, 0f34000000; fma.rn.f32 %f1359, %f1357, %f1358, %f1355; add.f32 %f1360, %f1356, 0fBF800000; mov.f32 %f1361, 0f3E1039F6; mov.f32 %f1362, 0fBE055027; fma.rn.f32 %f1363, %f1362, %f1360, %f1361; mov.f32 %f1364, 0fBDF8CDCC; fma.rn.f32 %f1365, %f1363, %f1360, %f1364; mov.f32 %f1366, 0f3E0F2955; fma.rn.f32 %f1367, %f1365, %f1360, %f1366; mov.f32 %f1368, 0fBE2AD8B9; fma.rn.f32 %f1369, %f1367, %f1360, %f1368; mov.f32 %f1370, 0f3E4CED0B; fma.rn.f32 %f1371, %f1369, %f1360, %f1370; mov.f32 %f1372, 0fBE7FFF22; fma.rn.f32 %f1373, %f1371, %f1360, %f1372; mov.f32 %f1374, 0f3EAAAA78; fma.rn.f32 %f1375, %f1373, %f1360, %f1374; mov.f32 %f1376, 0fBF000000; fma.rn.f32 %f1377, %f1375, %f1360, %f1376; mul.f32 %f1378, %f1377, %f1360; fma.rn.f32 %f1379, %f1378, %f1360, %f1360; mov.f32 %f1380, 0f3F317218; fma.rn.f32 %f2680, %f1359, %f1380, %f1379; setp.lt.u32 %p201, %r177, 2139095040; @%p201 bra $L__BB0_251; mov.f32 %f1381, 0f7F800000; fma.rn.f32 %f2680, %f167, %f1381, %f1381; $L__BB0_251: setp.eq.f32 %p202, %f167, 0f00000000; selp.f32 %f1382, 0fFF800000, %f2680, %p202; div.rn.f32 %f2682, %f1382, %f381; bra.uni $L__BB0_268; $L__BB0_266: setp.geu.f32 %p211, %f2682, 0f00000000; @%p211 bra $L__BB0_268; mov.f32 %f1486, 0f3F000000; mov.f32 %f1487, 0f3BBB989D; fma.rn.f32 %f1488, %f2682, %f1487, %f1486; mov.f32 %f1489, 0f3FB8AA3B; mov.f32 %f1490, 0f437C0000; cvt.sat.f32.f32 %f1491, %f1488; mov.f32 %f1492, 0f4B400001; fma.rm.f32 %f1493, %f1491, %f1490, %f1492; add.f32 %f1494, %f1493, 0fCB40007F; neg.f32 %f1495, %f1494; fma.rn.f32 %f1496, %f2682, %f1489, %f1495; mov.f32 %f1497, 0f32A57060; fma.rn.f32 %f1498, %f2682, %f1497, %f1496; mov.b32 %r195, %f1493; shl.b32 %r196, %r195, 23; mov.b32 %f1499, %r196; ex2.approx.ftz.f32 %f1500, %f1498; fma.rn.f32 %f1501, %f1500, %f1499, 0fBF800000; mul.f32 %f2682, %f381, %f1501; bra.uni $L__BB0_268; $L__BB0_265: fma.rn.f32 %f1482, %f381, %f2682, %f382; mov.f32 %f1483, 0f3F800000; min.f32 %f1484, %f1483, %f1482; mov.f32 %f1485, 0f00000000; max.f32 %f2682, %f1485, %f1484; bra.uni $L__BB0_268; $L__BB0_256: setp.gt.f32 %p205, %f2682, 0f00000000; @%p205 bra $L__BB0_258; bra.uni $L__BB0_257; $L__BB0_258: mul.f32 %f2682, %f382, %f2682; bra.uni $L__BB0_268; $L__BB0_253: mul.f32 %f173, %f382, %f2682; abs.f32 %f174, %f173; setp.ltu.f32 %p203, %f174, 0f3F19999A; @%p203 bra $L__BB0_255; bra.uni $L__BB0_254; $L__BB0_255: mul.f32 %f1408, %f173, %f173; mov.f32 %f1409, 0fBD563CAE; mov.f32 %f1410, 0f3C80F082; fma.rn.f32 %f1411, %f1410, %f1408, %f1409; mov.f32 %f1412, 0f3E085941; fma.rn.f32 %f1413, %f1411, %f1408, %f1412; mov.f32 %f1414, 0fBEAAA9ED; fma.rn.f32 %f1415, %f1413, %f1408, %f1414; mov.f32 %f1416, 0f00000000; fma.rn.f32 %f1417, %f1415, %f1408, %f1416; fma.rn.f32 %f176, %f1417, %f173, %f173; mul.f32 %f2682, %f381, %f176; bra.uni $L__BB0_268; $L__BB0_260: mul.f32 %f1435, %f382, %f2682; mov.f32 %f1436, 0f3F000000; mov.f32 %f1437, 0f3BBB989D; fma.rn.f32 %f1438, %f1435, %f1437, %f1436; mov.f32 %f1439, 0f3FB8AA3B; mov.f32 %f1440, 0f437C0000; cvt.sat.f32.f32 %f1441, %f1438; mov.f32 %f1442, 0f4B400001; fma.rm.f32 %f1443, %f1441, %f1440, %f1442; add.f32 %f1444, %f1443, 0fCB40007F; neg.f32 %f1445, %f1444; fma.rn.f32 %f1446, %f1435, %f1439, %f1445; mov.f32 %f1447, 0f32A57060; fma.rn.f32 %f1448, %f1435, %f1447, %f1446; mov.b32 %r189, %f1443; shl.b32 %r190, %r189, 23; mov.b32 %f1449, %r190; ex2.approx.ftz.f32 %f1450, %f1448; fma.rn.f32 %f1451, %f1450, %f1449, 0f3F800000; setp.lt.f32 %p207, %f1451, 0f00800000; mul.f32 %f1452, %f1451, 0f4B000000; selp.f32 %f182, %f1452, %f1451, %p207; selp.f32 %f1453, 0fC1B80000, 0f00000000, %p207; mov.b32 %r191, %f182; add.s32 %r192, %r191, -1059760811; and.b32 %r193, %r192, -8388608; sub.s32 %r194, %r191, %r193; mov.b32 %f1454, %r194; cvt.rn.f32.s32 %f1455, %r193; mov.f32 %f1456, 0f34000000; fma.rn.f32 %f1457, %f1455, %f1456, %f1453; add.f32 %f1458, %f1454, 0fBF800000; mov.f32 %f1459, 0f3E1039F6; mov.f32 %f1460, 0fBE055027; fma.rn.f32 %f1461, %f1460, %f1458, %f1459; mov.f32 %f1462, 0fBDF8CDCC; fma.rn.f32 %f1463, %f1461, %f1458, %f1462; mov.f32 %f1464, 0f3E0F2955; fma.rn.f32 %f1465, %f1463, %f1458, %f1464; mov.f32 %f1466, 0fBE2AD8B9; fma.rn.f32 %f1467, %f1465, %f1458, %f1466; mov.f32 %f1468, 0f3E4CED0B; fma.rn.f32 %f1469, %f1467, %f1458, %f1468; mov.f32 %f1470, 0fBE7FFF22; fma.rn.f32 %f1471, %f1469, %f1458, %f1470; mov.f32 %f1472, 0f3EAAAA78; fma.rn.f32 %f1473, %f1471, %f1458, %f1472; mov.f32 %f1474, 0fBF000000; fma.rn.f32 %f1475, %f1473, %f1458, %f1474; mul.f32 %f1476, %f1475, %f1458; fma.rn.f32 %f1477, %f1476, %f1458, %f1458; mov.f32 %f1478, 0f3F317218; fma.rn.f32 %f2681, %f1457, %f1478, %f1477; setp.lt.u32 %p208, %r191, 2139095040; @%p208 bra $L__BB0_262; mov.f32 %f1479, 0f7F800000; fma.rn.f32 %f2681, %f182, %f1479, %f1479; $L__BB0_262: setp.eq.f32 %p209, %f182, 0f00000000; selp.f32 %f1480, 0fFF800000, %f2681, %p209; mul.f32 %f2682, %f381, %f1480; bra.uni $L__BB0_268; $L__BB0_245: abs.f32 %f163, %f2682; setp.ltu.f32 %p198, %f163, 0f3F19999A; @%p198 bra $L__BB0_247; bra.uni $L__BB0_246; $L__BB0_247: mul.f32 %f1325, %f2682, %f2682; mov.f32 %f1326, 0fBD563CAE; mov.f32 %f1327, 0f3C80F082; fma.rn.f32 %f1328, %f1327, %f1325, %f1326; mov.f32 %f1329, 0f3E085941; fma.rn.f32 %f1330, %f1328, %f1325, %f1329; mov.f32 %f1331, 0fBEAAA9ED; fma.rn.f32 %f1332, %f1330, %f1325, %f1331; mov.f32 %f1333, 0f00000000; fma.rn.f32 %f1334, %f1332, %f1325, %f1333; fma.rn.f32 %f2682, %f1334, %f2682, %f2682; bra.uni $L__BB0_268; $L__BB0_257: mul.f32 %f1418, %f382, %f381; mov.f32 %f1419, 0f3F000000; mov.f32 %f1420, 0f3BBB989D; fma.rn.f32 %f1421, %f2682, %f1420, %f1419; mov.f32 %f1422, 0f3FB8AA3B; mov.f32 %f1423, 0f437C0000; cvt.sat.f32.f32 %f1424, %f1421; mov.f32 %f1425, 0f4B400001; fma.rm.f32 %f1426, %f1424, %f1423, %f1425; add.f32 %f1427, %f1426, 0fCB40007F; neg.f32 %f1428, %f1427; fma.rn.f32 %f1429, %f2682, %f1422, %f1428; mov.f32 %f1430, 0f32A57060; fma.rn.f32 %f1431, %f2682, %f1430, %f1429; mov.b32 %r187, %f1426; shl.b32 %r188, %r187, 23; mov.b32 %f1432, %r188; ex2.approx.ftz.f32 %f1433, %f1431; fma.rn.f32 %f1434, %f1433, %f1432, 0fBF800000; mul.f32 %f2682, %f1418, %f1434; bra.uni $L__BB0_268; $L__BB0_254: mul.f32 %f1400, %f174, 0f4038AA3B; ex2.approx.ftz.f32 %f1401, %f1400; add.f32 %f1402, %f1401, 0f3F800000; mov.f32 %f1403, 0f3F800000; rcp.approx.ftz.f32 %f1404, %f1402; mov.f32 %f1405, 0fC0000000; fma.rn.f32 %f1406, %f1404, %f1405, %f1403; setp.ge.f32 %p204, %f174, 0f41102CB4; selp.f32 %f1407, 0f3F800000, %f1406, %p204; mov.b32 %r183, %f1407; mov.b32 %r184, %f173; and.b32 %r185, %r184, -2147483648; or.b32 %r186, %r185, %r183; mov.b32 %f175, %r186; mul.f32 %f2682, %f381, %f175; bra.uni $L__BB0_268; $L__BB0_246: mul.f32 %f1317, %f163, 0f4038AA3B; ex2.approx.ftz.f32 %f1318, %f1317; add.f32 %f1319, %f1318, 0f3F800000; mov.f32 %f1320, 0f3F800000; rcp.approx.ftz.f32 %f1321, %f1319; mov.f32 %f1322, 0fC0000000; fma.rn.f32 %f1323, %f1321, %f1322, %f1320; setp.ge.f32 %p199, %f163, 0f41102CB4; selp.f32 %f1324, 0f3F800000, %f1323, %p199; mov.b32 %r171, %f1324; mov.b32 %r172, %f2682; and.b32 %r173, %r172, -2147483648; or.b32 %r174, %r173, %r171; mov.b32 %f2682, %r174; $L__BB0_268: // begin inline asm { cvt.rn.f16.f32 %rs38, %f2682;} // end inline asm st.global.v4.u16 [%rd14], {%rs32, %rs34, %rs36, %rs38}; bra.uni $L__BB0_586; $L__BB0_301: mul.f32 %f1608, %f382, %f381; mov.f32 %f1609, 0f3F000000; mov.f32 %f1610, 0f3BBB989D; fma.rn.f32 %f1611, %f2685, %f1610, %f1609; mov.f32 %f1612, 0f3FB8AA3B; mov.f32 %f1613, 0f437C0000; cvt.sat.f32.f32 %f1614, %f1611; mov.f32 %f1615, 0f4B400001; fma.rm.f32 %f1616, %f1614, %f1613, %f1615; add.f32 %f1617, %f1616, 0fCB40007F; neg.f32 %f1618, %f1617; fma.rn.f32 %f1619, %f2685, %f1612, %f1618; mov.f32 %f1620, 0f32A57060; fma.rn.f32 %f1621, %f2685, %f1620, %f1619; mov.b32 %r213, %f1616; shl.b32 %r214, %r213, 23; mov.b32 %f1622, %r214; ex2.approx.ftz.f32 %f1623, %f1621; fma.rn.f32 %f1624, %f1623, %f1622, 0fBF800000; mul.f32 %f2685, %f1608, %f1624; bra.uni $L__BB0_312; $L__BB0_290: mul.f32 %f1507, %f197, 0f4038AA3B; ex2.approx.ftz.f32 %f1508, %f1507; add.f32 %f1509, %f1508, 0f3F800000; mov.f32 %f1510, 0f3F800000; rcp.approx.ftz.f32 %f1511, %f1509; mov.f32 %f1512, 0fC0000000; fma.rn.f32 %f1513, %f1511, %f1512, %f1510; setp.ge.f32 %p233, %f197, 0f41102CB4; selp.f32 %f1514, 0f3F800000, %f1513, %p233; mov.b32 %r197, %f1514; mov.b32 %r198, %f2685; and.b32 %r199, %r198, -2147483648; or.b32 %r200, %r199, %r197; mov.b32 %f2685, %r200; bra.uni $L__BB0_312; $L__BB0_298: mul.f32 %f1590, %f208, 0f4038AA3B; ex2.approx.ftz.f32 %f1591, %f1590; add.f32 %f1592, %f1591, 0f3F800000; mov.f32 %f1593, 0f3F800000; rcp.approx.ftz.f32 %f1594, %f1592; mov.f32 %f1595, 0fC0000000; fma.rn.f32 %f1596, %f1594, %f1595, %f1593; setp.ge.f32 %p238, %f208, 0f41102CB4; selp.f32 %f1597, 0f3F800000, %f1596, %p238; mov.b32 %r209, %f1597; mov.b32 %r210, %f207; and.b32 %r211, %r210, -2147483648; or.b32 %r212, %r211, %r209; mov.b32 %f209, %r212; mul.f32 %f2685, %f381, %f209; $L__BB0_312: @%p212 bra $L__BB0_322; setp.gt.s64 %p256, %rd69, 4; @%p256 bra $L__BB0_317; bra.uni $L__BB0_314; $L__BB0_317: setp.gt.s64 %p257, %rd69, 7; @%p257 bra $L__BB0_320; setp.eq.s64 %p260, %rd69, 5; @%p260 bra $L__BB0_350; setp.eq.s64 %p261, %rd69, 7; @%p261 bra $L__BB0_351; bra.uni $L__BB0_355; $L__BB0_322: setp.gt.s64 %p247, %rd69, 13; @%p247 bra $L__BB0_326; bra.uni $L__BB0_323; $L__BB0_326: setp.gt.s64 %p248, %rd69, 15; @%p248 bra $L__BB0_329; setp.eq.s64 %p251, %rd69, 14; @%p251 bra $L__BB0_336; setp.eq.s64 %p252, %rd69, 15; @%p252 bra $L__BB0_335; bra.uni $L__BB0_355; $L__BB0_335: abs.f32 %f1710, %f2688; add.f32 %f1711, %f1710, 0f3F800000; div.rn.f32 %f2688, %f2688, %f1711; bra.uni $L__BB0_355; $L__BB0_314: setp.eq.s64 %p262, %rd69, 0; @%p262 bra $L__BB0_353; setp.eq.s64 %p263, %rd69, 2; @%p263 bra $L__BB0_352; setp.eq.s64 %p264, %rd69, 4; @%p264 bra $L__BB0_351; bra.uni $L__BB0_355; $L__BB0_351: setp.lt.f32 %p278, %f2688, 0f00000000; mul.f32 %f1856, %f381, %f2688; selp.f32 %f2688, %f1856, %f2688, %p278; bra.uni $L__BB0_355; $L__BB0_323: setp.eq.s64 %p253, %rd69, 10; @%p253 bra $L__BB0_343; setp.eq.s64 %p254, %rd69, 11; @%p254 bra $L__BB0_340; setp.eq.s64 %p255, %rd69, 12; @%p255 bra $L__BB0_339; bra.uni $L__BB0_355; $L__BB0_339: neg.f32 %f1758, %f2688; mov.f32 %f1759, 0f3F000000; mov.f32 %f1760, 0f3BBB989D; fma.rn.f32 %f1761, %f1758, %f1760, %f1759; mov.f32 %f1762, 0f3FB8AA3B; mov.f32 %f1763, 0f437C0000; cvt.sat.f32.f32 %f1764, %f1761; mov.f32 %f1765, 0f4B400001; fma.rm.f32 %f1766, %f1764, %f1763, %f1765; add.f32 %f1767, %f1766, 0fCB40007F; neg.f32 %f1768, %f1767; fma.rn.f32 %f1769, %f1758, %f1762, %f1768; mov.f32 %f1770, 0f32A57060; fma.rn.f32 %f1771, %f1758, %f1770, %f1769; mov.b32 %r233, %f1766; shl.b32 %r234, %r233, 23; mov.b32 %f1772, %r234; ex2.approx.ftz.f32 %f1773, %f1771; fma.rn.f32 %f1774, %f1773, %f1772, 0f3F800000; rcp.rn.f32 %f2688, %f1774; bra.uni $L__BB0_355; $L__BB0_320: setp.eq.s64 %p258, %rd69, 8; @%p258 bra $L__BB0_347; setp.eq.s64 %p259, %rd69, 9; @%p259 bra $L__BB0_346; bra.uni $L__BB0_355; $L__BB0_346: setp.lt.f32 %p274, %f2688, 0f00000000; selp.f32 %f2688, 0f00000000, %f2688, %p274; bra.uni $L__BB0_355; $L__BB0_329: setp.eq.s64 %p249, %rd69, 16; @%p249 bra $L__BB0_332; setp.ne.s64 %p250, %rd69, 17; @%p250 bra $L__BB0_355; setp.ge.f32 %p265, %f381, %f2688; selp.f32 %f2688, 0f00000000, %f2688, %p265; bra.uni $L__BB0_355; $L__BB0_350: fma.rn.f32 %f2688, %f381, %f2688, %f382; bra.uni $L__BB0_355; $L__BB0_336: mul.f32 %f1712, %f381, %f2688; mov.f32 %f1713, 0f3F000000; mov.f32 %f1714, 0f3BBB989D; fma.rn.f32 %f1715, %f1712, %f1714, %f1713; mov.f32 %f1716, 0f3FB8AA3B; mov.f32 %f1717, 0f437C0000; cvt.sat.f32.f32 %f1718, %f1715; mov.f32 %f1719, 0f4B400001; fma.rm.f32 %f1720, %f1718, %f1717, %f1719; add.f32 %f1721, %f1720, 0fCB40007F; neg.f32 %f1722, %f1721; fma.rn.f32 %f1723, %f1712, %f1716, %f1722; mov.f32 %f1724, 0f32A57060; fma.rn.f32 %f1725, %f1712, %f1724, %f1723; mov.b32 %r227, %f1720; shl.b32 %r228, %r227, 23; mov.b32 %f1726, %r228; ex2.approx.ftz.f32 %f1727, %f1725; fma.rn.f32 %f1728, %f1727, %f1726, 0f3F800000; setp.lt.f32 %p268, %f1728, 0f00800000; mul.f32 %f1729, %f1728, 0f4B000000; selp.f32 %f231, %f1729, %f1728, %p268; selp.f32 %f1730, 0fC1B80000, 0f00000000, %p268; mov.b32 %r229, %f231; add.s32 %r230, %r229, -1059760811; and.b32 %r231, %r230, -8388608; sub.s32 %r232, %r229, %r231; mov.b32 %f1731, %r232; cvt.rn.f32.s32 %f1732, %r231; mov.f32 %f1733, 0f34000000; fma.rn.f32 %f1734, %f1732, %f1733, %f1730; add.f32 %f1735, %f1731, 0fBF800000; mov.f32 %f1736, 0f3E1039F6; mov.f32 %f1737, 0fBE055027; fma.rn.f32 %f1738, %f1737, %f1735, %f1736; mov.f32 %f1739, 0fBDF8CDCC; fma.rn.f32 %f1740, %f1738, %f1735, %f1739; mov.f32 %f1741, 0f3E0F2955; fma.rn.f32 %f1742, %f1740, %f1735, %f1741; mov.f32 %f1743, 0fBE2AD8B9; fma.rn.f32 %f1744, %f1742, %f1735, %f1743; mov.f32 %f1745, 0f3E4CED0B; fma.rn.f32 %f1746, %f1744, %f1735, %f1745; mov.f32 %f1747, 0fBE7FFF22; fma.rn.f32 %f1748, %f1746, %f1735, %f1747; mov.f32 %f1749, 0f3EAAAA78; fma.rn.f32 %f1750, %f1748, %f1735, %f1749; mov.f32 %f1751, 0fBF000000; fma.rn.f32 %f1752, %f1750, %f1735, %f1751; mul.f32 %f1753, %f1752, %f1735; fma.rn.f32 %f1754, %f1753, %f1735, %f1735; mov.f32 %f1755, 0f3F317218; fma.rn.f32 %f2686, %f1734, %f1755, %f1754; setp.lt.u32 %p269, %r229, 2139095040; @%p269 bra $L__BB0_338; mov.f32 %f1756, 0f7F800000; fma.rn.f32 %f2686, %f231, %f1756, %f1756; $L__BB0_338: setp.eq.f32 %p270, %f231, 0f00000000; selp.f32 %f1757, 0fFF800000, %f2686, %p270; div.rn.f32 %f2688, %f1757, %f381; bra.uni $L__BB0_355; $L__BB0_353: setp.geu.f32 %p279, %f2688, 0f00000000; @%p279 bra $L__BB0_355; mov.f32 %f1861, 0f3F000000; mov.f32 %f1862, 0f3BBB989D; fma.rn.f32 %f1863, %f2688, %f1862, %f1861; mov.f32 %f1864, 0f3FB8AA3B; mov.f32 %f1865, 0f437C0000; cvt.sat.f32.f32 %f1866, %f1863; mov.f32 %f1867, 0f4B400001; fma.rm.f32 %f1868, %f1866, %f1865, %f1867; add.f32 %f1869, %f1868, 0fCB40007F; neg.f32 %f1870, %f1869; fma.rn.f32 %f1871, %f2688, %f1864, %f1870; mov.f32 %f1872, 0f32A57060; fma.rn.f32 %f1873, %f2688, %f1872, %f1871; mov.b32 %r247, %f1868; shl.b32 %r248, %r247, 23; mov.b32 %f1874, %r248; ex2.approx.ftz.f32 %f1875, %f1873; fma.rn.f32 %f1876, %f1875, %f1874, 0fBF800000; mul.f32 %f2688, %f381, %f1876; bra.uni $L__BB0_355; $L__BB0_343: setp.gt.f32 %p273, %f2688, 0f00000000; @%p273 bra $L__BB0_345; bra.uni $L__BB0_344; $L__BB0_345: mul.f32 %f2688, %f382, %f2688; bra.uni $L__BB0_355; $L__BB0_347: mul.f32 %f1810, %f382, %f2688; mov.f32 %f1811, 0f3F000000; mov.f32 %f1812, 0f3BBB989D; fma.rn.f32 %f1813, %f1810, %f1812, %f1811; mov.f32 %f1814, 0f3FB8AA3B; mov.f32 %f1815, 0f437C0000; cvt.sat.f32.f32 %f1816, %f1813; mov.f32 %f1817, 0f4B400001; fma.rm.f32 %f1818, %f1816, %f1815, %f1817; add.f32 %f1819, %f1818, 0fCB40007F; neg.f32 %f1820, %f1819; fma.rn.f32 %f1821, %f1810, %f1814, %f1820; mov.f32 %f1822, 0f32A57060; fma.rn.f32 %f1823, %f1810, %f1822, %f1821; mov.b32 %r241, %f1818; shl.b32 %r242, %r241, 23; mov.b32 %f1824, %r242; ex2.approx.ftz.f32 %f1825, %f1823; fma.rn.f32 %f1826, %f1825, %f1824, 0f3F800000; setp.lt.f32 %p275, %f1826, 0f00800000; mul.f32 %f1827, %f1826, 0f4B000000; selp.f32 %f246, %f1827, %f1826, %p275; selp.f32 %f1828, 0fC1B80000, 0f00000000, %p275; mov.b32 %r243, %f246; add.s32 %r244, %r243, -1059760811; and.b32 %r245, %r244, -8388608; sub.s32 %r246, %r243, %r245; mov.b32 %f1829, %r246; cvt.rn.f32.s32 %f1830, %r245; mov.f32 %f1831, 0f34000000; fma.rn.f32 %f1832, %f1830, %f1831, %f1828; add.f32 %f1833, %f1829, 0fBF800000; mov.f32 %f1834, 0f3E1039F6; mov.f32 %f1835, 0fBE055027; fma.rn.f32 %f1836, %f1835, %f1833, %f1834; mov.f32 %f1837, 0fBDF8CDCC; fma.rn.f32 %f1838, %f1836, %f1833, %f1837; mov.f32 %f1839, 0f3E0F2955; fma.rn.f32 %f1840, %f1838, %f1833, %f1839; mov.f32 %f1841, 0fBE2AD8B9; fma.rn.f32 %f1842, %f1840, %f1833, %f1841; mov.f32 %f1843, 0f3E4CED0B; fma.rn.f32 %f1844, %f1842, %f1833, %f1843; mov.f32 %f1845, 0fBE7FFF22; fma.rn.f32 %f1846, %f1844, %f1833, %f1845; mov.f32 %f1847, 0f3EAAAA78; fma.rn.f32 %f1848, %f1846, %f1833, %f1847; mov.f32 %f1849, 0fBF000000; fma.rn.f32 %f1850, %f1848, %f1833, %f1849; mul.f32 %f1851, %f1850, %f1833; fma.rn.f32 %f1852, %f1851, %f1833, %f1833; mov.f32 %f1853, 0f3F317218; fma.rn.f32 %f2687, %f1832, %f1853, %f1852; setp.lt.u32 %p276, %r243, 2139095040; @%p276 bra $L__BB0_349; mov.f32 %f1854, 0f7F800000; fma.rn.f32 %f2687, %f246, %f1854, %f1854; $L__BB0_349: setp.eq.f32 %p277, %f246, 0f00000000; selp.f32 %f1855, 0fFF800000, %f2687, %p277; mul.f32 %f2688, %f381, %f1855; bra.uni $L__BB0_355; $L__BB0_332: abs.f32 %f227, %f2688; setp.ltu.f32 %p266, %f227, 0f3F19999A; @%p266 bra $L__BB0_334; bra.uni $L__BB0_333; $L__BB0_334: mul.f32 %f1700, %f2688, %f2688; mov.f32 %f1701, 0fBD563CAE; mov.f32 %f1702, 0f3C80F082; fma.rn.f32 %f1703, %f1702, %f1700, %f1701; mov.f32 %f1704, 0f3E085941; fma.rn.f32 %f1705, %f1703, %f1700, %f1704; mov.f32 %f1706, 0fBEAAA9ED; fma.rn.f32 %f1707, %f1705, %f1700, %f1706; mov.f32 %f1708, 0f00000000; fma.rn.f32 %f1709, %f1707, %f1700, %f1708; fma.rn.f32 %f2688, %f1709, %f2688, %f2688; bra.uni $L__BB0_355; $L__BB0_352: fma.rn.f32 %f1857, %f381, %f2688, %f382; mov.f32 %f1858, 0f3F800000; min.f32 %f1859, %f1858, %f1857; mov.f32 %f1860, 0f00000000; max.f32 %f2688, %f1860, %f1859; bra.uni $L__BB0_355; $L__BB0_340: mul.f32 %f237, %f382, %f2688; abs.f32 %f238, %f237; setp.ltu.f32 %p271, %f238, 0f3F19999A; @%p271 bra $L__BB0_342; bra.uni $L__BB0_341; $L__BB0_342: mul.f32 %f1783, %f237, %f237; mov.f32 %f1784, 0fBD563CAE; mov.f32 %f1785, 0f3C80F082; fma.rn.f32 %f1786, %f1785, %f1783, %f1784; mov.f32 %f1787, 0f3E085941; fma.rn.f32 %f1788, %f1786, %f1783, %f1787; mov.f32 %f1789, 0fBEAAA9ED; fma.rn.f32 %f1790, %f1788, %f1783, %f1789; mov.f32 %f1791, 0f00000000; fma.rn.f32 %f1792, %f1790, %f1783, %f1791; fma.rn.f32 %f240, %f1792, %f237, %f237; mul.f32 %f2688, %f381, %f240; bra.uni $L__BB0_355; $L__BB0_344: mul.f32 %f1793, %f382, %f381; mov.f32 %f1794, 0f3F000000; mov.f32 %f1795, 0f3BBB989D; fma.rn.f32 %f1796, %f2688, %f1795, %f1794; mov.f32 %f1797, 0f3FB8AA3B; mov.f32 %f1798, 0f437C0000; cvt.sat.f32.f32 %f1799, %f1796; mov.f32 %f1800, 0f4B400001; fma.rm.f32 %f1801, %f1799, %f1798, %f1800; add.f32 %f1802, %f1801, 0fCB40007F; neg.f32 %f1803, %f1802; fma.rn.f32 %f1804, %f2688, %f1797, %f1803; mov.f32 %f1805, 0f32A57060; fma.rn.f32 %f1806, %f2688, %f1805, %f1804; mov.b32 %r239, %f1801; shl.b32 %r240, %r239, 23; mov.b32 %f1807, %r240; ex2.approx.ftz.f32 %f1808, %f1806; fma.rn.f32 %f1809, %f1808, %f1807, 0fBF800000; mul.f32 %f2688, %f1793, %f1809; bra.uni $L__BB0_355; $L__BB0_333: mul.f32 %f1692, %f227, 0f4038AA3B; ex2.approx.ftz.f32 %f1693, %f1692; add.f32 %f1694, %f1693, 0f3F800000; mov.f32 %f1695, 0f3F800000; rcp.approx.ftz.f32 %f1696, %f1694; mov.f32 %f1697, 0fC0000000; fma.rn.f32 %f1698, %f1696, %f1697, %f1695; setp.ge.f32 %p267, %f227, 0f41102CB4; selp.f32 %f1699, 0f3F800000, %f1698, %p267; mov.b32 %r223, %f1699; mov.b32 %r224, %f2688; and.b32 %r225, %r224, -2147483648; or.b32 %r226, %r225, %r223; mov.b32 %f2688, %r226; bra.uni $L__BB0_355; $L__BB0_341: mul.f32 %f1775, %f238, 0f4038AA3B; ex2.approx.ftz.f32 %f1776, %f1775; add.f32 %f1777, %f1776, 0f3F800000; mov.f32 %f1778, 0f3F800000; rcp.approx.ftz.f32 %f1779, %f1777; mov.f32 %f1780, 0fC0000000; fma.rn.f32 %f1781, %f1779, %f1780, %f1778; setp.ge.f32 %p272, %f238, 0f41102CB4; selp.f32 %f1782, 0f3F800000, %f1781, %p272; mov.b32 %r235, %f1782; mov.b32 %r236, %f237; and.b32 %r237, %r236, -2147483648; or.b32 %r238, %r237, %r235; mov.b32 %f239, %r238; mul.f32 %f2688, %f381, %f239; $L__BB0_355: @%p212 bra $L__BB0_365; setp.gt.s64 %p290, %rd69, 4; @%p290 bra $L__BB0_360; bra.uni $L__BB0_357; $L__BB0_360: setp.gt.s64 %p291, %rd69, 7; @%p291 bra $L__BB0_363; setp.eq.s64 %p294, %rd69, 5; @%p294 bra $L__BB0_393; setp.eq.s64 %p295, %rd69, 7; @%p295 bra $L__BB0_394; bra.uni $L__BB0_398; $L__BB0_365: setp.gt.s64 %p281, %rd69, 13; @%p281 bra $L__BB0_369; bra.uni $L__BB0_366; $L__BB0_369: setp.gt.s64 %p282, %rd69, 15; @%p282 bra $L__BB0_372; setp.eq.s64 %p285, %rd69, 14; @%p285 bra $L__BB0_379; setp.eq.s64 %p286, %rd69, 15; @%p286 bra $L__BB0_378; bra.uni $L__BB0_398; $L__BB0_378: abs.f32 %f1895, %f2691; add.f32 %f1896, %f1895, 0f3F800000; div.rn.f32 %f2691, %f2691, %f1896; bra.uni $L__BB0_398; $L__BB0_357: setp.eq.s64 %p296, %rd69, 0; @%p296 bra $L__BB0_396; setp.eq.s64 %p297, %rd69, 2; @%p297 bra $L__BB0_395; setp.eq.s64 %p298, %rd69, 4; @%p298 bra $L__BB0_394; bra.uni $L__BB0_398; $L__BB0_394: setp.lt.f32 %p312, %f2691, 0f00000000; mul.f32 %f2041, %f381, %f2691; selp.f32 %f2691, %f2041, %f2691, %p312; bra.uni $L__BB0_398; $L__BB0_366: setp.eq.s64 %p287, %rd69, 10; @%p287 bra $L__BB0_386; setp.eq.s64 %p288, %rd69, 11; @%p288 bra $L__BB0_383; setp.eq.s64 %p289, %rd69, 12; @%p289 bra $L__BB0_382; bra.uni $L__BB0_398; $L__BB0_382: neg.f32 %f1943, %f2691; mov.f32 %f1944, 0f3F000000; mov.f32 %f1945, 0f3BBB989D; fma.rn.f32 %f1946, %f1943, %f1945, %f1944; mov.f32 %f1947, 0f3FB8AA3B; mov.f32 %f1948, 0f437C0000; cvt.sat.f32.f32 %f1949, %f1946; mov.f32 %f1950, 0f4B400001; fma.rm.f32 %f1951, %f1949, %f1948, %f1950; add.f32 %f1952, %f1951, 0fCB40007F; neg.f32 %f1953, %f1952; fma.rn.f32 %f1954, %f1943, %f1947, %f1953; mov.f32 %f1955, 0f32A57060; fma.rn.f32 %f1956, %f1943, %f1955, %f1954; mov.b32 %r259, %f1951; shl.b32 %r260, %r259, 23; mov.b32 %f1957, %r260; ex2.approx.ftz.f32 %f1958, %f1956; fma.rn.f32 %f1959, %f1958, %f1957, 0f3F800000; rcp.rn.f32 %f2691, %f1959; bra.uni $L__BB0_398; $L__BB0_363: setp.eq.s64 %p292, %rd69, 8; @%p292 bra $L__BB0_390; setp.eq.s64 %p293, %rd69, 9; @%p293 bra $L__BB0_389; bra.uni $L__BB0_398; $L__BB0_389: setp.lt.f32 %p308, %f2691, 0f00000000; selp.f32 %f2691, 0f00000000, %f2691, %p308; bra.uni $L__BB0_398; $L__BB0_372: setp.eq.s64 %p283, %rd69, 16; @%p283 bra $L__BB0_375; setp.ne.s64 %p284, %rd69, 17; @%p284 bra $L__BB0_398; setp.ge.f32 %p299, %f381, %f2691; selp.f32 %f2691, 0f00000000, %f2691, %p299; bra.uni $L__BB0_398; $L__BB0_393: fma.rn.f32 %f2691, %f381, %f2691, %f382; bra.uni $L__BB0_398; $L__BB0_379: mul.f32 %f1897, %f381, %f2691; mov.f32 %f1898, 0f3F000000; mov.f32 %f1899, 0f3BBB989D; fma.rn.f32 %f1900, %f1897, %f1899, %f1898; mov.f32 %f1901, 0f3FB8AA3B; mov.f32 %f1902, 0f437C0000; cvt.sat.f32.f32 %f1903, %f1900; mov.f32 %f1904, 0f4B400001; fma.rm.f32 %f1905, %f1903, %f1902, %f1904; add.f32 %f1906, %f1905, 0fCB40007F; neg.f32 %f1907, %f1906; fma.rn.f32 %f1908, %f1897, %f1901, %f1907; mov.f32 %f1909, 0f32A57060; fma.rn.f32 %f1910, %f1897, %f1909, %f1908; mov.b32 %r253, %f1905; shl.b32 %r254, %r253, 23; mov.b32 %f1911, %r254; ex2.approx.ftz.f32 %f1912, %f1910; fma.rn.f32 %f1913, %f1912, %f1911, 0f3F800000; setp.lt.f32 %p302, %f1913, 0f00800000; mul.f32 %f1914, %f1913, 0f4B000000; selp.f32 %f261, %f1914, %f1913, %p302; selp.f32 %f1915, 0fC1B80000, 0f00000000, %p302; mov.b32 %r255, %f261; add.s32 %r256, %r255, -1059760811; and.b32 %r257, %r256, -8388608; sub.s32 %r258, %r255, %r257; mov.b32 %f1916, %r258; cvt.rn.f32.s32 %f1917, %r257; mov.f32 %f1918, 0f34000000; fma.rn.f32 %f1919, %f1917, %f1918, %f1915; add.f32 %f1920, %f1916, 0fBF800000; mov.f32 %f1921, 0f3E1039F6; mov.f32 %f1922, 0fBE055027; fma.rn.f32 %f1923, %f1922, %f1920, %f1921; mov.f32 %f1924, 0fBDF8CDCC; fma.rn.f32 %f1925, %f1923, %f1920, %f1924; mov.f32 %f1926, 0f3E0F2955; fma.rn.f32 %f1927, %f1925, %f1920, %f1926; mov.f32 %f1928, 0fBE2AD8B9; fma.rn.f32 %f1929, %f1927, %f1920, %f1928; mov.f32 %f1930, 0f3E4CED0B; fma.rn.f32 %f1931, %f1929, %f1920, %f1930; mov.f32 %f1932, 0fBE7FFF22; fma.rn.f32 %f1933, %f1931, %f1920, %f1932; mov.f32 %f1934, 0f3EAAAA78; fma.rn.f32 %f1935, %f1933, %f1920, %f1934; mov.f32 %f1936, 0fBF000000; fma.rn.f32 %f1937, %f1935, %f1920, %f1936; mul.f32 %f1938, %f1937, %f1920; fma.rn.f32 %f1939, %f1938, %f1920, %f1920; mov.f32 %f1940, 0f3F317218; fma.rn.f32 %f2689, %f1919, %f1940, %f1939; setp.lt.u32 %p303, %r255, 2139095040; @%p303 bra $L__BB0_381; mov.f32 %f1941, 0f7F800000; fma.rn.f32 %f2689, %f261, %f1941, %f1941; $L__BB0_381: setp.eq.f32 %p304, %f261, 0f00000000; selp.f32 %f1942, 0fFF800000, %f2689, %p304; div.rn.f32 %f2691, %f1942, %f381; bra.uni $L__BB0_398; $L__BB0_396: setp.geu.f32 %p313, %f2691, 0f00000000; @%p313 bra $L__BB0_398; mov.f32 %f2046, 0f3F000000; mov.f32 %f2047, 0f3BBB989D; fma.rn.f32 %f2048, %f2691, %f2047, %f2046; mov.f32 %f2049, 0f3FB8AA3B; mov.f32 %f2050, 0f437C0000; cvt.sat.f32.f32 %f2051, %f2048; mov.f32 %f2052, 0f4B400001; fma.rm.f32 %f2053, %f2051, %f2050, %f2052; add.f32 %f2054, %f2053, 0fCB40007F; neg.f32 %f2055, %f2054; fma.rn.f32 %f2056, %f2691, %f2049, %f2055; mov.f32 %f2057, 0f32A57060; fma.rn.f32 %f2058, %f2691, %f2057, %f2056; mov.b32 %r273, %f2053; shl.b32 %r274, %r273, 23; mov.b32 %f2059, %r274; ex2.approx.ftz.f32 %f2060, %f2058; fma.rn.f32 %f2061, %f2060, %f2059, 0fBF800000; mul.f32 %f2691, %f381, %f2061; bra.uni $L__BB0_398; $L__BB0_386: setp.gt.f32 %p307, %f2691, 0f00000000; @%p307 bra $L__BB0_388; bra.uni $L__BB0_387; $L__BB0_388: mul.f32 %f2691, %f382, %f2691; bra.uni $L__BB0_398; $L__BB0_390: mul.f32 %f1995, %f382, %f2691; mov.f32 %f1996, 0f3F000000; mov.f32 %f1997, 0f3BBB989D; fma.rn.f32 %f1998, %f1995, %f1997, %f1996; mov.f32 %f1999, 0f3FB8AA3B; mov.f32 %f2000, 0f437C0000; cvt.sat.f32.f32 %f2001, %f1998; mov.f32 %f2002, 0f4B400001; fma.rm.f32 %f2003, %f2001, %f2000, %f2002; add.f32 %f2004, %f2003, 0fCB40007F; neg.f32 %f2005, %f2004; fma.rn.f32 %f2006, %f1995, %f1999, %f2005; mov.f32 %f2007, 0f32A57060; fma.rn.f32 %f2008, %f1995, %f2007, %f2006; mov.b32 %r267, %f2003; shl.b32 %r268, %r267, 23; mov.b32 %f2009, %r268; ex2.approx.ftz.f32 %f2010, %f2008; fma.rn.f32 %f2011, %f2010, %f2009, 0f3F800000; setp.lt.f32 %p309, %f2011, 0f00800000; mul.f32 %f2012, %f2011, 0f4B000000; selp.f32 %f276, %f2012, %f2011, %p309; selp.f32 %f2013, 0fC1B80000, 0f00000000, %p309; mov.b32 %r269, %f276; add.s32 %r270, %r269, -1059760811; and.b32 %r271, %r270, -8388608; sub.s32 %r272, %r269, %r271; mov.b32 %f2014, %r272; cvt.rn.f32.s32 %f2015, %r271; mov.f32 %f2016, 0f34000000; fma.rn.f32 %f2017, %f2015, %f2016, %f2013; add.f32 %f2018, %f2014, 0fBF800000; mov.f32 %f2019, 0f3E1039F6; mov.f32 %f2020, 0fBE055027; fma.rn.f32 %f2021, %f2020, %f2018, %f2019; mov.f32 %f2022, 0fBDF8CDCC; fma.rn.f32 %f2023, %f2021, %f2018, %f2022; mov.f32 %f2024, 0f3E0F2955; fma.rn.f32 %f2025, %f2023, %f2018, %f2024; mov.f32 %f2026, 0fBE2AD8B9; fma.rn.f32 %f2027, %f2025, %f2018, %f2026; mov.f32 %f2028, 0f3E4CED0B; fma.rn.f32 %f2029, %f2027, %f2018, %f2028; mov.f32 %f2030, 0fBE7FFF22; fma.rn.f32 %f2031, %f2029, %f2018, %f2030; mov.f32 %f2032, 0f3EAAAA78; fma.rn.f32 %f2033, %f2031, %f2018, %f2032; mov.f32 %f2034, 0fBF000000; fma.rn.f32 %f2035, %f2033, %f2018, %f2034; mul.f32 %f2036, %f2035, %f2018; fma.rn.f32 %f2037, %f2036, %f2018, %f2018; mov.f32 %f2038, 0f3F317218; fma.rn.f32 %f2690, %f2017, %f2038, %f2037; setp.lt.u32 %p310, %r269, 2139095040; @%p310 bra $L__BB0_392; mov.f32 %f2039, 0f7F800000; fma.rn.f32 %f2690, %f276, %f2039, %f2039; $L__BB0_392: setp.eq.f32 %p311, %f276, 0f00000000; selp.f32 %f2040, 0fFF800000, %f2690, %p311; mul.f32 %f2691, %f381, %f2040; bra.uni $L__BB0_398; $L__BB0_375: abs.f32 %f257, %f2691; setp.ltu.f32 %p300, %f257, 0f3F19999A; @%p300 bra $L__BB0_377; bra.uni $L__BB0_376; $L__BB0_377: mul.f32 %f1885, %f2691, %f2691; mov.f32 %f1886, 0fBD563CAE; mov.f32 %f1887, 0f3C80F082; fma.rn.f32 %f1888, %f1887, %f1885, %f1886; mov.f32 %f1889, 0f3E085941; fma.rn.f32 %f1890, %f1888, %f1885, %f1889; mov.f32 %f1891, 0fBEAAA9ED; fma.rn.f32 %f1892, %f1890, %f1885, %f1891; mov.f32 %f1893, 0f00000000; fma.rn.f32 %f1894, %f1892, %f1885, %f1893; fma.rn.f32 %f2691, %f1894, %f2691, %f2691; bra.uni $L__BB0_398; $L__BB0_395: fma.rn.f32 %f2042, %f381, %f2691, %f382; mov.f32 %f2043, 0f3F800000; min.f32 %f2044, %f2043, %f2042; mov.f32 %f2045, 0f00000000; max.f32 %f2691, %f2045, %f2044; bra.uni $L__BB0_398; $L__BB0_383: mul.f32 %f267, %f382, %f2691; abs.f32 %f268, %f267; setp.ltu.f32 %p305, %f268, 0f3F19999A; @%p305 bra $L__BB0_385; bra.uni $L__BB0_384; $L__BB0_385: mul.f32 %f1968, %f267, %f267; mov.f32 %f1969, 0fBD563CAE; mov.f32 %f1970, 0f3C80F082; fma.rn.f32 %f1971, %f1970, %f1968, %f1969; mov.f32 %f1972, 0f3E085941; fma.rn.f32 %f1973, %f1971, %f1968, %f1972; mov.f32 %f1974, 0fBEAAA9ED; fma.rn.f32 %f1975, %f1973, %f1968, %f1974; mov.f32 %f1976, 0f00000000; fma.rn.f32 %f1977, %f1975, %f1968, %f1976; fma.rn.f32 %f270, %f1977, %f267, %f267; mul.f32 %f2691, %f381, %f270; bra.uni $L__BB0_398; $L__BB0_387: mul.f32 %f1978, %f382, %f381; mov.f32 %f1979, 0f3F000000; mov.f32 %f1980, 0f3BBB989D; fma.rn.f32 %f1981, %f2691, %f1980, %f1979; mov.f32 %f1982, 0f3FB8AA3B; mov.f32 %f1983, 0f437C0000; cvt.sat.f32.f32 %f1984, %f1981; mov.f32 %f1985, 0f4B400001; fma.rm.f32 %f1986, %f1984, %f1983, %f1985; add.f32 %f1987, %f1986, 0fCB40007F; neg.f32 %f1988, %f1987; fma.rn.f32 %f1989, %f2691, %f1982, %f1988; mov.f32 %f1990, 0f32A57060; fma.rn.f32 %f1991, %f2691, %f1990, %f1989; mov.b32 %r265, %f1986; shl.b32 %r266, %r265, 23; mov.b32 %f1992, %r266; ex2.approx.ftz.f32 %f1993, %f1991; fma.rn.f32 %f1994, %f1993, %f1992, 0fBF800000; mul.f32 %f2691, %f1978, %f1994; bra.uni $L__BB0_398; $L__BB0_376: mul.f32 %f1877, %f257, 0f4038AA3B; ex2.approx.ftz.f32 %f1878, %f1877; add.f32 %f1879, %f1878, 0f3F800000; mov.f32 %f1880, 0f3F800000; rcp.approx.ftz.f32 %f1881, %f1879; mov.f32 %f1882, 0fC0000000; fma.rn.f32 %f1883, %f1881, %f1882, %f1880; setp.ge.f32 %p301, %f257, 0f41102CB4; selp.f32 %f1884, 0f3F800000, %f1883, %p301; mov.b32 %r249, %f1884; mov.b32 %r250, %f2691; and.b32 %r251, %r250, -2147483648; or.b32 %r252, %r251, %r249; mov.b32 %f2691, %r252; bra.uni $L__BB0_398; $L__BB0_384: mul.f32 %f1960, %f268, 0f4038AA3B; ex2.approx.ftz.f32 %f1961, %f1960; add.f32 %f1962, %f1961, 0f3F800000; mov.f32 %f1963, 0f3F800000; rcp.approx.ftz.f32 %f1964, %f1962; mov.f32 %f1965, 0fC0000000; fma.rn.f32 %f1966, %f1964, %f1965, %f1963; setp.ge.f32 %p306, %f268, 0f41102CB4; selp.f32 %f1967, 0f3F800000, %f1966, %p306; mov.b32 %r261, %f1967; mov.b32 %r262, %f267; and.b32 %r263, %r262, -2147483648; or.b32 %r264, %r263, %r261; mov.b32 %f269, %r264; mul.f32 %f2691, %f381, %f269; $L__BB0_398: @%p212 bra $L__BB0_408; setp.gt.s64 %p324, %rd69, 4; @%p324 bra $L__BB0_403; bra.uni $L__BB0_400; $L__BB0_403: setp.gt.s64 %p325, %rd69, 7; @%p325 bra $L__BB0_406; setp.eq.s64 %p328, %rd69, 5; @%p328 bra $L__BB0_436; setp.eq.s64 %p329, %rd69, 7; @%p329 bra $L__BB0_437; bra.uni $L__BB0_441; $L__BB0_408: setp.gt.s64 %p315, %rd69, 13; @%p315 bra $L__BB0_412; bra.uni $L__BB0_409; $L__BB0_412: setp.gt.s64 %p316, %rd69, 15; @%p316 bra $L__BB0_415; setp.eq.s64 %p319, %rd69, 14; @%p319 bra $L__BB0_422; setp.eq.s64 %p320, %rd69, 15; @%p320 bra $L__BB0_421; bra.uni $L__BB0_441; $L__BB0_421: abs.f32 %f2080, %f2694; add.f32 %f2081, %f2080, 0f3F800000; div.rn.f32 %f2694, %f2694, %f2081; bra.uni $L__BB0_441; $L__BB0_400: setp.eq.s64 %p330, %rd69, 0; @%p330 bra $L__BB0_439; setp.eq.s64 %p331, %rd69, 2; @%p331 bra $L__BB0_438; setp.eq.s64 %p332, %rd69, 4; @%p332 bra $L__BB0_437; bra.uni $L__BB0_441; $L__BB0_437: setp.lt.f32 %p346, %f2694, 0f00000000; mul.f32 %f2226, %f381, %f2694; selp.f32 %f2694, %f2226, %f2694, %p346; bra.uni $L__BB0_441; $L__BB0_409: setp.eq.s64 %p321, %rd69, 10; @%p321 bra $L__BB0_429; setp.eq.s64 %p322, %rd69, 11; @%p322 bra $L__BB0_426; setp.eq.s64 %p323, %rd69, 12; @%p323 bra $L__BB0_425; bra.uni $L__BB0_441; $L__BB0_425: neg.f32 %f2128, %f2694; mov.f32 %f2129, 0f3F000000; mov.f32 %f2130, 0f3BBB989D; fma.rn.f32 %f2131, %f2128, %f2130, %f2129; mov.f32 %f2132, 0f3FB8AA3B; mov.f32 %f2133, 0f437C0000; cvt.sat.f32.f32 %f2134, %f2131; mov.f32 %f2135, 0f4B400001; fma.rm.f32 %f2136, %f2134, %f2133, %f2135; add.f32 %f2137, %f2136, 0fCB40007F; neg.f32 %f2138, %f2137; fma.rn.f32 %f2139, %f2128, %f2132, %f2138; mov.f32 %f2140, 0f32A57060; fma.rn.f32 %f2141, %f2128, %f2140, %f2139; mov.b32 %r285, %f2136; shl.b32 %r286, %r285, 23; mov.b32 %f2142, %r286; ex2.approx.ftz.f32 %f2143, %f2141; fma.rn.f32 %f2144, %f2143, %f2142, 0f3F800000; rcp.rn.f32 %f2694, %f2144; bra.uni $L__BB0_441; $L__BB0_406: setp.eq.s64 %p326, %rd69, 8; @%p326 bra $L__BB0_433; setp.eq.s64 %p327, %rd69, 9; @%p327 bra $L__BB0_432; bra.uni $L__BB0_441; $L__BB0_432: setp.lt.f32 %p342, %f2694, 0f00000000; selp.f32 %f2694, 0f00000000, %f2694, %p342; bra.uni $L__BB0_441; $L__BB0_415: setp.eq.s64 %p317, %rd69, 16; @%p317 bra $L__BB0_418; setp.ne.s64 %p318, %rd69, 17; @%p318 bra $L__BB0_441; setp.ge.f32 %p333, %f381, %f2694; selp.f32 %f2694, 0f00000000, %f2694, %p333; bra.uni $L__BB0_441; $L__BB0_436: fma.rn.f32 %f2694, %f381, %f2694, %f382; bra.uni $L__BB0_441; $L__BB0_422: mul.f32 %f2082, %f381, %f2694; mov.f32 %f2083, 0f3F000000; mov.f32 %f2084, 0f3BBB989D; fma.rn.f32 %f2085, %f2082, %f2084, %f2083; mov.f32 %f2086, 0f3FB8AA3B; mov.f32 %f2087, 0f437C0000; cvt.sat.f32.f32 %f2088, %f2085; mov.f32 %f2089, 0f4B400001; fma.rm.f32 %f2090, %f2088, %f2087, %f2089; add.f32 %f2091, %f2090, 0fCB40007F; neg.f32 %f2092, %f2091; fma.rn.f32 %f2093, %f2082, %f2086, %f2092; mov.f32 %f2094, 0f32A57060; fma.rn.f32 %f2095, %f2082, %f2094, %f2093; mov.b32 %r279, %f2090; shl.b32 %r280, %r279, 23; mov.b32 %f2096, %r280; ex2.approx.ftz.f32 %f2097, %f2095; fma.rn.f32 %f2098, %f2097, %f2096, 0f3F800000; setp.lt.f32 %p336, %f2098, 0f00800000; mul.f32 %f2099, %f2098, 0f4B000000; selp.f32 %f291, %f2099, %f2098, %p336; selp.f32 %f2100, 0fC1B80000, 0f00000000, %p336; mov.b32 %r281, %f291; add.s32 %r282, %r281, -1059760811; and.b32 %r283, %r282, -8388608; sub.s32 %r284, %r281, %r283; mov.b32 %f2101, %r284; cvt.rn.f32.s32 %f2102, %r283; mov.f32 %f2103, 0f34000000; fma.rn.f32 %f2104, %f2102, %f2103, %f2100; add.f32 %f2105, %f2101, 0fBF800000; mov.f32 %f2106, 0f3E1039F6; mov.f32 %f2107, 0fBE055027; fma.rn.f32 %f2108, %f2107, %f2105, %f2106; mov.f32 %f2109, 0fBDF8CDCC; fma.rn.f32 %f2110, %f2108, %f2105, %f2109; mov.f32 %f2111, 0f3E0F2955; fma.rn.f32 %f2112, %f2110, %f2105, %f2111; mov.f32 %f2113, 0fBE2AD8B9; fma.rn.f32 %f2114, %f2112, %f2105, %f2113; mov.f32 %f2115, 0f3E4CED0B; fma.rn.f32 %f2116, %f2114, %f2105, %f2115; mov.f32 %f2117, 0fBE7FFF22; fma.rn.f32 %f2118, %f2116, %f2105, %f2117; mov.f32 %f2119, 0f3EAAAA78; fma.rn.f32 %f2120, %f2118, %f2105, %f2119; mov.f32 %f2121, 0fBF000000; fma.rn.f32 %f2122, %f2120, %f2105, %f2121; mul.f32 %f2123, %f2122, %f2105; fma.rn.f32 %f2124, %f2123, %f2105, %f2105; mov.f32 %f2125, 0f3F317218; fma.rn.f32 %f2692, %f2104, %f2125, %f2124; setp.lt.u32 %p337, %r281, 2139095040; @%p337 bra $L__BB0_424; mov.f32 %f2126, 0f7F800000; fma.rn.f32 %f2692, %f291, %f2126, %f2126; $L__BB0_424: setp.eq.f32 %p338, %f291, 0f00000000; selp.f32 %f2127, 0fFF800000, %f2692, %p338; div.rn.f32 %f2694, %f2127, %f381; bra.uni $L__BB0_441; $L__BB0_439: setp.geu.f32 %p347, %f2694, 0f00000000; @%p347 bra $L__BB0_441; mov.f32 %f2231, 0f3F000000; mov.f32 %f2232, 0f3BBB989D; fma.rn.f32 %f2233, %f2694, %f2232, %f2231; mov.f32 %f2234, 0f3FB8AA3B; mov.f32 %f2235, 0f437C0000; cvt.sat.f32.f32 %f2236, %f2233; mov.f32 %f2237, 0f4B400001; fma.rm.f32 %f2238, %f2236, %f2235, %f2237; add.f32 %f2239, %f2238, 0fCB40007F; neg.f32 %f2240, %f2239; fma.rn.f32 %f2241, %f2694, %f2234, %f2240; mov.f32 %f2242, 0f32A57060; fma.rn.f32 %f2243, %f2694, %f2242, %f2241; mov.b32 %r299, %f2238; shl.b32 %r300, %r299, 23; mov.b32 %f2244, %r300; ex2.approx.ftz.f32 %f2245, %f2243; fma.rn.f32 %f2246, %f2245, %f2244, 0fBF800000; mul.f32 %f2694, %f381, %f2246; bra.uni $L__BB0_441; $L__BB0_429: setp.gt.f32 %p341, %f2694, 0f00000000; @%p341 bra $L__BB0_431; bra.uni $L__BB0_430; $L__BB0_431: mul.f32 %f2694, %f382, %f2694; bra.uni $L__BB0_441; $L__BB0_433: mul.f32 %f2180, %f382, %f2694; mov.f32 %f2181, 0f3F000000; mov.f32 %f2182, 0f3BBB989D; fma.rn.f32 %f2183, %f2180, %f2182, %f2181; mov.f32 %f2184, 0f3FB8AA3B; mov.f32 %f2185, 0f437C0000; cvt.sat.f32.f32 %f2186, %f2183; mov.f32 %f2187, 0f4B400001; fma.rm.f32 %f2188, %f2186, %f2185, %f2187; add.f32 %f2189, %f2188, 0fCB40007F; neg.f32 %f2190, %f2189; fma.rn.f32 %f2191, %f2180, %f2184, %f2190; mov.f32 %f2192, 0f32A57060; fma.rn.f32 %f2193, %f2180, %f2192, %f2191; mov.b32 %r293, %f2188; shl.b32 %r294, %r293, 23; mov.b32 %f2194, %r294; ex2.approx.ftz.f32 %f2195, %f2193; fma.rn.f32 %f2196, %f2195, %f2194, 0f3F800000; setp.lt.f32 %p343, %f2196, 0f00800000; mul.f32 %f2197, %f2196, 0f4B000000; selp.f32 %f306, %f2197, %f2196, %p343; selp.f32 %f2198, 0fC1B80000, 0f00000000, %p343; mov.b32 %r295, %f306; add.s32 %r296, %r295, -1059760811; and.b32 %r297, %r296, -8388608; sub.s32 %r298, %r295, %r297; mov.b32 %f2199, %r298; cvt.rn.f32.s32 %f2200, %r297; mov.f32 %f2201, 0f34000000; fma.rn.f32 %f2202, %f2200, %f2201, %f2198; add.f32 %f2203, %f2199, 0fBF800000; mov.f32 %f2204, 0f3E1039F6; mov.f32 %f2205, 0fBE055027; fma.rn.f32 %f2206, %f2205, %f2203, %f2204; mov.f32 %f2207, 0fBDF8CDCC; fma.rn.f32 %f2208, %f2206, %f2203, %f2207; mov.f32 %f2209, 0f3E0F2955; fma.rn.f32 %f2210, %f2208, %f2203, %f2209; mov.f32 %f2211, 0fBE2AD8B9; fma.rn.f32 %f2212, %f2210, %f2203, %f2211; mov.f32 %f2213, 0f3E4CED0B; fma.rn.f32 %f2214, %f2212, %f2203, %f2213; mov.f32 %f2215, 0fBE7FFF22; fma.rn.f32 %f2216, %f2214, %f2203, %f2215; mov.f32 %f2217, 0f3EAAAA78; fma.rn.f32 %f2218, %f2216, %f2203, %f2217; mov.f32 %f2219, 0fBF000000; fma.rn.f32 %f2220, %f2218, %f2203, %f2219; mul.f32 %f2221, %f2220, %f2203; fma.rn.f32 %f2222, %f2221, %f2203, %f2203; mov.f32 %f2223, 0f3F317218; fma.rn.f32 %f2693, %f2202, %f2223, %f2222; setp.lt.u32 %p344, %r295, 2139095040; @%p344 bra $L__BB0_435; mov.f32 %f2224, 0f7F800000; fma.rn.f32 %f2693, %f306, %f2224, %f2224; $L__BB0_435: setp.eq.f32 %p345, %f306, 0f00000000; selp.f32 %f2225, 0fFF800000, %f2693, %p345; mul.f32 %f2694, %f381, %f2225; bra.uni $L__BB0_441; $L__BB0_418: abs.f32 %f287, %f2694; setp.ltu.f32 %p334, %f287, 0f3F19999A; @%p334 bra $L__BB0_420; bra.uni $L__BB0_419; $L__BB0_420: mul.f32 %f2070, %f2694, %f2694; mov.f32 %f2071, 0fBD563CAE; mov.f32 %f2072, 0f3C80F082; fma.rn.f32 %f2073, %f2072, %f2070, %f2071; mov.f32 %f2074, 0f3E085941; fma.rn.f32 %f2075, %f2073, %f2070, %f2074; mov.f32 %f2076, 0fBEAAA9ED; fma.rn.f32 %f2077, %f2075, %f2070, %f2076; mov.f32 %f2078, 0f00000000; fma.rn.f32 %f2079, %f2077, %f2070, %f2078; fma.rn.f32 %f2694, %f2079, %f2694, %f2694; bra.uni $L__BB0_441; $L__BB0_438: fma.rn.f32 %f2227, %f381, %f2694, %f382; mov.f32 %f2228, 0f3F800000; min.f32 %f2229, %f2228, %f2227; mov.f32 %f2230, 0f00000000; max.f32 %f2694, %f2230, %f2229; bra.uni $L__BB0_441; $L__BB0_426: mul.f32 %f297, %f382, %f2694; abs.f32 %f298, %f297; setp.ltu.f32 %p339, %f298, 0f3F19999A; @%p339 bra $L__BB0_428; bra.uni $L__BB0_427; $L__BB0_428: mul.f32 %f2153, %f297, %f297; mov.f32 %f2154, 0fBD563CAE; mov.f32 %f2155, 0f3C80F082; fma.rn.f32 %f2156, %f2155, %f2153, %f2154; mov.f32 %f2157, 0f3E085941; fma.rn.f32 %f2158, %f2156, %f2153, %f2157; mov.f32 %f2159, 0fBEAAA9ED; fma.rn.f32 %f2160, %f2158, %f2153, %f2159; mov.f32 %f2161, 0f00000000; fma.rn.f32 %f2162, %f2160, %f2153, %f2161; fma.rn.f32 %f300, %f2162, %f297, %f297; mul.f32 %f2694, %f381, %f300; bra.uni $L__BB0_441; $L__BB0_430: mul.f32 %f2163, %f382, %f381; mov.f32 %f2164, 0f3F000000; mov.f32 %f2165, 0f3BBB989D; fma.rn.f32 %f2166, %f2694, %f2165, %f2164; mov.f32 %f2167, 0f3FB8AA3B; mov.f32 %f2168, 0f437C0000; cvt.sat.f32.f32 %f2169, %f2166; mov.f32 %f2170, 0f4B400001; fma.rm.f32 %f2171, %f2169, %f2168, %f2170; add.f32 %f2172, %f2171, 0fCB40007F; neg.f32 %f2173, %f2172; fma.rn.f32 %f2174, %f2694, %f2167, %f2173; mov.f32 %f2175, 0f32A57060; fma.rn.f32 %f2176, %f2694, %f2175, %f2174; mov.b32 %r291, %f2171; shl.b32 %r292, %r291, 23; mov.b32 %f2177, %r292; ex2.approx.ftz.f32 %f2178, %f2176; fma.rn.f32 %f2179, %f2178, %f2177, 0fBF800000; mul.f32 %f2694, %f2163, %f2179; bra.uni $L__BB0_441; $L__BB0_419: mul.f32 %f2062, %f287, 0f4038AA3B; ex2.approx.ftz.f32 %f2063, %f2062; add.f32 %f2064, %f2063, 0f3F800000; mov.f32 %f2065, 0f3F800000; rcp.approx.ftz.f32 %f2066, %f2064; mov.f32 %f2067, 0fC0000000; fma.rn.f32 %f2068, %f2066, %f2067, %f2065; setp.ge.f32 %p335, %f287, 0f41102CB4; selp.f32 %f2069, 0f3F800000, %f2068, %p335; mov.b32 %r275, %f2069; mov.b32 %r276, %f2694; and.b32 %r277, %r276, -2147483648; or.b32 %r278, %r277, %r275; mov.b32 %f2694, %r278; bra.uni $L__BB0_441; $L__BB0_427: mul.f32 %f2145, %f298, 0f4038AA3B; ex2.approx.ftz.f32 %f2146, %f2145; add.f32 %f2147, %f2146, 0f3F800000; mov.f32 %f2148, 0f3F800000; rcp.approx.ftz.f32 %f2149, %f2147; mov.f32 %f2150, 0fC0000000; fma.rn.f32 %f2151, %f2149, %f2150, %f2148; setp.ge.f32 %p340, %f298, 0f41102CB4; selp.f32 %f2152, 0f3F800000, %f2151, %p340; mov.b32 %r287, %f2152; mov.b32 %r288, %f297; and.b32 %r289, %r288, -2147483648; or.b32 %r290, %r289, %r287; mov.b32 %f299, %r290; mul.f32 %f2694, %f381, %f299; $L__BB0_441: st.global.v4.f32 [%rd15], {%f2685, %f2688, %f2691, %f2694}; $L__BB0_586: ret; }