Move rotate/cntlz/cnttz helpers to Utilities/asm.h

This commit is contained in:
Nekotekina 2018-09-05 19:57:52 +03:00
parent ee96807305
commit ed9fb8405b
15 changed files with 242 additions and 197 deletions

171
Utilities/asm.h Normal file
View file

@ -0,0 +1,171 @@
#pragma once
#include "types.h"
namespace utils
{
// Count the leading zero bits of a 32-bit value.
// A zero `arg` yields 32 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u32 cntlz32(u32 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong msb_index;
    if (_BitScanReverse(&msb_index, arg) || nonzero)
    {
        // Index of the highest set bit -> number of zero bits above it
        return msb_index ^ 31;
    }
    return 32;
#else
    if (arg || nonzero)
    {
        return __builtin_clz(arg);
    }
    return 32;
#endif
}
// Count the leading zero bits of a 64-bit value.
// A zero `arg` yields 64 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u64 cntlz64(u64 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong msb_index;
    if (_BitScanReverse64(&msb_index, arg) || nonzero)
    {
        // Index of the highest set bit -> number of zero bits above it
        return msb_index ^ 63;
    }
    return 64;
#else
    if (arg || nonzero)
    {
        return __builtin_clzll(arg);
    }
    return 64;
#endif
}
// Count the trailing zero bits of a 32-bit value.
// A zero `arg` yields 32 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u32 cnttz32(u32 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong lsb_index;
    if (_BitScanForward(&lsb_index, arg) || nonzero)
    {
        // Index of the lowest set bit equals the trailing zero count
        return lsb_index;
    }
    return 32;
#else
    if (arg || nonzero)
    {
        return __builtin_ctz(arg);
    }
    return 32;
#endif
}
// Count the trailing zero bits of a 64-bit value.
// A zero `arg` yields 64 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u64 cnttz64(u64 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong lsb_index;
    if (_BitScanForward64(&lsb_index, arg) || nonzero)
    {
        // Index of the lowest set bit equals the trailing zero count
        return lsb_index;
    }
    return 64;
#else
    if (arg || nonzero)
    {
        return __builtin_ctzll(arg);
    }
    return 64;
#endif
}
// Rotate and multiply-high helpers
#if defined(__GNUG__)
// Rotate the 8-bit value x left by n bits; the count is taken modulo 8.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u8 rol8(u8 x, u8 n)
{
    const unsigned shift = n & 7u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u8>((x << shift) | (x >> ((8u - shift) & 7u)));
}
// Rotate the 8-bit value x right by n bits; the count is taken modulo 8.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u8 ror8(u8 x, u8 n)
{
    const unsigned shift = n & 7u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u8>((x >> shift) | (x << ((8u - shift) & 7u)));
}
// Rotate the 16-bit value x left by n bits; the count is taken modulo 16.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u16 rol16(u16 x, u16 n)
{
    const unsigned shift = n & 15u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u16>((x << shift) | (x >> ((16u - shift) & 15u)));
}
// Rotate the 16-bit value x right by n bits; the count is taken modulo 16.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u16 ror16(u16 x, u16 n)
{
    const unsigned shift = n & 15u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u16>((x >> shift) | (x << ((16u - shift) & 15u)));
}
// Rotate the 32-bit value x left by n bits; the count is taken modulo 32.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u32 rol32(u32 x, u32 n)
{
    const unsigned shift = n & 31u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u32>((x << shift) | (x >> ((32u - shift) & 31u)));
}
// Rotate the 32-bit value x right by n bits; the count is taken modulo 32.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u32 ror32(u32 x, u32 n)
{
    const unsigned shift = n & 31u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u32>((x >> shift) | (x << ((32u - shift) & 31u)));
}
// Rotate the 64-bit value x left by n bits; the count is taken modulo 64.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u64 rol64(u64 x, u64 n)
{
    const unsigned shift = static_cast<unsigned>(n & 63u);
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u64>((x << shift) | (x >> ((64u - shift) & 63u)));
}
// Rotate the 64-bit value x right by n bits; the count is taken modulo 64.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u64 ror64(u64 x, u64 n)
{
    const unsigned shift = static_cast<unsigned>(n & 63u);
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u64>((x >> shift) | (x << ((64u - shift) & 63u)));
}
// Return the upper 64 bits of the unsigned 128-bit product a * b.
// Uses the compiler's 128-bit integer type rather than `mulq` inline asm:
// the instruction overwrites RAX with the low half of the product, which an
// input-only "a" constraint does not tell the compiler about.
inline u64 umulh64(u64 a, u64 b)
{
    const __uint128_t product = static_cast<__uint128_t>(a) * b;
    return static_cast<u64>(product >> 64);
}
// Return the upper 64 bits of the signed 128-bit product a * b.
// Uses the compiler's 128-bit integer type rather than `imulq` inline asm:
// the instruction overwrites RAX with the low half of the product, which an
// input-only "a" constraint does not tell the compiler about.
inline s64 mulh64(s64 a, s64 b)
{
    const __int128_t product = static_cast<__int128_t>(a) * b;
    // GCC and Clang shift negative signed values arithmetically, keeping the sign
    return static_cast<s64>(product >> 64);
}
#elif defined(_MSC_VER)
// Rotate the 8-bit value x left by n bits using the `_rotl8` intrinsic.
inline u8 rol8(u8 x, u8 n)
{
    const u8 rotated = _rotl8(x, n);
    return rotated;
}
// Rotate the 8-bit value x right by n bits using the `_rotr8` intrinsic.
inline u8 ror8(u8 x, u8 n)
{
    const u8 rotated = _rotr8(x, n);
    return rotated;
}
// Rotate the 16-bit value x left by n bits using the `_rotl16` intrinsic.
// The count is narrowed to u8 before being passed on.
inline u16 rol16(u16 x, u16 n)
{
    const u8 count = static_cast<u8>(n);
    return _rotl16(x, count);
}
// Rotate the 16-bit value x right by n bits using the `_rotr16` intrinsic.
// The count is narrowed to u8 before being passed on.
inline u16 ror16(u16 x, u16 n)
{
    const u8 count = static_cast<u8>(n);
    return _rotr16(x, count);
}
// Rotate the 32-bit value x left by n bits using the `_rotl` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u32 rol32(u32 x, u32 n)
{
    const int count = static_cast<int>(n);
    return _rotl(x, count);
}
// Rotate the 32-bit value x right by n bits using the `_rotr` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u32 ror32(u32 x, u32 n)
{
    const int count = static_cast<int>(n);
    return _rotr(x, count);
}
// Rotate the 64-bit value x left by n bits using the `_rotl64` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u64 rol64(u64 x, u64 n)
{
    const int count = static_cast<int>(n);
    return _rotl64(x, count);
}
// Rotate the 64-bit value x right by n bits using the `_rotr64` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u64 ror64(u64 x, u64 n)
{
    const int count = static_cast<int>(n);
    return _rotr64(x, count);
}
// Return the result of the `__umulh` intrinsic for x and y
// (the high half of the unsigned 64x64-bit product).
inline u64 umulh64(u64 x, u64 y)
{
    const u64 high_part = __umulh(x, y);
    return high_part;
}
// Return the result of the `__mulh` intrinsic for x and y
// (the high half of the signed 64x64-bit product).
inline s64 mulh64(s64 x, s64 y)
{
    const s64 high_part = __mulh(x, y);
    return high_part;
}
#endif
} // namespace utils

View file

@ -1,6 +1,7 @@
#pragma once
#include "types.h"
#include "asm.h"
#include <climits>
#include <string>
#include <vector>
@ -56,7 +57,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
const auto write_octal = [&](u64 value, u64 min_num)
{
out.resize(out.size() + std::max<u64>(min_num, 66 / 3 - (cntlz64(value | 1, true) + 2) / 3), '0');
out.resize(out.size() + std::max<u64>(min_num, 66 / 3 - (utils::cntlz64(value | 1, true) + 2) / 3), '0');
// Write in reversed order
for (auto i = out.rbegin(); value; i++, value /= 8)
@ -67,8 +68,8 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
const auto write_hex = [&](u64 value, bool upper, u64 min_num)
{
out.resize(out.size() + std::max<u64>(min_num, 64 / 4 - cntlz64(value | 1, true) / 4), '0');
out.resize(out.size() + std::max<u64>(min_num, 64 / 4 - utils::cntlz64(value | 1, true) / 4), '0');
// Write in reversed order
for (auto i = out.rbegin(); value; i++, value /= 16)
{
@ -141,7 +142,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
{
ctx.width = read_decimal(ch - '0');
}
break;
}
@ -157,7 +158,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
ctx.width = std::abs(warg);
ctx.left |= warg < 0;
}
break;
}
@ -307,7 +308,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
const std::size_t start = out.size();
const std::size_t size1 = src.fmt_string(out, ctx.args);
if (ctx.dot && size1 > ctx.prec)
{
// Shrink if necessary
@ -576,7 +577,7 @@ std::size_t cfmt_append(Dst& out, const Char* fmt, Src&& src)
// Add padding if necessary
out.insert(ctx.left ? out.end() : out.begin() + start, ctx.width - size2, ' ');
}
src.skip(ctx.args);
ctx = {0};
break;

View file

@ -433,46 +433,6 @@ struct offset32_detail<T3 T4::*>
}
};
// Count the leading zero bits of a 32-bit value.
// A zero `arg` yields 32 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u32 cntlz32(u32 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong msb_index;
    if (_BitScanReverse(&msb_index, arg) || nonzero)
    {
        // Index of the highest set bit -> number of zero bits above it
        return msb_index ^ 31;
    }
    return 32;
#else
    if (arg || nonzero)
    {
        // The 64-bit builtin is used here, so 32 is subtracted from its count
        return __builtin_clzll(arg) - 32;
    }
    return 32;
#endif
}
// Count the leading zero bits of a 64-bit value.
// A zero `arg` yields 64 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u64 cntlz64(u64 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong msb_index;
    if (_BitScanReverse64(&msb_index, arg) || nonzero)
    {
        // Index of the highest set bit -> number of zero bits above it
        return msb_index ^ 63;
    }
    return 64;
#else
    if (arg || nonzero)
    {
        return __builtin_clzll(arg);
    }
    return 64;
#endif
}
// Count the trailing zero bits of a 32-bit value.
// A zero `arg` yields 32 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u32 cnttz32(u32 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong lsb_index;
    if (_BitScanForward(&lsb_index, arg) || nonzero)
    {
        // Index of the lowest set bit equals the trailing zero count
        return lsb_index;
    }
    return 32;
#else
    if (arg || nonzero)
    {
        // The 64-bit builtin is used here on the 32-bit argument
        return __builtin_ctzll(arg);
    }
    return 32;
#endif
}
// Count the trailing zero bits of a 64-bit value.
// A zero `arg` yields 64 unless `nonzero` is set; with `nonzero` the zero
// check is bypassed and the raw bit-scan result is returned (presumably the
// caller then guarantees arg != 0 - confirm at call sites).
inline u64 cnttz64(u64 arg, bool nonzero = false)
{
#ifdef _MSC_VER
    ulong lsb_index;
    if (_BitScanForward64(&lsb_index, arg) || nonzero)
    {
        // Index of the lowest set bit equals the trailing zero count
        return lsb_index;
    }
    return 64;
#else
    if (arg || nonzero)
    {
        return __builtin_ctzll(arg);
    }
    return 64;
#endif
}
// Helper function, used by ""_u16, ""_u32, ""_u64
constexpr u8 to_u8(char c)
{
@ -848,89 +808,3 @@ inline void busy_wait(std::size_t cycles = 3000)
const u64 s = __rdtsc();
do _mm_pause(); while (__rdtsc() - s < cycles);
}
// Rotate and multiply-high helpers
#if defined(__GNUG__)
// Rotate the 8-bit value x left by n bits; the count is taken modulo 8.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u8 rol8(u8 x, u8 n)
{
    const unsigned shift = n & 7u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u8>((x << shift) | (x >> ((8u - shift) & 7u)));
}
// Rotate the 8-bit value x right by n bits; the count is taken modulo 8.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u8 ror8(u8 x, u8 n)
{
    const unsigned shift = n & 7u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u8>((x >> shift) | (x << ((8u - shift) & 7u)));
}
// Rotate the 16-bit value x left by n bits; the count is taken modulo 16.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u16 rol16(u16 x, u16 n)
{
    const unsigned shift = n & 15u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u16>((x << shift) | (x >> ((16u - shift) & 15u)));
}
// Rotate the 16-bit value x right by n bits; the count is taken modulo 16.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u16 ror16(u16 x, u16 n)
{
    const unsigned shift = n & 15u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u16>((x >> shift) | (x << ((16u - shift) & 15u)));
}
// Rotate the 32-bit value x left by n bits; the count is taken modulo 32.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u32 rol32(u32 x, u32 n)
{
    const unsigned shift = n & 31u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u32>((x << shift) | (x >> ((32u - shift) & 31u)));
}
// Rotate the 32-bit value x right by n bits; the count is taken modulo 32.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u32 ror32(u32 x, u32 n)
{
    const unsigned shift = n & 31u;
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u32>((x >> shift) | (x << ((32u - shift) & 31u)));
}
// Rotate the 64-bit value x left by n bits; the count is taken modulo 64.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u64 rol64(u64 x, u64 n)
{
    const unsigned shift = static_cast<unsigned>(n & 63u);
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u64>((x << shift) | (x >> ((64u - shift) & 63u)));
}
// Rotate the 64-bit value x right by n bits; the count is taken modulo 64.
// Implemented with plain shifts instead of x86 inline asm so the helper builds
// on every GNU-compatible target and remains visible to the optimizer (GCC and
// Clang typically lower this idiom to a single rotate instruction).
inline u64 ror64(u64 x, u64 n)
{
    const unsigned shift = static_cast<unsigned>(n & 63u);
    // The second shift count is masked so that shift == 0 never shifts by the full width
    return static_cast<u64>((x >> shift) | (x << ((64u - shift) & 63u)));
}
// Return the upper 64 bits of the unsigned 128-bit product a * b.
// Uses the compiler's 128-bit integer type rather than `mulq` inline asm:
// the instruction overwrites RAX with the low half of the product, which an
// input-only "a" constraint does not tell the compiler about.
inline u64 umulh64(u64 a, u64 b)
{
    const __uint128_t product = static_cast<__uint128_t>(a) * b;
    return static_cast<u64>(product >> 64);
}
// Return the upper 64 bits of the signed 128-bit product a * b.
// Uses the compiler's 128-bit integer type rather than `imulq` inline asm:
// the instruction overwrites RAX with the low half of the product, which an
// input-only "a" constraint does not tell the compiler about.
inline s64 mulh64(s64 a, s64 b)
{
    const __int128_t product = static_cast<__int128_t>(a) * b;
    // GCC and Clang shift negative signed values arithmetically, keeping the sign
    return static_cast<s64>(product >> 64);
}
#elif defined(_MSC_VER)
// Rotate the 8-bit value x left by n bits using the `_rotl8` intrinsic.
inline u8 rol8(u8 x, u8 n)
{
    const u8 rotated = _rotl8(x, n);
    return rotated;
}
// Rotate the 8-bit value x right by n bits using the `_rotr8` intrinsic.
inline u8 ror8(u8 x, u8 n)
{
    const u8 rotated = _rotr8(x, n);
    return rotated;
}
// Rotate the 16-bit value x left by n bits using the `_rotl16` intrinsic.
// The count is narrowed to u8 before being passed on.
inline u16 rol16(u16 x, u16 n)
{
    const u8 count = static_cast<u8>(n);
    return _rotl16(x, count);
}
// Rotate the 16-bit value x right by n bits using the `_rotr16` intrinsic.
// The count is narrowed to u8 before being passed on.
inline u16 ror16(u16 x, u16 n)
{
    const u8 count = static_cast<u8>(n);
    return _rotr16(x, count);
}
// Rotate the 32-bit value x left by n bits using the `_rotl` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u32 rol32(u32 x, u32 n)
{
    const int count = static_cast<int>(n);
    return _rotl(x, count);
}
// Rotate the 32-bit value x right by n bits using the `_rotr` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u32 ror32(u32 x, u32 n)
{
    const int count = static_cast<int>(n);
    return _rotr(x, count);
}
// Rotate the 64-bit value x left by n bits using the `_rotl64` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u64 rol64(u64 x, u64 n)
{
    const int count = static_cast<int>(n);
    return _rotl64(x, count);
}
// Rotate the 64-bit value x right by n bits using the `_rotr64` intrinsic.
// The count is converted to int, the type the intrinsic is called with.
inline u64 ror64(u64 x, u64 n)
{
    const int count = static_cast<int>(n);
    return _rotr64(x, count);
}
// Return the result of the `__umulh` intrinsic for x and y
// (the high half of the unsigned 64x64-bit product).
inline u64 umulh64(u64 x, u64 y)
{
    const u64 high_part = __umulh(x, y);
    return high_part;
}
// Return the result of the `__mulh` intrinsic for x and y
// (the high half of the signed 64x64-bit product).
inline s64 mulh64(s64 x, s64 y)
{
    const s64 high_part = __mulh(x, y);
    return high_part;
}
#endif