Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: update simdutf to 5.3.0 #53837

Merged
merged 1 commit into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 46 additions & 34 deletions deps/simdutf/simdutf.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2024-05-07 22:33:11 -0400. Do not edit! */
/* auto-generated on 2024-07-11 00:01:58 -0400. Do not edit! */
/* begin file src/simdutf.cpp */
#include "simdutf.h"
// We include base64_tables once.
Expand Down Expand Up @@ -1522,10 +1522,10 @@ template<>
struct simd16<bool>: base16<bool> {
static simdutf_really_inline simd16<bool> splat(bool _value) { return vmovq_n_u16(uint16_t(-(!!_value))); }

simdutf_really_inline simd16<bool>() : base16() {}
simdutf_really_inline simd16<bool>(const uint16x8_t _value) : base16<bool>(_value) {}
simdutf_really_inline simd16() : base16() {}
simdutf_really_inline simd16(const uint16x8_t _value) : base16<bool>(_value) {}
// Splat constructor
simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
simdutf_really_inline simd16(bool _value) : base16<bool>(splat(_value)) {}

};

Expand Down Expand Up @@ -2832,10 +2832,10 @@ template<>
struct simd16<bool>: base16<bool> {
static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm256_set1_epi16(uint16_t(-(!!_value))); }

simdutf_really_inline simd16<bool>() : base16() {}
simdutf_really_inline simd16<bool>(const __m256i _value) : base16<bool>(_value) {}
simdutf_really_inline simd16() : base16() {}
simdutf_really_inline simd16(const __m256i _value) : base16<bool>(_value) {}
// Splat constructor
simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
simdutf_really_inline simd16(bool _value) : base16<bool>(splat(_value)) {}

simdutf_really_inline bitmask_type to_bitmask() const { return _mm256_movemask_epi8(*this); }
simdutf_really_inline bool any() const { return !_mm256_testz_si256(*this, *this); }
Expand Down Expand Up @@ -3803,10 +3803,10 @@ template<>
struct simd16<bool>: base16<bool> {
static simdutf_really_inline simd16<bool> splat(bool _value) { return _mm_set1_epi16(uint16_t(-(!!_value))); }

simdutf_really_inline simd16<bool>() : base16() {}
simdutf_really_inline simd16<bool>(const __m128i _value) : base16<bool>(_value) {}
simdutf_really_inline simd16() : base16() {}
simdutf_really_inline simd16(const __m128i _value) : base16<bool>(_value) {}
// Splat constructor
simdutf_really_inline simd16<bool>(bool _value) : base16<bool>(splat(_value)) {}
simdutf_really_inline simd16(bool _value) : base16<bool>(splat(_value)) {}

simdutf_really_inline int to_bitmask() const { return _mm_movemask_epi8(*this); }
simdutf_really_inline bool any() const { return !_mm_testz_si128(*this, *this); }
Expand Down Expand Up @@ -5807,6 +5807,13 @@ result base64_tail_decode_safe(char *dst, size_t& outlen, const char_type *src,
// Returns the number of bytes written. The destination buffer must be large
// enough. It will add padding (=) if needed.
size_t tail_encode_base64(char *dst, const char *src, size_t srclen, base64_options options) {
// By default, we use padding if we are not using the URL variant.
// This is checked with ((options & base64_url) == 0) which returns true if we are not using the URL variant.
// However, we also allow 'inversion' of the convention with the base64_reverse_padding option.
// If the base64_reverse_padding option is set, we use padding if we are using the URL variant,
// and we omit it if we are not using the URL variant. This is checked with
// ((options & base64_reverse_padding) == base64_reverse_padding).
bool use_padding = ((options & base64_url) == 0) ^ ((options & base64_reverse_padding) == base64_reverse_padding);
// This looks like 3 branches, but we expect the compiler to resolve this to a single branch:
const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0 : tables::base64::base64_default::e0;
const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1 : tables::base64::base64_default::e1;
Expand All @@ -5830,7 +5837,7 @@ size_t tail_encode_base64(char *dst, const char *src, size_t srclen, base64_opti
t1 = uint8_t(src[i]);
*out++ = e0[t1];
*out++ = e1[(t1 & 0x03) << 4];
if((options & base64_url) == 0) {
if(use_padding) {
*out++ = '=';
*out++ = '=';
}
Expand All @@ -5841,7 +5848,7 @@ size_t tail_encode_base64(char *dst, const char *src, size_t srclen, base64_opti
*out++ = e0[t1];
*out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
*out++ = e2[(t2 & 0x0F) << 2];
if((options & base64_url) == 0) {
if(use_padding) {
*out++ = '=';
}
}
Expand Down Expand Up @@ -5869,7 +5876,14 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char_type * i
}

simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options) noexcept {
if(options & base64_url) {
// By default, we use padding if we are not using the URL variant.
// This is checked with ((options & base64_url) == 0) which returns true if we are not using the URL variant.
// However, we also allow 'inversion' of the convention with the base64_reverse_padding option.
// If the base64_reverse_padding option is set, we use padding if we are using the URL variant,
// and we omit it if we are not using the URL variant. This is checked with
// ((options & base64_reverse_padding) == base64_reverse_padding).
bool use_padding = ((options & base64_url) == 0) ^ ((options & base64_reverse_padding) == base64_reverse_padding);
if(!use_padding) {
return length/3 * 4 + ((length % 3) ? (length % 3) + 1 : 0);
}
return (length + 2)/3 * 4; // We use padding to make the length a multiple of 4.
Expand Down Expand Up @@ -17055,8 +17069,6 @@ result compress_decode_base64(char *dst, const char_type *src, size_t srclen,
// can avoid the call to compress_block and decode directly.
copy_block(&b, bufferptr);
bufferptr += 64;
// base64_decode_block(dst, &b);
// dst += 48;
}
if (bufferptr >= (block_size - 1) * 64 + buffer) {
for (size_t i = 0; i < (block_size - 1); i++) {
Expand Down Expand Up @@ -27138,8 +27150,8 @@ simdutf_really_inline __m256i lookup_pshufb_improved(const __m256i input) {
return _mm256_add_epi8(result, input);
}

template <base64_options options>
size_t encode_base64(char *dst, const char *src, size_t srclen) {
template <bool isbase64url>
size_t encode_base64(char *dst, const char *src, size_t srclen, base64_options options) {
// credit: Wojciech Muła
const uint8_t *input = (const uint8_t *)src;

Expand Down Expand Up @@ -27206,18 +27218,18 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m256i input3 = _mm256_or_si256(t1_3, t3_3);

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input0));
lookup_pshufb_improved<isbase64url>(input0));
out += 32;

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input1));
lookup_pshufb_improved<isbase64url>(input1));
out += 32;

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input2));
lookup_pshufb_improved<isbase64url>(input2));
out += 32;
_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(input3));
lookup_pshufb_improved<isbase64url>(input3));
out += 32;
}
for (; i + 28 <= srclen; i += 24) {
Expand All @@ -27241,7 +27253,7 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m256i indices = _mm256_or_si256(t1, t3);

_mm256_storeu_si256(reinterpret_cast<__m256i *>(out),
lookup_pshufb_improved<options == base64_url>(indices));
lookup_pshufb_improved<isbase64url>(indices));
out += 32;
}
return i / 3 * 4 + scalar::base64::tail_encode_base64((char *)out, src + i,
Expand Down Expand Up @@ -30012,9 +30024,9 @@ simdutf_warn_unused size_t implementation::base64_length_from_binary(size_t leng

size_t implementation::binary_to_base64(const char * input, size_t length, char* output, base64_options options) const noexcept {
if(options & base64_url) {
return encode_base64<base64_url>(output, input, length);
return encode_base64<true>(output, input, length, options);
} else {
return encode_base64<base64_default>(output, input, length);
return encode_base64<false>(output, input, length, options);
}
}
} // namespace haswell
Expand Down Expand Up @@ -35675,8 +35687,8 @@ template <bool base64_url> __m128i lookup_pshufb_improved(const __m128i input) {
return _mm_add_epi8(result, input);
}

template <base64_options options>
size_t encode_base64(char *dst, const char *src, size_t srclen) {
template <bool isbase64url>
size_t encode_base64(char *dst, const char *src, size_t srclen, base64_options options) {
// credit: Wojciech Muła
// SSE (lookup: pshufb improved unrolled)
const uint8_t *input = (const uint8_t *)src;
Expand Down Expand Up @@ -35727,19 +35739,19 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m128i input3 = _mm_or_si128(t1_3, t3_3);

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input0));
lookup_pshufb_improved<isbase64url>(input0));
out += 16;

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input1));
lookup_pshufb_improved<isbase64url>(input1));
out += 16;

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input2));
lookup_pshufb_improved<isbase64url>(input2));
out += 16;

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(input3));
lookup_pshufb_improved<isbase64url>(input3));
out += 16;
}
for (; i + 16 <= srclen; i += 12) {
Expand Down Expand Up @@ -35779,7 +35791,7 @@ size_t encode_base64(char *dst, const char *src, size_t srclen) {
const __m128i indices = _mm_or_si128(t1, t3);

_mm_storeu_si128(reinterpret_cast<__m128i *>(out),
lookup_pshufb_improved<options & base64_url>(indices));
lookup_pshufb_improved<isbase64url>(indices));
out += 16;
}

Expand Down Expand Up @@ -38555,10 +38567,10 @@ simdutf_warn_unused size_t implementation::base64_length_from_binary(size_t leng
}

size_t implementation::binary_to_base64(const char * input, size_t length, char* output, base64_options options) const noexcept {
if(options == base64_url) {
return encode_base64<base64_url>(output, input, length);
if(options & base64_url) {
return encode_base64<true>(output, input, length, options);
} else {
return encode_base64<base64_default>(output, input, length);
return encode_base64<false>(output, input, length, options);
}
}
} // namespace westmere
Expand Down
52 changes: 42 additions & 10 deletions deps/simdutf/simdutf.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2024-05-07 22:33:11 -0400. Do not edit! */
/* auto-generated on 2024-07-11 00:01:58 -0400. Do not edit! */
/* begin file include/simdutf.h */
#ifndef SIMDUTF_H
#define SIMDUTF_H
Expand Down Expand Up @@ -594,7 +594,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
#define SIMDUTF_SIMDUTF_VERSION_H

/** The version of simdutf being used (major.minor.revision) */
#define SIMDUTF_VERSION "5.2.8"
#define SIMDUTF_VERSION "5.3.0"

namespace simdutf {
enum {
Expand All @@ -605,11 +605,11 @@ enum {
/**
* The minor version (major.MINOR.revision) of simdutf being used.
*/
SIMDUTF_VERSION_MINOR = 2,
SIMDUTF_VERSION_MINOR = 3,
/**
* The revision (major.minor.REVISION) of simdutf being used.
*/
SIMDUTF_VERSION_REVISION = 8
SIMDUTF_VERSION_REVISION = 0
};
} // namespace simdutf

Expand Down Expand Up @@ -2300,9 +2300,13 @@ simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t leng

// base64_options are used to specify the base64 encoding options.
using base64_options = uint64_t;
using base64_options = uint64_t;
enum : base64_options {
base64_default = 0, /* standard base64 format */
base64_url = 1 /* base64url format*/
base64_default = 0, /* standard base64 format (with padding) */
base64_url = 1, /* base64url format (no padding) */
base64_reverse_padding = 2, /* modifier for base64_default and base64_url */
base64_default_no_padding = base64_default | base64_reverse_padding, /* standard base64 format without padding */
base64_url_with_padding = base64_url | base64_reverse_padding, /* base64url with padding */
};

/**
Expand Down Expand Up @@ -2345,6 +2349,12 @@ simdutf_warn_unused size_t maximal_binary_length_from_base64(const char16_t * in
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* The default option (simdutf::base64_default) expects the characters `+` and `/` as part of its alphabet.
* The URL option (simdutf::base64_url) expects the characters `-` and `_` as part of its alphabet.
*
* The padding (`=`) is validated if present. There may be at most two padding characters at the end of the input.
* If there are any padding characters, the total number of characters (excluding spaces but including padding characters) must be divisible by four.
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
* If you fail to provide that much space, the function may cause a buffer overflow.
*
Expand All @@ -2365,8 +2375,13 @@ simdutf_warn_unused result base64_to_binary(const char * input, size_t length, c
simdutf_warn_unused size_t base64_length_from_binary(size_t length, base64_options options = base64_default) noexcept;

/**
* Convert a binary input to a base64 output. The output is always padded with equal signs so that it is
* a multiple of 4 bytes long.
* Convert a binary input to a base64 output.
*
* The default option (simdutf::base64_default) uses the characters `+` and `/` as part of its alphabet.
* Further, it adds padding (`=`) at the end of the output to ensure that the output length is a multiple of four.
*
* The URL option (simdutf::base64_url) uses the characters `-` and `_` as part of its alphabet. No padding
* is added at the end of the output.
*
* This function always succeeds.
*
Expand Down Expand Up @@ -2396,6 +2411,12 @@ size_t binary_to_base64(const char * input, size_t length, char* output, base64_
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* The default option (simdutf::base64_default) expects the characters `+` and `/` as part of its alphabet.
* The URL option (simdutf::base64_url) expects the characters `-` and `_` as part of its alphabet.
*
* The padding (`=`) is validated if present. There may be at most two padding characters at the end of the input.
* If there are any padding characters, the total number of characters (excluding spaces but including padding characters) must be divisible by four.
*
* You should call this function with a buffer that is at least maximal_binary_length_from_base64(input, length) bytes long.
* If you fail to provide that much space, the function may cause a buffer overflow.
*
Expand Down Expand Up @@ -2429,6 +2450,12 @@ simdutf_warn_unused result base64_to_binary(const char16_t * input, size_t lengt
* where the invalid character was found. When the error is BASE64_INPUT_REMAINDER, then
* r.count contains the number of bytes decoded.
*
* The default option (simdutf::base64_default) expects the characters `+` and `/` as part of its alphabet.
* The URL option (simdutf::base64_url) expects the characters `-` and `_` as part of its alphabet.
*
* The padding (`=`) is validated if present. There may be at most two padding characters at the end of the input.
* If there are any padding characters, the total number of characters (excluding spaces but including padding characters) must be divisible by four.
*
* The INVALID_BASE64_CHARACTER cases are considered fatal and you are expected to discard
* the output.
*
Expand Down Expand Up @@ -3590,8 +3617,13 @@ class implementation {
simdutf_warn_unused virtual size_t base64_length_from_binary(size_t length, base64_options options = base64_default) const noexcept = 0;

/**
* Convert a binary input to a base64 output. The output is always padded with equal signs so that it is
* a multiple of 4 bytes long.
* Convert a binary input to a base64 output.
*
* The default option (simdutf::base64_default) uses the characters `+` and `/` as part of its alphabet.
* Further, it adds padding (`=`) at the end of the output to ensure that the output length is a multiple of four.
*
* The URL option (simdutf::base64_url) uses the characters `-` and `_` as part of its alphabet. No padding
* is added at the end of the output.
*
* This function always succeeds.
*
Expand Down
Loading