SSE routines

From vegard.wiki
Revision as of 08:54, 22 November 2022 by Vegard (talk | contribs) (add link from saidwho12)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

Boilerplate

#include <emmintrin.h>

Get length of string prefix consisting of letters and underscore

This can be useful for e.g. parsing identifiers.

The string does not need to be aligned. Beware, however, that each 16-byte load may read up to 15 bytes past the NUL terminator; if those extra bytes cross a page boundary into an unmapped page, the load will fault even though the string itself is valid.

/*
 * Return the length of the longest prefix of the NUL-terminated string s
 * that consists only of ASCII letters (A-Z, a-z) and underscores.
 *
 * The string is scanned 16 bytes at a time with unaligned SSE2 loads, so
 * up to 15 bytes past the NUL terminator may be read.
 */
int nr_alpha(const char *s)
{
    const char *start = s;

    /* Exclusive bounds for the signed range test 'A' <= b <= 'Z'. */
    const __m128i below_A = _mm_set1_epi8('A' - 1);
    const __m128i above_Z = _mm_set1_epi8('Z' + 1);
    const __m128i ch_underscore = _mm_set1_epi8('_');
    /* Clearing bit 0x20 folds 'a'..'z' onto 'A'..'Z'. */
    const __m128i fold_case = _mm_set1_epi8(~0x20);

    while (1) {
        __m128i ch = _mm_loadu_si128((const __m128i *) s);
        __m128i b = _mm_and_si128(ch, fold_case);

        /*
         * The comparisons are signed, so bytes with the high bit set are
         * negative and can never fall inside the letter range.
         * The underscore test uses the unfolded byte.
         */
        __m128i is_letter = _mm_and_si128(_mm_cmpgt_epi8(b, below_A),
                                          _mm_cmplt_epi8(b, above_Z));
        __m128i ok = _mm_or_si128(is_letter,
                                  _mm_cmpeq_epi8(ch, ch_underscore));

        /*
         * movemask only fills bits 0..15, so the inverted mask always has
         * bits 16..31 set: ffs yields 1..17, i.e. an index of 0..16 where
         * 16 means "all sixteen bytes matched, keep going".
         */
        unsigned int ffs = __builtin_ffs(~_mm_movemask_epi8(ok)) - 1;
        s += ffs;

        if (ffs < 16)
            return s - start;
    }
}

Get length of string prefix consisting of whitespace

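(The same caveat applies as for the routine above: the string need not be aligned, but up to 15 bytes past the NUL terminator may be loaded, which can fault if they cross into an unmapped page.)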

/*
 * Return the number of leading whitespace bytes in the NUL-terminated
 * string s, where whitespace means space, tab, carriage return or newline.
 *
 * Input is consumed in unaligned 16-byte SSE2 chunks, so up to 15 bytes
 * past the NUL terminator may be read.
 */
int nr_whitespace(const char *s)
{
    const __m128i space = _mm_set1_epi8(' ');
    const __m128i tab = _mm_set1_epi8('\t');
    const __m128i cr = _mm_set1_epi8('\r');
    const __m128i lf = _mm_set1_epi8('\n');
    const char *cursor = s;

    for (;;) {
        __m128i chunk = _mm_loadu_si128((const __m128i *) cursor);

        __m128i horizontal = _mm_or_si128(_mm_cmpeq_epi8(chunk, space),
                                          _mm_cmpeq_epi8(chunk, tab));
        __m128i line_end = _mm_or_si128(_mm_cmpeq_epi8(chunk, cr),
                                        _mm_cmpeq_epi8(chunk, lf));
        int blank_mask = _mm_movemask_epi8(_mm_or_si128(horizontal, line_end));

        /*
         * Only bits 0..15 of blank_mask can be set, so its complement is
         * never zero; the first clear bit is at index 0..16, and 16 means
         * the whole chunk was whitespace.
         */
        unsigned int run = __builtin_ffs(~blank_mask) - 1;
        cursor += run;

        if (run >= 16)
            continue;

        return cursor - s;
    }
}

See also