| Summary: | _allshr in SDL_stdlib.c is not working properly | ||
|---|---|---|---|
| Product: | SDL | Reporter: | Mark Pizzolato <MarkPizzolato-libSDLbugs> |
| Component: | build | Assignee: | Sam Lantinga <slouken> |
| Status: | RESOLVED FIXED | QA Contact: | Sam Lantinga <slouken> |
| Severity: | blocker | ||
| Priority: | P2 | ||
| Version: | 2.0.5 | ||
| Hardware: | x86 | ||
| OS: | Windows 10 | ||
Fixed, thanks! https://hg.libsdl.org/SDL/rev/513f0e80a7dd |
On Windows with Visual Studio, when building SDL as a static library using the x86 (32bit) mode, several intrinsic operations are implemented in code in SDL_stdlib.c. One of these, _allshr() is not properly implemented and fails for some input. As a result, some operations on 64bit data elements (long long) don't always work. I classified this bug as a blocker since things absolutely don't work when the affected code is invoked. The affected code is only invoked when SDL is compiled in x86 mode on Visual Studio when building a SDL as a static library. This build environment isn't common, and hence the bug hasn't been noticed previously. I reopened #2537 and mentioned this problem and provided a fix. That fix is provided again here along with test code which could be added to some of the SDL test code. This test code verifies that the x86 intrinsic routines produce the same results as the native x64 instructions which these routines emulate under the Microsoft compiler. The point of the tests is to make sure that Visual Studio x86 code produces the same results as Visual Studio x64 code. Some of the arguments (or boundary conditions) may produce different results on other compiler environments, so the tests really shouldn't be run on all compilers. The test driver only actually exercised code when the compiler defines _MSC_VER, so the driver can generically be invoked without issue. Given the fact that, if 64bit operations don't produce correct results everywhere, running other tests might produce meaningless results, it might make sense for this test to be invoked by the SDLTest_harness before running any other tests. --- a/libSDL/SDL2-2.0.5/src/stdlib/SDL_stdlib.c +++ b/libSDL/SDL2-2.0.5/src/stdlib/SDL_stdlib.c @@ -365,19 +365,22 @@ integer_QnaN_or_zero: localexit: leave ret } /* *INDENT-ON* */ } +/* Static Compiles on Visual Studio 2008 also define _fto2_sse. Provide a way not to do it here */ +#if (_MSC_VER < 1500) void _ftol2_sse() { _ftol(); } +#endif /* _MSC_VER < 1500 */ /* 64-bit math operators for 32-bit systems */ void __declspec(naked) _allmul() { /* *INDENT-OFF* */ @@ -910,30 +913,30 @@ RETZERO: void __declspec(naked) _allshr() { /* *INDENT-OFF* */ __asm { - cmp cl,40h - jae RETZERO + cmp cl,3Fh + jae RETSIGN cmp cl,20h jae MORE32 shrd eax,edx,cl sar edx,cl ret MORE32: mov eax,edx - xor edx,edx + sar edx,1Fh and cl,1Fh sar eax,cl ret -RETZERO: - xor eax,eax - xor edx,edx +RETSIGN: + sar edx,1Fh + mov eax,edx ret } /* *INDENT-ON* */ } void __declspec(naked) ----------- static int TST_allmul (void *a, void *b, int arg, void *result, void *expected) { (*(long long *)result) = ((*(long long *)a) * (*(long long *)b)); return (*(long long *)result) == (*(long long *)expected); } static int TST_alldiv (void *a, void *b, int arg, void *result, void *expected) { (*(long long *)result) = ((*(long long *)a) / (*(long long *)b)); return (*(long long *)result) == (*(long long *)expected); } static int TST_allrem (void *a, void *b, int arg, void *result, void *expected) { (*(long long *)result) = ((*(long long *)a) % (*(long long *)b)); return (*(long long *)result) == (*(long long *)expected); } static int TST_ualldiv (void *a, void *b, int arg, void *result, void *expected) { (*(unsigned long long *)result) = ((*(unsigned long long *)a) / (*(unsigned long long *)b)); return (*(unsigned long long *)result) == (*(unsigned long long *)expected); } static int TST_uallrem (void *a, void *b, int arg, void *result, void *expected) { (*(unsigned long long *)result) = ((*(unsigned long long *)a) % (*(unsigned long long *)b)); return (*(unsigned long long *)result) == (*(unsigned long long *)expected); } static int TST_allshl (void *a, void *b, int arg, void *result, void *expected) { (*(long long *)result) = (*(long long *)a) << arg; return (*(long long *)result) == (*(long long *)expected); } static int TST_aullshl (void *a, void *b, int arg, void *result, void *expected) { (*(unsigned long long *)result) = (*(unsigned long long *)a) << arg; return (*(unsigned long long *)result) == (*(unsigned long long *)expected); } static int TST_allshr (void *a, void *b, int arg, void *result, void *expected) { (*(long long *)result) = (*(long long *)a) >> arg; return (*(long long *)result) == (*(long long *)expected); } static int TST_aullshr (void *a, void *b, int arg, void *result, void *expected) { (*(unsigned long long *)result) = (*(unsigned long long *)a) >> arg; return (*(unsigned long long *)result) == (*(unsigned long long *)expected); } typedef int (*LL_Intrinsic)(void *a, void *b, int arg, void *result, void *expected); typedef struct { const char *operation; LL_Intrinsic routine; unsigned long long a, b; int arg; unsigned long long expected_result; } LL_Test; static LL_Test LL_Tests[] = { {"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0x0000000000000000ll}, {"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0xFFFFFFFFFFFFFFFEll}, {"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0xFFFFFFFF00000000ll}, {"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0xFFFFFFFE00000000ll}, {"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_allshr", &TST_allshr, 0xAAAAAAAA55555555ll, 0ll, 63, 0xFFFFFFFFFFFFFFFFll}, {"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0xFFFFFFFFFFFFFFFFll}, {"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0xFFFFFFFFFFFFFFFFll}, {"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0xFFFFFFFFFFFFFFFFll}, {"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0xFFFFFFFFFFFFFFFFll}, {"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 65, 0x0000000000000000ll}, {"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 1, 0x2FAFAFAFAFAFAFAFll}, {"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 32, 0x000000005F5F5F5Fll}, {"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 33, 0x000000002FAFAFAFll}, {"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0x0000000000000000ll}, {"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0xFFFFFFFFFFFFFFFEll}, {"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0xFFFFFFFF00000000ll}, {"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0xFFFFFFFE00000000ll}, {"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0x0000000000000000ll}, {"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0x7FFFFFFFFFFFFFFFll}, {"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0x00000000FFFFFFFFll}, {"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0x000000007FFFFFFFll}, {"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_allmul", &TST_allmul, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000000ll, 0, 0x0000000000000000ll}, {"_allmul", &TST_allmul, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x000000000FFFFFFFll}, {"_allmul", &TST_allmul, 0x0000000000000001ll, 0x000000000FFFFFFFll, 0, 0x000000000FFFFFFFll}, {"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000000000010ll, 0, 0x00000000FFFFFFF0ll}, {"_allmul", &TST_allmul, 0x0000000000000010ll, 0x000000000FFFFFFFll, 0, 0x00000000FFFFFFF0ll}, {"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000000000100ll, 0, 0x0000000FFFFFFF00ll}, {"_allmul", &TST_allmul, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000FFFFFFF00ll}, {"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000010000000ll, 0, 0x00FFFFFFF0000000ll}, {"_allmul", &TST_allmul, 0x0000000010000000ll, 0x000000000FFFFFFFll, 0, 0x00FFFFFFF0000000ll}, {"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000080000000ll, 0, 0x07FFFFFF80000000ll}, {"_allmul", &TST_allmul, 0x0000000080000000ll, 0x000000000FFFFFFFll, 0, 0x07FFFFFF80000000ll}, {"_allmul", &TST_allmul, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0xFFFFFFFF00000000ll}, {"_allmul", &TST_allmul, 0x0000000080000000ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xFFFFFFFF00000000ll}, {"_allmul", &TST_allmul, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000008ll, 0, 0xFFFFFFFEFFFFFFF0ll}, {"_allmul", &TST_allmul, 0x0000000080000008ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xFFFFFFFEFFFFFFF0ll}, {"_allmul", &TST_allmul, 0x00000000FFFFFFFFll, 0x00000000FFFFFFFFll, 0, 0xFFFFFFFE00000001ll}, {"_alldiv", &TST_alldiv, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_alldiv", &TST_alldiv, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_alldiv", &TST_alldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_alldiv", &TST_alldiv, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_alldiv", &TST_alldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_alldiv", &TST_alldiv, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000001ll}, {"_alldiv", &TST_alldiv, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll}, {"_alldiv", &TST_alldiv, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x000000000FFFFFFFll}, {"_alldiv", &TST_alldiv, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x00000000FFFFFFFFll}, {"_alldiv", &TST_alldiv, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000000ll}, {"_alldiv", &TST_alldiv, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x000000000FFFFFFFll}, {"_alldiv", &TST_alldiv, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x000000000FFFFFFFll}, {"_alldiv", &TST_alldiv, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0x0000000000000000ll}, {"_alldiv", &TST_alldiv, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000080000008ll}, {"_alldiv", &TST_alldiv, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xC000000080000008ll}, {"_alldiv", &TST_alldiv, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000000000007FFFll}, {"_alldiv", &TST_alldiv, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000001ll}, {"_allrem", &TST_allrem, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x000000000000000Fll}, {"_allrem", &TST_allrem, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000100ll}, {"_allrem", &TST_allrem, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0xFFFFFFFFFFFFFFFEll}, {"_allrem", &TST_allrem, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll}, {"_allrem", &TST_allrem, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000FFFF0000FFEEll}, {"_allrem", &TST_allrem, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0xFFFFFFFFFFFFFFFFll}, {"_ualldiv", &TST_ualldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000001ll}, {"_ualldiv", &TST_ualldiv, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll}, {"_ualldiv", &TST_ualldiv, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x000000000FFFFFFFll}, {"_ualldiv", &TST_ualldiv, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x00000000FFFFFFFFll}, {"_ualldiv", &TST_ualldiv, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x000000000FFFFFFFll}, {"_ualldiv", &TST_ualldiv, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x000000000FFFFFFFll}, {"_ualldiv", &TST_ualldiv, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0x00000001FFFFFFFFll}, {"_ualldiv", &TST_ualldiv, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll}, {"_ualldiv", &TST_ualldiv, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000000000007FFFll}, {"_ualldiv", &TST_ualldiv, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000001ll}, {"_uallrem", &TST_uallrem, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll}, {"_uallrem", &TST_uallrem, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll}, {"_uallrem", &TST_uallrem, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x000000000000000Fll}, {"_uallrem", &TST_uallrem, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000100ll}, {"_uallrem", &TST_uallrem, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x0000000000000000ll}, {"_uallrem", &TST_uallrem, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0x000000007FFFFFFEll}, {"_uallrem", &TST_uallrem, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xFFFFFFFEFFFFFFF0ll}, {"_uallrem", &TST_uallrem, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x7FFFFFFEFFFFFFF0ll}, {"_uallrem", &TST_uallrem, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000FFFF0000FFEEll}, {"_uallrem", &TST_uallrem, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000000ll}, {NULL} }; int SDLTest_stdlib (SDL_bool verbose) { LL_Test *t; int failed = 0; #if defined(_MSC_VER) && !defined(_WIN64) for (t = LL_Tests; t->routine != NULL; t++) { unsigned long long result = 0; unsigned int *al = (unsigned int *)&t->a; unsigned int *bl = (unsigned int *)&t->b; unsigned int *el = (unsigned int *)&t->expected_result; unsigned int *rl = (unsigned int *)&result; if (!t->routine(&t->a, &t->b, t->arg, &result, &t->expected_result)) { if (verbose) SDL_Log("%s(0x%08X%08X, 0x%08X%08X, %3d, produced: 0x%08X%08X, expected: 0x%08X%08X\n", t->operation, al[1], al[0], bl[1], bl[0], t->arg, rl[1], rl[0], el[1], el[0]); ++failed; } } #ifndef HAVE_LIBC if (verbose && (failed == 0)) SDL_Log("All 64bit instrinsic tests passed\n"); #endif #else if (verbose) SDL_Log("Microsoft 64bit intrinsics aren't needed with this compiler\n"); #endif return (failed ? 1 : 0); }