From bf1dc0097b34e6370a3ae115ac2cc5b58e53b872 Mon Sep 17 00:00:00 2001
From: Ben Avison
Date: Tue, 30 Aug 2016 17:04:51 +0100
Subject: [PATCH 05/11] ARM: SIMD assembly optimization for BGR-to-RGB 32bpp normal blits

---
 src/video/SDL_blit_N.c              | 25 +++++++++++++++--
 src/video/arm/pixman-arm-simd-asm.S | 42 +++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c
index 08bb4a8a0..308f0ddef 100644
--- a/src/video/SDL_blit_N.c
+++ b/src/video/SDL_blit_N.c
@@ -31,7 +31,8 @@
 enum blit_features {
     BLIT_FEATURE_HAS_MMX = 1,
     BLIT_FEATURE_HAS_ALTIVEC = 2,
-    BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 4
+    BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH = 4,
+    BLIT_FEATURE_HAS_ARM_SIMD = 8
 };
 
 #if SDL_ALTIVEC_BLITTERS
@@ -853,7 +854,23 @@ static enum blit_features GetBlitFeatures( void )
 #endif
 #else
 /* Feature 1 is has-MMX */
-#define GetBlitFeatures() (SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0)
+#define GetBlitFeatures() ((SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0) | (SDL_HasARMSIMD() ? BLIT_FEATURE_HAS_ARM_SIMD : 0))
+#endif
+
+#if SDL_ARM_SIMD_BLITTERS
+void Blit_BGR888_RGB888ARMSIMDAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
+
+static void Blit_BGR888_RGB888ARMSIMD(SDL_BlitInfo *info)
+{
+    int32_t width = info->d_width;
+    int32_t height = info->d_height;
+    uint32_t *dstp = (uint32_t *)info->d_pixels;
+    int32_t dststride = width + (info->d_skip >> 2);
+    uint32_t *srcp = (uint32_t *)info->s_pixels;
+    int32_t srcstride = width + (info->s_skip >> 2);
+
+    Blit_BGR888_RGB888ARMSIMDAsm(width, height, dstp, dststride, srcp, srcstride);
+}
 #endif
 
 /* This is now endian dependent */
@@ -2394,6 +2411,10 @@ static const struct blit_table normal_blit_4[] = {
     /* has-altivec */
     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
       BLIT_FEATURE_HAS_ALTIVEC, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
+#endif
+#if SDL_ARM_SIMD_BLITTERS
+    { 0x000000FF,0x0000FF00,0x00FF0000, 4, 0x00FF0000,0x0000FF00,0x000000FF,
+      BLIT_FEATURE_HAS_ARM_SIMD, NULL, Blit_BGR888_RGB888ARMSIMD, NO_ALPHA | COPY_ALPHA },
 #endif
     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
diff --git a/src/video/arm/pixman-arm-simd-asm.S b/src/video/arm/pixman-arm-simd-asm.S
index 6dcbbe5e2..81e38c44b 100644
--- a/src/video/arm/pixman-arm-simd-asm.S
+++ b/src/video/arm/pixman-arm-simd-asm.S
@@ -363,3 +363,45 @@ generate_composite_function \
     nop_macro, /* cleanup */ \
     ARGBto565PixelAlpha_process_head, \
     ARGBto565PixelAlpha_process_tail
+
+ /******************************************************************************/
+
+.macro BGR888toRGB888_1pixel cond, reg, tmp
+        uxtb16&cond  tmp, WK&reg, ror #8
+        uxtb16&cond  WK&reg, WK&reg, ror #16
+        orr&cond     WK&reg, WK&reg, tmp, lsl #8
+.endm
+
+.macro BGR888toRGB888_2pixels cond, reg1, reg2, tmp1, tmp2
+        uxtb16&cond  tmp1, WK&reg1, ror #8
+        uxtb16&cond  WK&reg1, WK&reg1, ror #16
+        uxtb16&cond  tmp2, WK&reg2, ror #8
+        uxtb16&cond  WK&reg2, WK&reg2, ror #16
+        orr&cond     WK&reg1, WK&reg1, tmp1, lsl #8
+        orr&cond     WK&reg2, WK&reg2, tmp2, lsl #8
+.endm
+
+.macro BGR888toRGB888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   cond, numbytes, firstreg, SRC, unaligned_src
+.endm
+
+.macro BGR888toRGB888_process_tail cond, numbytes, firstreg
+ .if numbytes >= 8
+        BGR888toRGB888_2pixels cond, %(firstreg+0), %(firstreg+1), MASK, STRIDE_M
+  .if numbytes == 16
+        BGR888toRGB888_2pixels cond, %(firstreg+2), %(firstreg+3), MASK, STRIDE_M
+  .endif
+ .else @ numbytes == 4
+        BGR888toRGB888_1pixel cond, %(firstreg+0), MASK
+ .endif
+.endm
+
+generate_composite_function \
+    Blit_BGR888_RGB888ARMSIMDAsm, 32, 0, 32, \
+    FLAG_DST_WRITEONLY | FLAG_COND_EXEC | FLAG_PROCESS_PRESERVES_SCRATCH, \
+    2, /* prefetch distance */ \
+    nop_macro, /* init */ \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    BGR888toRGB888_process_head, \
+    BGR888toRGB888_process_tail
-- 
2.17.1