diff -r 2b1cce20e59b src/render/SDL_rendergeometry_generic.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/render/SDL_rendergeometry_generic.c Wed Nov 04 16:32:30 2020 -0800 @@ -0,0 +1,284 @@ +static void +RasterizeTriangle(SDL_Renderer * renderer, SDL_Texture *texture, SDL_Vertex v1, + SDL_Vertex v2, SDL_Vertex v3) +{ + const float minX = SDL_min(v1.position.x, SDL_min(v2.position.x, v3.position.x)); + const float minY = SDL_min(v1.position.y, SDL_min(v2.position.y, v3.position.y)); + const float maxX = SDL_max(v1.position.x, SDL_max(v2.position.x, v3.position.x)); + const float maxY = SDL_max(v1.position.y, SDL_max(v2.position.y, v3.position.y)); + + // If we're outside the clip rect, then don't run the triangle rasterizer + if (SDL_RenderIsClipEnabled(renderer)) { + SDL_Rect clip_rect; + SDL_RenderGetClipRect(renderer, &clip_rect); + if ((minX > clip_rect.x + clip_rect.w || maxX < clip_rect.x) && + (minY > clip_rect.y + clip_rect.h || maxY < clip_rect.y)) + { + return; + } + } + + // This function operates in s28.4 fixed point: it's more precise than + // floating point and often faster. This also effectively lets us work in + // a subpixel space where each pixel is divided into 256 subpixels. + // TODO: check for overflow + + // Find integral bounding box, scale to fixed point. We use this to + // iterate over all pixels possibly covered by the triangle. + const Sint32 minXf = SDL_floor(minX) * 16; + const Sint32 minYf = SDL_floor(minY) * 16; + const Sint32 maxXf = SDL_ceil(maxX) * 16; + const Sint32 maxYf = SDL_ceil(maxY) * 16; + + // Find center of bounding box, used for translating coordinates. + // This accomplishes two things: 1) makes the fixed point calculations less + // likely to overflow 2) makes it less likely that triangles will be rendered + // differently on different parts of the screen (seems to happen sometimes + // with fractional coordinates). Make sure they're on integer boundaries too, + // to make it easy to calculate our starting position. + const Sint32 meanXf = round((maxXf - minXf) / 2 / 16) * 16; + const Sint32 meanYf = round((maxYf - minYf) / 2 / 16) * 16; + + // Translate vertex coordinates with respect to the center of the bounding + // box, and scale to fixed point. + Sint32 f1x = round(v1.position.x * 16) - meanXf; + Sint32 f1y = round(v1.position.y * 16) - meanYf; + Sint32 f2x = round(v2.position.x * 16) - meanXf; + Sint32 f2y = round(v2.position.y * 16) - meanYf; + Sint32 f3x = round(v3.position.x * 16) - meanXf; + Sint32 f3y = round(v3.position.y * 16) - meanYf; + + // Calculate starting position for iteration. It's the top-left of our + // bounding box with respect to the center of the bounding box. We add a + // half-pixel on each axis to match hardware renderers, which evaluate at the + // center of pixels. + const Sint32 px = minXf - meanXf + 8; + const Sint32 py = minYf - meanYf + 8; + + // Calculate barycentric coordinates at starting position. Barycentric + // coordinates tell us the position of a point with respect to the + // edges/vertices of a triangle: we can easily use these to calculate if + // a point is inside a triangle (the three barycentric coordinates will all + // be positive) and how to interpolate vertex attributes (multiply them by + // the normalized barycentric coordinates at that point.) + Sint32 w1 = (f3x - f2x) * (py - f2y) - (f3y - f2y) * (px - f2x); + Sint32 w2 = (f1x - f3x) * (py - f3y) - (f1y - f3y) * (px - f3x); + Sint32 w3 = (f2x - f1x) * (py - f1y) - (f2y - f1y) * (px - f1x); + + // Calculate the normalization factor for transforming our barycentric + // coordinates into interpolation constants. If it's negative, then the + // triangle is back-facing (wound the wrong way), and we flip two vertices + // to make it front-facing. Keep this factor as a float since 1) we'll be + // dividing by it later 2) we don't lose precision going through the raster + // loop. + float normalization = (w1 + w2 + w3); + if (normalization < 0) { + GEOM_Vertex vswap = v3; + v3 = v2; + v2 = vswap; + + Sint32 fxswap = f3x; + f3x = f2x; + f2x = fxswap; + Sint32 fyswap = f3y; + f3y = f2y; + f2y = fyswap; + + Sint32 wswap = w3; + w3 = -w2; + w2 = -wswap; + w1 = -w1; + + normalization = -normalization; + } + + // We deal with shared edges between triangles by defining a fill rule: only + // edges on the top or left of the triangle will be filled. We could change + // the comparison in the loop below to differentiate between greater-than + // and greater-or-equal-than, but since we're in fixed point space where + // everything is an integer we instead add a bias to each barycentric + // coordinate corresponding to a non-top, non-left edge. + const int bias1 = ((f3y == f2y && f3x > f2x) || f3x < f2x) ? 0 : -1; + const int bias2 = ((f3y == f1y && f1x > f3x) || f1x < f3x) ? 0 : -1; + const int bias3 = ((f2y == f1y && f2x > f1x) || f2x < f1x) ? 0 : -1; + w1 += bias1; + w2 += bias2; + w3 += bias3; + + // As we go through each pixel, we use the barycentric coordinates to check + // if they're covered by the triangle. We could recalculate them every time, + // but since they're linear we can calculate the linear factors with respect + // to advancing through columns and rows and just add each time through the + // loop. We multiply to get from subpixel space back to pixel space, since + // we'll be iterating pixel by pixel. + const Sint32 a1 = (f2y - f3y) * 16; + const Sint32 a2 = (f3y - f1y) * 16; + const Sint32 a3 = (f1y - f2y) * 16; + const Sint32 b1 = (f3x - f2x) * 16; + const Sint32 b2 = (f1x - f3x) * 16; + const Sint32 b3 = (f2x - f1x) * 16; + + // Save the original texture color and alpha mod here, since we change it + // according to vertex attributes and need to return it to its original state + // afterwards. + Uint8 original_mod_r, original_mod_g, original_mod_b, original_mod_a; + if (texture) { + SDL_GetTextureColorMod(texture, &original_mod_r, &original_mod_g, &original_mod_b); + SDL_GetTextureAlphaMod(texture, &original_mod_a); + } + + // Store texture width and height for use in mapping vertex attributes + int texture_width = 0, texture_height = 0; + if (texture) { + SDL_QueryTexture(texture, NULL, NULL, &texture_width, &texture_height); + } + + // Precalculate normalized vertex attributes. We just need to multiply these + // by the barycentric coordinates and sum them to get the interpolated vertex + // attribute for any point. This can save a few frames per second. + const float col1r = v1.color.r / normalization; + const float col1g = v1.color.g / normalization; + const float col1b = v1.color.b / normalization; + const float col1a = v1.color.a / normalization; + const float col2r = v2.color.r / normalization; + const float col2g = v2.color.g / normalization; + const float col2b = v2.color.b / normalization; + const float col2a = v2.color.a / normalization; + const float col3r = v3.color.r / normalization; + const float col3g = v3.color.g / normalization; + const float col3b = v3.color.b / normalization; + const float col3a = v3.color.a / normalization; + const float v1u = v1.tex_coord.x * texture_width / normalization; + const float v1v = v1.tex_coord.y * texture_height / normalization; + const float v2u = v2.tex_coord.x * texture_width / normalization; + const float v2v = v2.tex_coord.y * texture_height / normalization; + const float v3u = v3.tex_coord.x * texture_width / normalization; + const float v3v = v3.tex_coord.y * texture_height / normalization; + + // If the triangle is uniformly-colored, we can get a big speed up by setting + // the color once and drawing batches of rows, rather than drawing individually + // colored pixels. Avoid malloc and a dynamic buffer size since it's slower + // than just grabbing space from the stack. + SDL_bool isUniformColor = SDL_FALSE; + SDL_Rect rectsbuffer[1024]; + int rects_i = 0; + if (!texture && GEOM_ColorEquals(v1.color, v2.color) && GEOM_ColorEquals(v1.color, v3.color)) { + isUniformColor = SDL_TRUE; + SDL_SetRenderDrawColor(renderer, v1.color.r, v1.color.g, v1.color.b, v1.color.a); + } + + // Iterate over all pixels in the bounding box. + for (int y = minYf / 16; y <= maxYf / 16; y++) { + // Stash barycentric coordinates at start of row + Sint32 w1_row = w1; + Sint32 w2_row = w2; + Sint32 w3_row = w3; + + // Keep track of where the triangle starts on this row, as an optimization + // to draw uniformly-colored triangles. + SDL_bool in_triangle = SDL_FALSE; + int x_start; + + for (int x = minXf / 16; x <= maxXf / 16; x++) { + // If all barycentric coordinates are positive, we're inside the triangle + if (w1 >= 0 && w2 >= 0 && w3 >= 0) { + if (!in_triangle) { + // We draw uniformly-colored triangles row by row, so we need to keep + // track of where the row starts and know when it ends. + x_start = x; + in_triangle = SDL_TRUE; + } + + if (!isUniformColor) { + // Fix the adjustment due to fill rule. It's incorrect when calculating + // interpolation values. + const Sint32 alpha = w1 - bias1; + const Sint32 beta = w2 - bias2; + const Sint32 gamma = w3 - bias3; + + // Interpolate color + const Uint8 r = col1r * alpha + col2r * beta + col3r * gamma; + const Uint8 g = col1g * alpha + col2g * beta + col3g * gamma; + const Uint8 b = col1b * alpha + col2b * beta + col3b * gamma; + const Uint8 a = col1a * alpha + col2a * beta + col3a * gamma; + + if (!texture) { + // Draw a single colored pixel + SDL_SetRenderDrawColor(renderer, r, g, b, a); + SDL_RenderDrawPoint(renderer, x, y); + } else { + // Copy a pixel from the source texture to the target pixel. This + // effectively does nearest neighbor sampling. Could probably be + // extended to copy from a larger rect to do bilinear sampling if + // needed. + const int u = v1u * alpha + v2u * beta + v3u * gamma; + const int v = v1v * alpha + v2v * beta + v3v * gamma; + SDL_SetTextureColorMod(texture, r, g, b); + SDL_SetTextureAlphaMod(texture, a); + SDL_Rect srcrect; + srcrect.x = u; + srcrect.y = v; + srcrect.w = 1; + srcrect.h = 1; + SDL_Rect destrect; + destrect.x = x; + destrect.y = y; + destrect.w = 1; + destrect.h = 1; + SDL_RenderCopy(renderer, texture, &srcrect, &destrect); + } + } + } else if (in_triangle) { + // No longer in triangle, so we're done with this row. + if (isUniformColor) { + // For uniformly-colored triangles, store lines so we can send them + // to the renderer in batches. This provides a huge speedup in most + // cases (even with SDL 2.0.10's built-in batching!). + rectsbuffer[rects_i].x = x_start; + rectsbuffer[rects_i].y = y; + rectsbuffer[rects_i].w = x - x_start; + rectsbuffer[rects_i].h = 1; + rects_i++; + if (rects_i == 1024) { + SDL_RenderFillRects(renderer, rectsbuffer, rects_i); + rects_i = 0; + } + } + break; + } + // Increment barycentric coordinates one pixel rightwards + w1 += a1; + w2 += a2; + w3 += a3; + } + // Increment barycentric coordinates one pixel downwards + w1 = w1_row + b1; + w2 = w2_row + b2; + w3 = w3_row + b3; + } + + if (isUniformColor) { + SDL_RenderFillRects(renderer, rectsbuffer, rects_i); + } + + // Restore original texture color and alpha mod. + if (texture) { + SDL_SetTextureColorMod(texture, original_mod_r, original_mod_g, original_mod_b); + SDL_SetTextureAlphaMod(texture, original_mod_a); + } +} + +static int +GenericRenderGeometry(SDL_Renderer *renderer, SDL_Texture *texture, + SDL_Vertex *vertices, int num_vertices, Uint16 *indices, + int num_indices) +{ + for (int i = 0; i + 3 <= (indices ? num_indices : num_vertices); i += 3) { + RasterizeTriangle(renderer, texture, + vertices[indices ? indices[i] : i], + vertices[indices ? indices[i+1] : i+1], + vertices[indices ? indices[i+2] : i+2]); + } + + return 0; +} \ No newline at end of file