Commit a95b5568 authored by Arjen Hiemstra's avatar Arjen Hiemstra
Browse files

Drop custom SSE code from WindowQuads::makeInterleavedArrays

Both GCC and Clang do auto vecrization these days and testing shows that
that actually produces faster code, so dropping the SSE stuff makes
things both simpler and faster.
parent 0c8d87f9
......@@ -25,10 +25,6 @@
#include <kconfiggroup.h>
#include <ksharedconfig.h>
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#include <optional>
namespace KWin
......@@ -1055,106 +1051,41 @@ void WindowQuadList::makeInterleavedArrays(unsigned int type, GLVertex2D *vertic
Q_ASSERT(type == GL_QUADS || type == GL_TRIANGLES);
switch (type) {
case GL_QUADS:
#if defined(__SSE2__)
if (!(intptr_t(vertex) & 0xf)) {
for (const WindowQuad &quad : *this) {
alignas(16) GLVertex2D v[4];
for (int j = 0; j < 4; j++) {
const WindowVertex &wv = quad[j];
v[j].position = QVector2D(wv.x(), wv.y());
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
}
case GL_QUADS: {
for (const WindowQuad &quad : *this) {
for (int j = 0; j < 4; j++) {
const WindowVertex &wv = quad[j];
const __m128i *srcP = reinterpret_cast<const __m128i *>(&v);
__m128i *dstP = reinterpret_cast<__m128i *>(vertex);
GLVertex2D v;
v.position = QVector2D(wv.x(), wv.y());
v.texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
_mm_stream_si128(&dstP[0], _mm_load_si128(&srcP[0])); // Top-left
_mm_stream_si128(&dstP[1], _mm_load_si128(&srcP[1])); // Top-right
_mm_stream_si128(&dstP[2], _mm_load_si128(&srcP[2])); // Bottom-right
_mm_stream_si128(&dstP[3], _mm_load_si128(&srcP[3])); // Bottom-left
vertex += 4;
}
} else
#endif // __SSE2__
{
for (const WindowQuad &quad : *this) {
for (int j = 0; j < 4; j++) {
const WindowVertex &wv = quad[j];
GLVertex2D v;
v.position = QVector2D(wv.x(), wv.y());
v.texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
*(vertex++) = v;
}
*(vertex++) = v;
}
}
break;
case GL_TRIANGLES:
#if defined(__SSE2__)
if (!(intptr_t(vertex) & 0xf)) {
for (const WindowQuad &quad : *this) {
alignas(16) GLVertex2D v[4];
} break;
case GL_TRIANGLES: {
for (const WindowQuad &quad : *this) {
GLVertex2D v[4]; // Four unique vertices / quad
for (int j = 0; j < 4; j++) {
const WindowVertex &wv = quad[j];
for (int j = 0; j < 4; j++) {
const WindowVertex &wv = quad[j];
v[j].position = QVector2D(wv.x(), wv.y());
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
}
const __m128i *srcP = reinterpret_cast<const __m128i *>(&v);
__m128i *dstP = reinterpret_cast<__m128i *>(vertex);
__m128i src[4];
src[0] = _mm_load_si128(&srcP[0]); // Top-left
src[1] = _mm_load_si128(&srcP[1]); // Top-right
src[2] = _mm_load_si128(&srcP[2]); // Bottom-right
src[3] = _mm_load_si128(&srcP[3]); // Bottom-left
// First triangle
_mm_stream_si128(&dstP[0], src[1]); // Top-right
_mm_stream_si128(&dstP[1], src[0]); // Top-left
_mm_stream_si128(&dstP[2], src[3]); // Bottom-left
v[j].position = QVector2D(wv.x(), wv.y());
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
}
// Second triangle
_mm_stream_si128(&dstP[3], src[3]); // Bottom-left
_mm_stream_si128(&dstP[4], src[2]); // Bottom-right
_mm_stream_si128(&dstP[5], src[1]); // Top-right
// First triangle
*(vertex++) = v[1]; // Top-right
*(vertex++) = v[0]; // Top-left
*(vertex++) = v[3]; // Bottom-left
vertex += 6;
}
} else
#endif // __SSE2__
{
for (const WindowQuad &quad : *this) {
GLVertex2D v[4]; // Four unique vertices / quad
for (int j = 0; j < 4; j++) {
const WindowVertex &wv = quad[j];
v[j].position = QVector2D(wv.x(), wv.y());
v[j].texcoord = QVector2D(wv.u(), wv.v()) * coeff + offset;
}
// First triangle
*(vertex++) = v[1]; // Top-right
*(vertex++) = v[0]; // Top-left
*(vertex++) = v[3]; // Bottom-left
// Second triangle
*(vertex++) = v[3]; // Bottom-left
*(vertex++) = v[2]; // Bottom-right
*(vertex++) = v[1]; // Top-right
}
// Second triangle
*(vertex++) = v[3]; // Bottom-left
*(vertex++) = v[2]; // Bottom-right
*(vertex++) = v[1]; // Top-right
}
break;
} break;
default:
break;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment