/*
 *  Copyright (c) 2012 Dmitry Kazakov <dimula73@gmail.com>
 *  Copyright (c) 2015 Thorsten Zachmann <zachmann@kde.org>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include "kis_composition_benchmark.h"

22
#include <QTest>
23 24 25 26 27 28 29

#include <KoColorSpace.h>
#include <KoCompositeOp.h>
#include <KoColorSpaceRegistry.h>

#include <KoColorSpaceTraits.h>
#include <KoCompositeOpAlphaDarken.h>
30
#include <KoCompositeOpOver.h>
31 32
#include "KoOptimizedCompositeOpFactory.h"

33 34


35
// for calculation of the needed alignment
#include <config-vc.h>
37
#ifdef HAVE_VC
38 39 40 41 42 43
#if defined _MSC_VER
// Let's shut up the "possible loss of data" and "forcing value to bool 'true' or 'false'" warnings
#pragma warning ( push )
#pragma warning ( disable : 4244 )
#pragma warning ( disable : 4800 )
#endif
44 45
#include <Vc/Vc>
#include <Vc/IO>
46 47 48
#if defined _MSC_VER
#pragma warning ( pop )
#endif
49 50

#include <KoOptimizedCompositeOpOver32.h>
51
#include <KoOptimizedCompositeOpOver128.h>
52
#include <KoOptimizedCompositeOpAlphaDarken32.h>
53 54
#endif

55 56
// for posix_memalign()
#include <stdlib.h>
// for memset() and memcpy()
#include <string.h>
57

58 59
#include <kis_debug.h>

60 61
// Presumably the index of the alpha channel inside a four-channel pixel.
// NOTE(review): not referenced in the visible part of this file; the helpers
// below use the literal index 3 instead -- confirm whether it is still needed.
const int alpha_pos = 3;

62 63 64 65 66 67
// Selects how generateAlphaValue() fills an alpha channel.
enum AlphaRange {
    ALPHA_ZERO,    // alpha stays 0
    ALPHA_UNIT,    // alpha is the generator's unit() value
    ALPHA_RANDOM   // alpha is drawn from the generator
};

68 69 70 71

/**
 * Produce a single alpha value according to \p range.
 *
 * \param range which kind of alpha to produce
 * \param rnd   generator providing operator()() and unit(); it is only
 *              consulted for ALPHA_UNIT and ALPHA_RANDOM
 * \return 0 for ALPHA_ZERO, rnd.unit() for ALPHA_UNIT, rnd() for ALPHA_RANDOM
 */
template <typename channel_type, class RandomGenerator>
inline channel_type generateAlphaValue(AlphaRange range, RandomGenerator &rnd) {
    switch (range) {
    case ALPHA_ZERO:
        break;
    case ALPHA_UNIT:
        return rnd.unit();
    case ALPHA_RANDOM:
        return rnd();
    }

    // ALPHA_ZERO, and any value outside the enum, yields zero
    return channel_type(0);
}

87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_smallint.hpp>
#include <boost/random/uniform_real.hpp>

/**
 * Fallback generator for channel types that have no dedicated
 * specialization. Calling either member reports a fatal error via qFatal().
 */
template <typename channel_type>
struct RandomGenerator {
    channel_type operator() () {
        qFatal("Wrong template instantiation");
        return channel_type(0);
    }

    channel_type unit() {
        qFatal("Wrong template instantiation");
        return channel_type(0);
    }
};

template <>
struct RandomGenerator<quint8>
106
{
107 108 109 110 111 112 113 114 115 116 117 118 119
    RandomGenerator(int seed)
        : m_smallint(0,255),
          m_rnd(seed)
    {
    }

    quint8 operator() () {
        return m_smallint(m_rnd);
    }

    quint8 unit() {
        return KoColorSpaceMathsTraits<quint8>::unitValue;
    }
120

121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
    boost::uniform_smallint<int> m_smallint;
    boost::mt11213b m_rnd;
};

/**
 * Random generator for float channels.
 *
 * operator()() draws from a default-constructed boost::uniform_real<float>
 * driven by a boost::mt11213b engine seeded in the constructor; unit()
 * returns the float unit value.
 */
template <>
struct RandomGenerator<float>
{
    RandomGenerator(int seed)
        : m_rnd(seed)
    {
    }

    float operator() () {
        //return float(m_rnd()) / float(m_rnd.max());
        return m_smallfloat(m_rnd);
    }

    float unit() {
        return KoColorSpaceMathsTraits<float>::unitValue;
    }

    boost::uniform_real<float> m_smallfloat;
    boost::mt11213b m_rnd;
};
145

146 147 148 149 150 151
/**
 * Generator for double channels; it simply reuses the float generator, so
 * the inherited operator()() and unit() still return float values.
 */
template <>
struct RandomGenerator<double> : RandomGenerator<float>
{
    RandomGenerator(int seed)
        : RandomGenerator<float>(seed)
    {
    }
};


/**
 * Fill source, destination and mask buffers with pseudo-random test data.
 *
 * Every pixel consists of four channel_type values: three color channels
 * followed by alpha. Color channels are always random; the alpha channels
 * follow \p srcAlphaRange / \p dstAlphaRange. The mask receives one random
 * byte per pixel from a separate generator seeded with \p seed + 1.
 *
 * \param seed       seed of the pixel generator
 * \param numPixels  number of pixels to write (asserted to be >= 4)
 * \param srcPixels  source buffer, at least numPixels * 4 * sizeof(channel_type) bytes
 * \param dstPixels  destination buffer of the same size
 * \param mask       mask buffer, at least numPixels bytes
 */
template <typename channel_type>
void generateDataLine(uint seed, int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask, AlphaRange srcAlphaRange, AlphaRange dstAlphaRange)
{
    Q_ASSERT(numPixels >= 4);

    RandomGenerator<channel_type> rnd(seed);
    RandomGenerator<quint8> maskRnd(seed + 1);

    channel_type *srcArray = reinterpret_cast<channel_type*>(srcPixels);
    channel_type *dstArray = reinterpret_cast<channel_type*>(dstPixels);

    for (int i = 0; i < numPixels; i++) {
        // color channels: source value is drawn before the destination value
        for (int j = 0; j < 3; j++) {
            channel_type s = rnd();
            channel_type d = rnd();
            *(srcArray++) = s;
            *(dstArray++) = d;
        }

        // alpha channels
        channel_type sa = generateAlphaValue<channel_type>(srcAlphaRange, rnd);
        channel_type da = generateAlphaValue<channel_type>(dstAlphaRange, rnd);
        *(srcArray++) = sa;
        *(dstArray++) = da;

        *(mask++) = maskRnd();
    }
}

void printData(int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask)
{
    for (int i = 0; i < numPixels; i++) {
187
        dbgKrita << "Src: "
188 189 190 191 192 193
                 << srcPixels[i*4] << "\t"
                 << srcPixels[i*4+1] << "\t"
                 << srcPixels[i*4+2] << "\t"
                 << srcPixels[i*4+3] << "\t"
                 << "Msk:" << mask[i];

194
        dbgKrita << "Dst: "
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
                 << dstPixels[i*4] << "\t"
                 << dstPixels[i*4+1] << "\t"
                 << dstPixels[i*4+2] << "\t"
                 << dstPixels[i*4+3];
    }
}

// Geometry of the test tiles used throughout this file.
const int rowStride = 64;                        // pixels per tile row
const int totalRows = 64;                        // rows per tile
const QRect processRect(0,0,64,64);              // area handed to the composite ops
const int numPixels = rowStride * totalRows;     // pixels per tile
const int numTiles = 1024;                       // tiles processed by the benchmarks


// Buffers of one test tile. The pointers are allocated by generateTiles()
// and released by freeTiles(); src and dst may be offset from the start of
// their allocation by the alignment shifts passed to those functions.
struct Tile {
    quint8 *src;
    quint8 *dst;
    quint8 *mask;
};
214 215 216
#include <stdint.h>
/**
 * Allocate \p size tiles and fill them with pseudo-random data.
 *
 * Pixel buffers are allocated with posix_memalign() and then shifted by
 * \p srcAlignmentShift / \p dstAlignmentShift bytes, so a non-zero shift
 * yields a deliberately misaligned buffer. Every tile is filled with the
 * same seed (1). The result must be released with freeTiles() using the
 * same shift values.
 *
 * \param pixelSize bytes per pixel; 4 (quint8 channels) and 16 (float
 *                  channels) are supported, anything else is fatal
 */
QVector<Tile> generateTiles(int size,
                            const int srcAlignmentShift,
                            const int dstAlignmentShift,
                            AlphaRange srcAlphaRange,
                            AlphaRange dstAlphaRange,
                            const quint32 pixelSize)
{
    QVector<Tile> tiles(size);

#ifdef HAVE_VC
    const int vecSize = Vc::float_v::Size;
#else
    const int vecSize = 1;
#endif

    // The 256 is used to make sure that we have a good alignment no matter
    // what build options are used.
    const size_t pixelAlignment = qMax(size_t(vecSize * sizeof(float)), size_t(256));
    const size_t maskAlignment = qMax(size_t(vecSize), size_t(256));
    for (int i = 0; i < size; i++) {
        void *ptr = NULL;
        int error = posix_memalign(&ptr, pixelAlignment, numPixels * pixelSize + srcAlignmentShift);
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
        tiles[i].src = (quint8*)ptr + srcAlignmentShift;
        error = posix_memalign(&ptr, pixelAlignment, numPixels * pixelSize + dstAlignmentShift);
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
        tiles[i].dst = (quint8*)ptr + dstAlignmentShift;
        error = posix_memalign(&ptr, maskAlignment, numPixels);
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
        tiles[i].mask = (quint8*)ptr;

        if (pixelSize == 4) {
            generateDataLine<quint8>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange);
        } else if (pixelSize == 16) {
            generateDataLine<float>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange);
        } else {
            // pixelSize is unsigned, so it is printed with %u
            qFatal("Pixel size %u is not implemented", pixelSize);
        }
    }

    return tiles;
}

/**
 * Release the buffers of tiles created by generateTiles().
 *
 * The shifts must be the ones that were passed to generateTiles(): they are
 * subtracted again to recover the pointers returned by posix_memalign().
 */
void freeTiles(QVector<Tile> tiles,
               const int srcAlignmentShift,
               const int dstAlignmentShift)
{
    Q_FOREACH (const Tile &tile, tiles) {
        free(tile.src - srcAlignmentShift);
        free(tile.dst - dstAlignmentShift);
        free(tile.mask);
    }
}

274 275
/// Returns true when \p a and \p b differ by at most \p prec.
template <typename channel_type>
inline bool fuzzyCompare(channel_type a, channel_type b, channel_type prec) {
    return qAbs(a - b) <= prec;
}

279 280
/**
 * Compare two four-channel pixels.
 *
 * The pixels match when both alpha values (index 3) are exactly zero -- the
 * color channels are ignored in that case -- or when all four channels agree
 * within \p prec.
 */
template <typename channel_type>
inline bool comparePixels(channel_type *p1, channel_type *p2, channel_type prec) {
    return (p1[3] == p2[3] && p1[3] == 0) ||
        (fuzzyCompare(p1[0], p2[0], prec) &&
         fuzzyCompare(p1[1], p2[1], prec) &&
         fuzzyCompare(p1[2], p2[2], prec) &&
         fuzzyCompare(p1[3], p2[3], prec));
}

/**
 * Compare the destination buffers of tiles[0] ("actual") and tiles[1]
 * ("expected") pixel by pixel.
 *
 * On the first mismatch the destination values, their difference, the
 * source pixels and the mask bytes are written to the debug log.
 *
 * \param tiles at least two tiles holding numPixels pixels each
 * \param prec  per-channel tolerance handed to comparePixels()
 * \return true when every pixel matches
 */
template <typename channel_type>
bool compareTwoOpsPixels(QVector<Tile> &tiles, channel_type prec) {
    channel_type *dst1 = reinterpret_cast<channel_type*>(tiles[0].dst);
    channel_type *dst2 = reinterpret_cast<channel_type*>(tiles[1].dst);

    channel_type *src1 = reinterpret_cast<channel_type*>(tiles[0].src);
    channel_type *src2 = reinterpret_cast<channel_type*>(tiles[1].src);

    for (int i = 0; i < numPixels; i++) {
        if (!comparePixels<channel_type>(dst1, dst2, prec)) {
            dbgKrita << "Wrong result:" << i;
            dbgKrita << "Act: " << dst1[0] << dst1[1] << dst1[2] << dst1[3];
            dbgKrita << "Exp: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];
            dbgKrita << "Dif: " << dst1[0] - dst2[0] << dst1[1] - dst2[1] << dst1[2] - dst2[2] << dst1[3] - dst2[3];

            // dst1/dst2 are advanced per pixel, the sources are indexed
            channel_type *s1 = src1 + 4 * i;
            channel_type *s2 = src2 + 4 * i;

            dbgKrita << "SrcA:" << s1[0] << s1[1] << s1[2] << s1[3];
            dbgKrita << "SrcE:" << s2[0] << s2[1] << s2[2] << s2[3];

            dbgKrita << "MskA:" << tiles[0].mask[i];
            dbgKrita << "MskE:" << tiles[1].mask[i];

            return false;
        }
        dst1 += 4;
        dst2 += 4;
    }
    return true;
}

320 321
bool compareTwoOps(bool haveMask, const KoCompositeOp *op1, const KoCompositeOp *op2)
{
322 323 324 325
    Q_ASSERT(op1->colorSpace()->pixelSize() == op2->colorSpace()->pixelSize());
    const quint32 pixelSize = op1->colorSpace()->pixelSize();
    const int alignment = 16;
    QVector<Tile> tiles = generateTiles(2, alignment, alignment, ALPHA_RANDOM, ALPHA_RANDOM, op1->colorSpace()->pixelSize());
326 327 328 329 330 331 332

    KoCompositeOp::ParameterInfo params;
    params.dstRowStride  = 4 * rowStride;
    params.srcRowStride  = 4 * rowStride;
    params.maskRowStride = rowStride;
    params.rows          = processRect.height();
    params.cols          = processRect.width();
333 334
    // This is a hack as in the old version we get a rounding of opacity to this value
    params.opacity       = float(Arithmetic::scale<quint8>(0.5*1.0f))/255.0;
335 336 337 338 339 340 341 342 343 344 345 346 347
    params.flow          = 0.3*1.0f;
    params.channelFlags  = QBitArray();

    params.dstRowStart   = tiles[0].dst;
    params.srcRowStart   = tiles[0].src;
    params.maskRowStart  = haveMask ? tiles[0].mask : 0;
    op1->composite(params);

    params.dstRowStart   = tiles[1].dst;
    params.srcRowStart   = tiles[1].src;
    params.maskRowStart  = haveMask ? tiles[1].mask : 0;
    op2->composite(params);

348 349 350 351 352 353 354 355 356
    bool compareResult = true;
    if (pixelSize == 4) {
        compareResult = compareTwoOpsPixels<quint8>(tiles, 10);
    }
    else if (pixelSize == 16) {
        compareResult = compareTwoOpsPixels<float>(tiles, 0);
    }
    else {
        qFatal("Pixel size %i is not implemented", pixelSize);
357 358
    }

359
    freeTiles(tiles, alignment, alignment);
360

361
    return compareResult;
362 363
}

364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
/**
 * Build the label printed next to a benchmark result.
 *
 * The label is made of three parts: buffer alignment ("Aligned",
 * "SrcUnalig", "DstUnalig", "Unaligned"), mask usage and the alpha ranges
 * of source and destination. A buffer counts as unaligned when its
 * alignment shift is non-zero.
 */
QString getTestName(bool haveMask,
                    const int srcAlignmentShift,
                    const int dstAlignmentShift,
                    AlphaRange srcAlphaRange,
                    AlphaRange dstAlphaRange)
{

    QString testName;
    // A non-zero shift means that buffer is the unaligned one.
    testName +=
        !srcAlignmentShift && !dstAlignmentShift ? "Aligned   " :
         srcAlignmentShift && !dstAlignmentShift ? "SrcUnalig " :
        !srcAlignmentShift &&  dstAlignmentShift ? "DstUnalig " :
         srcAlignmentShift &&  dstAlignmentShift ? "Unaligned " : "###";

    testName += haveMask ? "Mask   " : "NoMask ";

    testName +=
        srcAlphaRange == ALPHA_RANDOM ? "SrcRand " :
        srcAlphaRange == ALPHA_ZERO   ? "SrcZero " :
        srcAlphaRange == ALPHA_UNIT   ? "SrcUnit " : "###";

    testName +=
        dstAlphaRange == ALPHA_RANDOM ? "DstRand" :
        dstAlphaRange == ALPHA_ZERO   ? "DstZero" :
        dstAlphaRange == ALPHA_UNIT   ? "DstUnit" : "###";

    return testName;
}

393 394
void benchmarkCompositeOp(const KoCompositeOp *op,
                          bool haveMask,
395 396
                          qreal opacity,
                          qreal flow,
397
                          const int srcAlignmentShift,
398 399 400
                          const int dstAlignmentShift,
                          AlphaRange srcAlphaRange,
                          AlphaRange dstAlphaRange)
401
{
402 403
    QString testName = getTestName(haveMask, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange);

404
    QVector<Tile> tiles =
405
        generateTiles(numTiles, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange, op->colorSpace()->pixelSize());
406 407 408 409 410 411 412 413 414

    const int tileOffset = 4 * (processRect.y() * rowStride + processRect.x());

    KoCompositeOp::ParameterInfo params;
    params.dstRowStride  = 4 * rowStride;
    params.srcRowStride  = 4 * rowStride;
    params.maskRowStride = rowStride;
    params.rows          = processRect.height();
    params.cols          = processRect.width();
415 416
    params.opacity       = opacity;
    params.flow          = flow;
417 418
    params.channelFlags  = QBitArray();

419 420 421
    QTime timer;
    timer.start();

422
    Q_FOREACH (const Tile &tile, tiles) {
423 424 425 426
        params.dstRowStart   = tile.dst + tileOffset;
        params.srcRowStart   = tile.src + tileOffset;
        params.maskRowStart  = haveMask ? tile.mask : 0;
        op->composite(params);
427 428
    }

429
    dbgKrita << testName << "RESULT:" << timer.elapsed() << "msec";
430

431 432 433
    freeTiles(tiles, srcAlignmentShift, dstAlignmentShift);
}

434 435
void benchmarkCompositeOp(const KoCompositeOp *op, const QString &postfix)
{
436
    dbgKrita << "Testing Composite Op:" << op->id() << "(" << postfix << ")";
437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460

    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 8, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 8, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 4, 8, ALPHA_RANDOM, ALPHA_RANDOM);

/// --- Vary the content of the source and destination

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_RANDOM);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_RANDOM);

/// ---

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_ZERO);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_ZERO);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_ZERO);

/// ---

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_UNIT);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_UNIT);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_UNIT);
}
461 462 463 464

#ifdef HAVE_VC

template<class Compositor>
465
void checkRounding(qreal opacity, qreal flow, qreal averageOpacity = -1, quint32 pixelSize = 4)
466 467
{
    QVector<Tile> tiles =
468
        generateTiles(2, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM, pixelSize);
469 470 471 472 473 474 475 476 477 478 479 480 481

    const int vecSize = Vc::float_v::Size;

    const int numBlocks = numPixels / vecSize;

    quint8 *src1 = tiles[0].src;
    quint8 *dst1 = tiles[0].dst;
    quint8 *msk1 = tiles[0].mask;

    quint8 *src2 = tiles[1].src;
    quint8 *dst2 = tiles[1].dst;
    quint8 *msk2 = tiles[1].mask;

482 483 484 485 486 487 488 489 490 491 492 493
    KoCompositeOp::ParameterInfo params;
    params.opacity = opacity;
    params.flow = flow;

    if (averageOpacity >= 0.0) {
        params._lastOpacityData = averageOpacity;
        params.lastOpacity = &params._lastOpacityData;
    }

    params.channelFlags = QBitArray();
    typename Compositor::OptionalParams optionalParams(params);

494 495
    // The error count is needed as 38.5 gets rounded to 38 instead of 39 in the vc version.
    int errorcount = 0;
496
    for (int i = 0; i < numBlocks; i++) {
497
        Compositor::template compositeVector<true,true, VC_IMPL>(src1, dst1, msk1, params.opacity, optionalParams);
498 499
        for (int j = 0; j < vecSize; j++) {

500
            //if (8 * i + j == 7080) {
501 502 503
            //    dbgKrita << "src: " << src2[0] << src2[1] << src2[2] << src2[3];
            //    dbgKrita << "dst: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];
            //    dbgKrita << "msk:" << msk2[0];
504 505 506
            //}

            Compositor::template compositeOnePixelScalar<true, VC_IMPL>(src2, dst2, msk2, params.opacity, optionalParams);
507

508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
            bool compareResult = true;
            if (pixelSize == 4) {
                compareResult = comparePixels<quint8>(dst1, dst2, 0);
                if (!compareResult) {
                    ++errorcount;
                    compareResult = comparePixels<quint8>(dst1, dst2, 1);
                    if (!compareResult) {
                        ++errorcount;
                    }
                }
            }
            else if (pixelSize == 16) {
                compareResult = comparePixels<float>(reinterpret_cast<float*>(dst1), reinterpret_cast<float*>(dst2), 0);
            }
            else {
                qFatal("Pixel size %i is not implemented", pixelSize);
            }

            if(!compareResult || errorcount > 1) {
527 528 529
                dbgKrita << "Wrong rounding in pixel:" << 8 * i + j;
                dbgKrita << "Vector version: " << dst1[0] << dst1[1] << dst1[2] << dst1[3];
                dbgKrita << "Scalar version: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];
530

531 532
                dbgKrita << "src:" << src1[0] << src1[1] << src1[2] << src1[3];
                dbgKrita << "msk:" << msk1[0];
533 534 535 536

                QFAIL("Wrong rounding");
            }

537 538 539 540
            src1 += pixelSize;
            dst1 += pixelSize;
            src2 += pixelSize;
            dst2 += pixelSize;
541 542 543 544 545 546 547 548 549 550 551
            msk1++;
            msk2++;
        }
    }

    freeTiles(tiles, 0, 0);
}

#endif


552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
// Vector-vs-scalar rounding check of AlphaDarkenCompositor32 with
// opacity 0.5 and flow 0.3. Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_03()
{
#ifdef HAVE_VC
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(0.5,0.3);
#endif
}

// Vector-vs-scalar rounding check of AlphaDarkenCompositor32 with
// opacity 0.5 and flow 0.5. Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_05()
{
#ifdef HAVE_VC
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(0.5,0.5);
#endif
}

// Vector-vs-scalar rounding check of AlphaDarkenCompositor32 with
// opacity 0.5 and flow 0.7. Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_07()
{
#ifdef HAVE_VC
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(0.5,0.7);
#endif
}

// Vector-vs-scalar rounding check of AlphaDarkenCompositor32 with
// opacity 0.5 and flow 1.0. Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10()
{
#ifdef HAVE_VC
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(0.5,1.0);
#endif
}

// Vector-vs-scalar rounding check of AlphaDarkenCompositor32 with
// opacity 0.5, flow 1.0 and an average ("last") opacity of 0.8.
// Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10_08()
{
#ifdef HAVE_VC
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(0.5,1.0,0.8);
#endif
}

// Vector-vs-scalar rounding check of OverCompositor32 with opacity 0.5
// and flow 0.3. Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingOver()
{
#ifdef HAVE_VC
    checkRounding<OverCompositor32<quint8, quint32, false, true> >(0.5, 0.3);
#endif
}

594 595 596 597 598 599 600
// Vector-vs-scalar rounding check of OverCompositor128 on 16-byte (float)
// pixels with opacity 0.5 and flow 0.3; no average opacity is installed.
// Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingOverRgbaF32()
{
#ifdef HAVE_VC
    checkRounding<OverCompositor128<float, float, false, true> >(0.5, 0.3, -1, 16);
#endif
}

601 602 603 604 605 606
void KisCompositionBenchmark::compareAlphaDarkenOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits>(cs);

607
    QVERIFY(compareTwoOps(true, opAct, opExp));
608 609 610 611 612

    delete opExp;
    delete opAct;
}

613 614 615 616 617 618 619 620 621 622 623 624
void KisCompositionBenchmark::compareAlphaDarkenOpsNoMask()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits>(cs);

    QVERIFY(compareTwoOps(false, opAct, opExp));

    delete opExp;
    delete opAct;
}

625 626 627 628 629 630 631 632 633
void KisCompositionBenchmark::compareOverOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs);

    QVERIFY(compareTwoOps(true, opAct, opExp));

    delete opExp;
634 635 636 637 638 639 640 641 642 643 644 645
    delete opAct;
}

void KisCompositionBenchmark::compareOverOpsNoMask()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs);

    QVERIFY(compareTwoOps(false, opAct, opExp));

    delete opExp;
646
    delete opAct;
647 648
}

649 650 651 652 653 654 655 656 657 658 659 660
void KisCompositionBenchmark::compareRgbF32OverOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp128(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoRgbF32Traits>(cs);

    QVERIFY(compareTwoOps(false, opAct, opExp));

    delete opExp;
    delete opAct;
}

661
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenLegacy()
662 663 664
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoBgrU8Traits>(cs);
665
    benchmarkCompositeOp(op, "Legacy");
666 667 668
    delete op;
}

669
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenOptimized()
670 671 672
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOp32(cs);
673
    benchmarkCompositeOp(op, "Optimized");
674 675 676
    delete op;
}

677
void KisCompositionBenchmark::testRgb8CompositeOverLegacy()
678 679 680
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = new KoCompositeOpOver<KoBgrU8Traits>(cs);
681
    benchmarkCompositeOp(op, "Legacy");
682 683 684
    delete op;
}

685
void KisCompositionBenchmark::testRgb8CompositeOverOptimized()
686 687 688
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp32(cs);
689
    benchmarkCompositeOp(op, "Optimized");
690 691 692
    delete op;
}

693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708
// Benchmark matrix for a directly constructed KoCompositeOpOver on the
// RGBA/F32 color space. The op is created and destroyed here.
void KisCompositionBenchmark::testRgbF32CompositeOverLegacy()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *op = new KoCompositeOpOver<KoRgbF32Traits>(cs);
    benchmarkCompositeOp(op, "RGBF32 Legacy");
    delete op;
}

// Benchmark matrix for the 128-bit over op created by
// KoOptimizedCompositeOpFactory on the RGBA/F32 color space.
// The op is created and destroyed here.
void KisCompositionBenchmark::testRgbF32CompositeOverOptimized()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp128(cs);
    benchmarkCompositeOp(op, "RGBF32 Optimized");
    delete op;
}

709
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenReal_Aligned()
710 711
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
712 713
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_ALPHA_DARKEN);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
714 715
}

716
void KisCompositionBenchmark::testRgb8CompositeOverReal_Aligned()
717 718
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
719 720
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_OVER);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
721 722
}

723 724 725 726 727 728 729
// Benchmark matrix for the copy op that the RGB8 color space hands out.
// The op is not deleted here.
void KisCompositionBenchmark::testRgb8CompositeCopyLegacy()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_COPY);
    benchmarkCompositeOp(op, "Copy");
}

730
void KisCompositionBenchmark::benchmarkMemcpy()
731
{
732
    QVector<Tile> tiles =
733
        generateTiles(numTiles, 0, 0, ALPHA_UNIT, ALPHA_UNIT, 4);
734

735
    QBENCHMARK_ONCE {
736
        Q_FOREACH (const Tile &tile, tiles) {
737 738 739
            memcpy(tile.dst, tile.src, 4 * numPixels);
        }
    }
740

741
    freeTiles(tiles, 0, 0);
742 743
}

744 745
#ifdef HAVE_VC
// Alignments requested from posix_memalign() by the conversion benchmarks
// below: one full Vc vector of the given element type, but never less than
// sizeof(void*), the minimum posix_memalign() accepts.
const int vecSize = Vc::float_v::Size;
const size_t uint8VecAlignment = qMax(vecSize * sizeof(quint8), sizeof(void*));
const size_t uint32VecAlignment = qMax(vecSize * sizeof(quint32), sizeof(void*));
const size_t floatVecAlignment = qMax(vecSize * sizeof(float), sizeof(void*));
#endif
750

751 752 753
// Benchmark the direct Vc conversion uint_v -> float_v over 4096 elements.
// Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkUintFloat()
{
#ifdef HAVE_VC
    const int dataSize = 4096;
    void *ptr = NULL;
    int error = posix_memalign(&ptr, uint8VecAlignment, dataSize);
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    quint8 *iData = (quint8*)ptr;
    error = posix_memalign(&ptr, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    float *fData = (float*)ptr;

    // posix_memalign() hands out uninitialized memory; give the loop
    // well-defined input to read.
    memset(iData, 0, dataSize);

    QBENCHMARK {
        for (int i = 0; i < dataSize; i += Vc::float_v::Size) {
            // convert uint -> float directly, this causes
            // static_cast helper be called
            Vc::float_v b(Vc::uint_v(iData + i));
            b.store(fData + i);
        }
    }

    free(iData);
    free(fData);
#endif
}

// Benchmark the Vc conversion uint_v -> int_v -> float_v over 4096 elements.
// Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkUintIntFloat()
{
#ifdef HAVE_VC
    const int dataSize = 4096;
    void *ptr = NULL;
    int error = posix_memalign(&ptr, uint8VecAlignment, dataSize);
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    quint8 *iData = (quint8*)ptr;
    error = posix_memalign(&ptr, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    float *fData = (float*)ptr;

    // posix_memalign() hands out uninitialized memory; give the loop
    // well-defined input to read.
    memset(iData, 0, dataSize);

    QBENCHMARK {
        for (int i = 0; i < dataSize; i += Vc::float_v::Size) {
            // convert uint->int->float, that avoids special sign
            // treating, and gives 2.6 times speedup
            Vc::float_v b(Vc::int_v(Vc::uint_v(iData + i)));
            b.store(fData + i);
        }
    }

    free(iData);
    free(fData);
#endif
}

// Benchmark the direct Vc conversion float_v -> uint_v over 4096 elements.
// Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkFloatUint()
{
#ifdef HAVE_VC
    const int dataSize = 4096;
    void *ptr = NULL;
    int error = posix_memalign(&ptr, uint32VecAlignment, dataSize * sizeof(quint32));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    quint32 *iData = (quint32*)ptr;
    error = posix_memalign(&ptr, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    float *fData = (float*)ptr;

    // posix_memalign() hands out uninitialized memory; give the loop
    // well-defined input (0.0f) to read.
    memset(fData, 0, dataSize * sizeof(float));

    QBENCHMARK {
        for (int i = 0; i < dataSize; i += Vc::float_v::Size) {
            // conversion float -> uint
            Vc::uint_v b(Vc::float_v(fData + i));

            b.store(iData + i);
        }
    }

    free(iData);
    free(fData);
#endif
}

// Benchmark the Vc conversion float_v -> int_v -> uint_v over 4096 elements.
// Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkFloatIntUint()
{
#ifdef HAVE_VC
    const int dataSize = 4096;
    void *ptr = NULL;
    int error = posix_memalign(&ptr, uint32VecAlignment, dataSize * sizeof(quint32));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    quint32 *iData = (quint32*)ptr;
    error = posix_memalign(&ptr, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }
    float *fData = (float*)ptr;

    // posix_memalign() hands out uninitialized memory; give the loop
    // well-defined input (0.0f) to read.
    memset(fData, 0, dataSize * sizeof(float));

    QBENCHMARK {
        for (int i = 0; i < dataSize; i += Vc::float_v::Size) {
            // conversion float -> int -> uint
            Vc::uint_v b(Vc::int_v(Vc::float_v(fData + i)));

            b.store(iData + i);
        }
    }

    free(iData);
    free(fData);
#endif
}

871
QTEST_MAIN(KisCompositionBenchmark)
872