/*
 *  Copyright (c) 2012 Dmitry Kazakov <dimula73@gmail.com>
 *  Copyright (c) 2015 Thorsten Zachmann <zachmann@kde.org>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

// for calculation of the needed alignment
#include <config-vc.h>
22
#ifdef HAVE_VC
23 24 25 26 27 28
#if defined _MSC_VER
// Let's shut up the "possible loss of data" and "forcing value to bool 'true' or 'false'" warnings
#pragma warning ( push )
#pragma warning ( disable : 4244 )
#pragma warning ( disable : 4800 )
#endif
29 30
#include <Vc/Vc>
#include <Vc/IO>
31 32 33
#if defined _MSC_VER
#pragma warning ( pop )
#endif
34

35 36 37 38 39
#include <KoOptimizedCompositeOpOver32.h>
#include <KoOptimizedCompositeOpOver128.h>
#include <KoOptimizedCompositeOpAlphaDarken32.h>
#endif

40 41 42 43 44 45 46 47 48 49
#include "kis_composition_benchmark.h"
#include <QTest>

#include <KoColorSpace.h>
#include <KoCompositeOp.h>
#include <KoColorSpaceRegistry.h>

#include <KoColorSpaceTraits.h>
#include <KoCompositeOpAlphaDarken.h>
#include <KoCompositeOpOver.h>
50 51
#include <KoOptimizedCompositeOpFactory.h>
#include <KoAlphaDarkenParamsWrapper.h>
52

53 54
// for posix_memalign()
#include <stdlib.h>
55

56 57
#include <kis_debug.h>

58 59 60 61 62 63 64 65
// Portable aligned allocation helpers.
//
// MEMALIGN_ALLOC(p, a, s) stores a block of s bytes aligned to a bytes in *p
// and evaluates to 0 on success or to a non-zero error code on failure.
// MEMALIGN_FREE(p) releases a block obtained through MEMALIGN_ALLOC.
#if defined _MSC_VER
// MSVC: wrap _aligned_malloc() (note its (size, alignment) argument order)
// and yield errno when it returns a null pointer.
#define MEMALIGN_ALLOC(p, a, s) ((*(p)) = _aligned_malloc((s), (a)), *(p) ? 0 : errno)
#define MEMALIGN_FREE(p) _aligned_free((p))
#else
// Everywhere else: forward directly to posix_memalign() / free().
#define MEMALIGN_ALLOC(p, a, s) posix_memalign((p), (a), (s))
#define MEMALIGN_FREE(p) free((p))
#endif

66 67 68 69 70 71
// Selects how the alpha channel of generated test pixels is filled
// (consumed by generateAlphaValue()).
enum AlphaRange {
    ALPHA_ZERO,   // alpha stays at 0
    ALPHA_UNIT,   // alpha is the generator's unit() value
    ALPHA_RANDOM  // alpha is the next value drawn from the generator
};

72 73 74 75

/**
 * Produces one alpha value for the requested range:
 * rnd.unit() for ALPHA_UNIT, the next rnd() sample for ALPHA_RANDOM and
 * 0 for ALPHA_ZERO (or any other value). rnd() is only invoked for
 * ALPHA_RANDOM.
 */
template <typename channel_type, class RandomGenerator>
inline channel_type generateAlphaValue(AlphaRange range, RandomGenerator &rnd) {
    if (range == ALPHA_UNIT) {
        return rnd.unit();
    }

    if (range == ALPHA_RANDOM) {
        return rnd();
    }

    // ALPHA_ZERO and anything unexpected
    return 0;
}

91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_smallint.hpp>
#include <boost/random/uniform_real.hpp>

/**
 * Catch-all generator for channel types that have no dedicated
 * specialization. Both accessors report the misuse through qFatal();
 * the return statements that follow only satisfy the signature.
 */
template <typename channel_type>
struct RandomGenerator {
    channel_type unit() {
        qFatal("Wrong template instantiation");
        return channel_type(0);
    }

    channel_type operator() () {
        qFatal("Wrong template instantiation");
        return channel_type(0);
    }
};

template <>
struct RandomGenerator<quint8>
110
{
111 112 113 114 115 116 117 118 119 120 121 122 123
    RandomGenerator(int seed)
        : m_smallint(0,255),
          m_rnd(seed)
    {
    }

    quint8 operator() () {
        return m_smallint(m_rnd);
    }

    quint8 unit() {
        return KoColorSpaceMathsTraits<quint8>::unitValue;
    }
124

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
    boost::uniform_smallint<int> m_smallint;
    boost::mt11213b m_rnd;
};

/**
 * Float generator: operator() samples a default-constructed
 * boost::uniform_real<float> distribution driven by a seeded
 * boost::mt11213b engine, unit() returns the constant float unit value.
 */
template <>
struct RandomGenerator<float>
{
    RandomGenerator(int seed)
        : m_rnd(seed)
    {
    }

    float unit() {
        return KoColorSpaceMathsTraits<float>::unitValue;
    }

    float operator() () {
        // Sample through the distribution rather than scaling the raw
        // engine output by m_rnd.max().
        const float sample = m_smallfloat(m_rnd);
        return sample;
    }

    boost::uniform_real<float> m_smallfloat;
    boost::mt11213b m_rnd;
};
149

150 151 152 153 154 155
/**
 * Double generator: reuses the float generator unchanged, so all values
 * it produces are float samples.
 */
template <>
struct RandomGenerator<double> : RandomGenerator<float>
{
    RandomGenerator(int seed) : RandomGenerator<float>(seed) {}
};


/**
 * Fills \p numPixels four-channel source and destination pixels plus one
 * mask byte per pixel with pseudo-random data.
 *
 * For every pixel the three colour channels are drawn first (source and
 * destination alternating), then the two alpha values according to
 * \p srcAlphaRange / \p dstAlphaRange, then the mask byte. Colour data uses
 * a generator seeded with \p seed, the mask one seeded with \p seed + 1, so
 * equal seeds always give equal buffers.
 */
template <typename channel_type>
void generateDataLine(uint seed, int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask, AlphaRange srcAlphaRange, AlphaRange dstAlphaRange)
{
    Q_ASSERT(numPixels >= 4);

    const int channelsPerPixel = 4;
    const int alphaIndex = 3;

    RandomGenerator<channel_type> rnd(seed);
    RandomGenerator<quint8> maskRnd(seed + 1);

    channel_type *src = reinterpret_cast<channel_type*>(srcPixels);
    channel_type *dst = reinterpret_cast<channel_type*>(dstPixels);

    for (int pixel = 0; pixel < numPixels; ++pixel) {
        channel_type *srcPixel = src + pixel * channelsPerPixel;
        channel_type *dstPixel = dst + pixel * channelsPerPixel;

        // The draw order (src, dst, src, dst, ...) defines the data set.
        for (int channel = 0; channel < alphaIndex; ++channel) {
            srcPixel[channel] = rnd();
            dstPixel[channel] = rnd();
        }

        srcPixel[alphaIndex] = generateAlphaValue<channel_type>(srcAlphaRange, rnd);
        dstPixel[alphaIndex] = generateAlphaValue<channel_type>(dstAlphaRange, rnd);

        mask[pixel] = maskRnd();
    }
}

void printData(int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask)
{
    for (int i = 0; i < numPixels; i++) {
191
        qDebug() << "Src: "
192 193 194 195 196 197
                 << srcPixels[i*4] << "\t"
                 << srcPixels[i*4+1] << "\t"
                 << srcPixels[i*4+2] << "\t"
                 << srcPixels[i*4+3] << "\t"
                 << "Msk:" << mask[i];

198
        qDebug() << "Dst: "
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
                 << dstPixels[i*4] << "\t"
                 << dstPixels[i*4+1] << "\t"
                 << dstPixels[i*4+2] << "\t"
                 << dstPixels[i*4+3];
    }
}

// Geometry shared by all generated test tiles: a tile holds
// rowStride x totalRows pixels and processRect spans that whole area.
const int rowStride = 64;                     // pixels per tile row
const int totalRows = 64;                     // rows per tile
const QRect processRect(0,0,64,64);           // area handed to the composite ops
const int numPixels = rowStride * totalRows;  // pixels per tile
const int numTiles = 1024;                    // tiles processed per benchmark run


// One test tile: raw pointers into the buffers allocated by generateTiles()
// and released by freeTiles().
struct Tile {
    quint8 *src;   // source pixels (may be offset from the allocation start)
    quint8 *dst;   // destination pixels (may be offset from the allocation start)
    quint8 *mask;  // one mask byte per pixel
};
218 219 220
#include <stdint.h>
QVector<Tile> generateTiles(int size,
                            const int srcAlignmentShift,
221 222
                            const int dstAlignmentShift,
                            AlphaRange srcAlphaRange,
223 224
                            AlphaRange dstAlphaRange,
                            const quint32 pixelSize)
225
{
226
    QVector<Tile> tiles(size);
227 228

#ifdef HAVE_VC
229
    const int vecSize = Vc::float_v::size();
230 231 232 233
#else
    const int vecSize = 1;
#endif

234
    // the 256 are used to make sure that we have a good alignment no matter what build options are used.
235 236
    const size_t pixelAlignment = qMax(size_t(vecSize * sizeof(float)), size_t(256));
    const size_t maskAlignment = qMax(size_t(vecSize), size_t(256));
237
    for (int i = 0; i < size; i++) {
238
        void *ptr = 0;
239
        int error = MEMALIGN_ALLOC(&ptr, pixelAlignment, numPixels * pixelSize + srcAlignmentShift);
240 241 242
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
243
        tiles[i].src = (quint8*)ptr + srcAlignmentShift;
244
        error = MEMALIGN_ALLOC(&ptr, pixelAlignment, numPixels * pixelSize + dstAlignmentShift);
245 246 247
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
248
        tiles[i].dst = (quint8*)ptr + dstAlignmentShift;
249
        error = MEMALIGN_ALLOC(&ptr, maskAlignment, numPixels);
250 251 252
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
253
        tiles[i].mask = (quint8*)ptr;
254 255 256 257 258 259 260 261

        if (pixelSize == 4) {
            generateDataLine<quint8>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange);
        } else if (pixelSize == 16) {
            generateDataLine<float>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange);
        } else {
            qFatal("Pixel size %i is not implemented", pixelSize);
        }
262 263
    }

264 265 266 267 268 269 270
    return tiles;
}

/**
 * Releases the buffers of every tile. The shifts must be the ones passed
 * to generateTiles(): they are subtracted again to recover the pointers
 * that were originally allocated.
 */
void freeTiles(QVector<Tile> tiles,
               const int srcAlignmentShift,
               const int dstAlignmentShift)
{
    for (int i = 0; i < tiles.size(); ++i) {
        const Tile &tile = tiles.at(i);

        MEMALIGN_FREE(tile.src - srcAlignmentShift);
        MEMALIGN_FREE(tile.dst - dstAlignmentShift);
        MEMALIGN_FREE(tile.mask);
    }
}

278 279
/** Returns true when \p a and \p b differ by at most \p prec. */
template <typename channel_type>
inline bool fuzzyCompare(channel_type a, channel_type b, channel_type prec) {
    const auto distance = qAbs(a - b);
    return distance <= prec;
}

283 284
/**
 * Compares two four-channel pixels. Pixels whose alpha (channel 3) is 0 in
 * both are considered equal regardless of their colour channels; otherwise
 * all four channels must match within \p prec.
 */
template <typename channel_type>
inline bool comparePixels(channel_type *p1, channel_type *p2, channel_type prec) {
    if (p1[3] == p2[3] && p1[3] == 0) {
        return true;
    }

    for (int channel = 0; channel < 4; ++channel) {
        if (!fuzzyCompare(p1[channel], p2[channel], prec)) {
            return false;
        }
    }

    return true;
}

/**
 * Compares the destination buffers of tiles[0] (actual) and tiles[1]
 * (expected) pixel by pixel with tolerance \p prec. On the first mismatch
 * the offending pixel, its sources and masks are printed and false is
 * returned; true means all numPixels pixels matched.
 */
template <typename channel_type>
bool compareTwoOpsPixels(QVector<Tile> &tiles, channel_type prec) {
    channel_type *dstAct = reinterpret_cast<channel_type*>(tiles[0].dst);
    channel_type *dstExp = reinterpret_cast<channel_type*>(tiles[1].dst);

    channel_type *srcAct = reinterpret_cast<channel_type*>(tiles[0].src);
    channel_type *srcExp = reinterpret_cast<channel_type*>(tiles[1].src);

    for (int i = 0; i < numPixels; i++) {
        channel_type *act = dstAct + 4 * i;
        channel_type *exp = dstExp + 4 * i;

        if (comparePixels<channel_type>(act, exp, prec)) {
            continue;
        }

        qDebug() << "Wrong result:" << i;
        qDebug() << "Act: " << act[0] << act[1] << act[2] << act[3];
        qDebug() << "Exp: " << exp[0] << exp[1] << exp[2] << exp[3];
        qDebug() << "Dif: " << act[0] - exp[0] << act[1] - exp[1] << act[2] - exp[2] << act[3] - exp[3];

        channel_type *s1 = srcAct + 4 * i;
        channel_type *s2 = srcExp + 4 * i;

        qDebug() << "SrcA:" << s1[0] << s1[1] << s1[2] << s1[3];
        qDebug() << "SrcE:" << s2[0] << s2[1] << s2[2] << s2[3];

        qDebug() << "MskA:" << tiles[0].mask[i];
        qDebug() << "MskE:" << tiles[1].mask[i];

        return false;
    }

    return true;
}

324 325
bool compareTwoOps(bool haveMask, const KoCompositeOp *op1, const KoCompositeOp *op2)
{
326 327 328 329
    Q_ASSERT(op1->colorSpace()->pixelSize() == op2->colorSpace()->pixelSize());
    const quint32 pixelSize = op1->colorSpace()->pixelSize();
    const int alignment = 16;
    QVector<Tile> tiles = generateTiles(2, alignment, alignment, ALPHA_RANDOM, ALPHA_RANDOM, op1->colorSpace()->pixelSize());
330 331 332 333 334 335 336

    KoCompositeOp::ParameterInfo params;
    params.dstRowStride  = 4 * rowStride;
    params.srcRowStride  = 4 * rowStride;
    params.maskRowStride = rowStride;
    params.rows          = processRect.height();
    params.cols          = processRect.width();
337 338
    // This is a hack as in the old version we get a rounding of opacity to this value
    params.opacity       = float(Arithmetic::scale<quint8>(0.5*1.0f))/255.0;
339 340 341 342 343 344 345 346 347 348 349 350 351
    params.flow          = 0.3*1.0f;
    params.channelFlags  = QBitArray();

    params.dstRowStart   = tiles[0].dst;
    params.srcRowStart   = tiles[0].src;
    params.maskRowStart  = haveMask ? tiles[0].mask : 0;
    op1->composite(params);

    params.dstRowStart   = tiles[1].dst;
    params.srcRowStart   = tiles[1].src;
    params.maskRowStart  = haveMask ? tiles[1].mask : 0;
    op2->composite(params);

352 353 354 355 356
    bool compareResult = true;
    if (pixelSize == 4) {
        compareResult = compareTwoOpsPixels<quint8>(tiles, 10);
    }
    else if (pixelSize == 16) {
357
        compareResult = compareTwoOpsPixels<float>(tiles, 2e-7);
358 359 360
    }
    else {
        qFatal("Pixel size %i is not implemented", pixelSize);
361 362
    }

363
    freeTiles(tiles, alignment, alignment);
364

365
    return compareResult;
366 367
}

368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
/**
 * Builds the human-readable label printed with each benchmark result.
 *
 * The label consists of an alignment tag ("Aligned", "SrcUnalig",
 * "DstUnalig" or "Unaligned", chosen by which shift is non-zero), a mask
 * tag and one tag each for the source and destination alpha range.
 * Unknown alpha range values are rendered as "###".
 */
QString getTestName(bool haveMask,
                    const int srcAlignmentShift,
                    const int dstAlignmentShift,
                    AlphaRange srcAlphaRange,
                    AlphaRange dstAlphaRange)
{
    QString testName;

    // A non-zero shift means that buffer is NOT aligned; the tag names the
    // buffer that is unaligned.
    const bool srcUnaligned = srcAlignmentShift != 0;
    const bool dstUnaligned = dstAlignmentShift != 0;

    testName +=
        srcUnaligned && dstUnaligned ? "Unaligned " :
        srcUnaligned                 ? "SrcUnalig " :
        dstUnaligned                 ? "DstUnalig " : "Aligned   ";

    testName += haveMask ? "Mask   " : "NoMask ";

    testName +=
        srcAlphaRange == ALPHA_RANDOM ? "SrcRand " :
        srcAlphaRange == ALPHA_ZERO   ? "SrcZero " :
        srcAlphaRange == ALPHA_UNIT   ? "SrcUnit " : "###";

    testName +=
        dstAlphaRange == ALPHA_RANDOM ? "DstRand" :
        dstAlphaRange == ALPHA_ZERO   ? "DstZero" :
        dstAlphaRange == ALPHA_UNIT   ? "DstUnit" : "###";

    return testName;
}

397 398
void benchmarkCompositeOp(const KoCompositeOp *op,
                          bool haveMask,
399 400
                          qreal opacity,
                          qreal flow,
401
                          const int srcAlignmentShift,
402 403 404
                          const int dstAlignmentShift,
                          AlphaRange srcAlphaRange,
                          AlphaRange dstAlphaRange)
405
{
406 407
    QString testName = getTestName(haveMask, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange);

408
    QVector<Tile> tiles =
409
        generateTiles(numTiles, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange, op->colorSpace()->pixelSize());
410 411 412 413 414 415 416 417 418

    const int tileOffset = 4 * (processRect.y() * rowStride + processRect.x());

    KoCompositeOp::ParameterInfo params;
    params.dstRowStride  = 4 * rowStride;
    params.srcRowStride  = 4 * rowStride;
    params.maskRowStride = rowStride;
    params.rows          = processRect.height();
    params.cols          = processRect.width();
419 420
    params.opacity       = opacity;
    params.flow          = flow;
421 422
    params.channelFlags  = QBitArray();

423 424 425
    QTime timer;
    timer.start();

426
    Q_FOREACH (const Tile &tile, tiles) {
427 428 429 430
        params.dstRowStart   = tile.dst + tileOffset;
        params.srcRowStart   = tile.src + tileOffset;
        params.maskRowStart  = haveMask ? tile.mask : 0;
        op->composite(params);
431 432
    }

433
    qDebug() << testName << "RESULT:" << timer.elapsed() << "msec";
434

435 436 437
    freeTiles(tiles, srcAlignmentShift, dstAlignmentShift);
}

438 439
void benchmarkCompositeOp(const KoCompositeOp *op, const QString &postfix)
{
440
    qDebug() << "Testing Composite Op:" << op->id() << "(" << postfix << ")";
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464

    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 8, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 8, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 4, 8, ALPHA_RANDOM, ALPHA_RANDOM);

/// --- Vary the content of the source and destination

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_RANDOM);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_RANDOM);

/// ---

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_ZERO);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_ZERO);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_ZERO);

/// ---

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_UNIT);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_UNIT);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_UNIT);
}
465 466 467 468

#ifdef HAVE_VC

template<class Compositor>
469
void checkRounding(qreal opacity, qreal flow, qreal averageOpacity = -1, quint32 pixelSize = 4)
470 471
{
    QVector<Tile> tiles =
472
        generateTiles(2, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM, pixelSize);
473

474
    const int vecSize = Vc::float_v::size();
475 476 477 478 479 480 481 482 483 484 485

    const int numBlocks = numPixels / vecSize;

    quint8 *src1 = tiles[0].src;
    quint8 *dst1 = tiles[0].dst;
    quint8 *msk1 = tiles[0].mask;

    quint8 *src2 = tiles[1].src;
    quint8 *dst2 = tiles[1].dst;
    quint8 *msk2 = tiles[1].mask;

486 487 488 489 490 491 492 493 494 495
    KoCompositeOp::ParameterInfo params;
    params.opacity = opacity;
    params.flow = flow;

    if (averageOpacity >= 0.0) {
        params._lastOpacityData = averageOpacity;
        params.lastOpacity = &params._lastOpacityData;
    }

    params.channelFlags = QBitArray();
496
    typename Compositor::ParamsWrapper paramsWrapper(params);
497

498 499
    // The error count is needed as 38.5 gets rounded to 38 instead of 39 in the vc version.
    int errorcount = 0;
500
    for (int i = 0; i < numBlocks; i++) {
501
        Compositor::template compositeVector<true,true, Vc::CurrentImplementation::current()>(src1, dst1, msk1, params.opacity, paramsWrapper);
502 503
        for (int j = 0; j < vecSize; j++) {

504
            //if (8 * i + j == 7080) {
505 506 507
            //    qDebug() << "src: " << src2[0] << src2[1] << src2[2] << src2[3];
            //    qDebug() << "dst: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];
            //    qDebug() << "msk:" << msk2[0];
508 509
            //}

510
            Compositor::template compositeOnePixelScalar<true, Vc::CurrentImplementation::current()>(src2, dst2, msk2, params.opacity, paramsWrapper);
511

512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
            bool compareResult = true;
            if (pixelSize == 4) {
                compareResult = comparePixels<quint8>(dst1, dst2, 0);
                if (!compareResult) {
                    ++errorcount;
                    compareResult = comparePixels<quint8>(dst1, dst2, 1);
                    if (!compareResult) {
                        ++errorcount;
                    }
                }
            }
            else if (pixelSize == 16) {
                compareResult = comparePixels<float>(reinterpret_cast<float*>(dst1), reinterpret_cast<float*>(dst2), 0);
            }
            else {
                qFatal("Pixel size %i is not implemented", pixelSize);
            }

            if(!compareResult || errorcount > 1) {
531 532 533
                qDebug() << "Wrong rounding in pixel:" << 8 * i + j;
                qDebug() << "Vector version: " << dst1[0] << dst1[1] << dst1[2] << dst1[3];
                qDebug() << "Scalar version: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];
534

535 536
                qDebug() << "src:" << src1[0] << src1[1] << src1[2] << src1[3];
                qDebug() << "msk:" << msk1[0];
537 538 539 540

                QFAIL("Wrong rounding");
            }

541 542 543 544
            src1 += pixelSize;
            dst1 += pixelSize;
            src2 += pixelSize;
            dst2 += pixelSize;
545 546 547 548 549 550 551 552 553 554 555
            msk1++;
            msk2++;
        }
    }

    freeTiles(tiles, 0, 0);
}

#endif


556 557 558
// Rounding check: 8-bit creamy alpha-darken, opacity 0.5, flow 0.3.
// Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_03()
{
#ifdef HAVE_VC
    using Compositor = AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy>;
    checkRounding<Compositor>(0.5, 0.3);
#endif
}

// Rounding check: 8-bit creamy alpha-darken, opacity 0.5, flow 0.5.
// Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_05()
{
#ifdef HAVE_VC
    using Compositor = AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy>;
    checkRounding<Compositor>(0.5, 0.5);
#endif
}

// Rounding check: 8-bit creamy alpha-darken, opacity 0.5, flow 0.7.
// Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_07()
{
#ifdef HAVE_VC
    using Compositor = AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy>;
    checkRounding<Compositor>(0.5, 0.7);
#endif
}

// Rounding check: 8-bit creamy alpha-darken, opacity 0.5, flow 1.0.
// Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10()
{
#ifdef HAVE_VC
    using Compositor = AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy>;
    checkRounding<Compositor>(0.5, 1.0);
#endif
}

// Rounding check: 8-bit creamy alpha-darken, opacity 0.5, flow 1.0,
// average opacity 0.8. Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10_08()
{
#ifdef HAVE_VC
    using Compositor = AlphaDarkenCompositor32<quint8, quint32, KoAlphaDarkenParamsWrapperCreamy>;
    checkRounding<Compositor>(0.5, 1.0, 0.8);
#endif
}

591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625
// Rounding check on 16-byte float pixels, opacity 0.5, flow 0.3.
// NOTE(review): the test is named after alpha-darken but instantiates
// OverCompositor128 - confirm this is intended.
void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_03()
{
#ifdef HAVE_VC
    using Compositor = OverCompositor128<float, float, false, true>;
    checkRounding<Compositor>(0.5, 0.3, -1, 16);
#endif
}

// Rounding check on 16-byte float pixels, opacity 0.5, flow 0.5.
// NOTE(review): the test is named after alpha-darken but instantiates
// OverCompositor128 - confirm this is intended.
void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_05()
{
#ifdef HAVE_VC
    using Compositor = OverCompositor128<float, float, false, true>;
    checkRounding<Compositor>(0.5, 0.5, -1, 16);
#endif
}

// Rounding check on 16-byte float pixels, opacity 0.5, flow 0.7.
// NOTE(review): the test is named after alpha-darken but instantiates
// OverCompositor128 - confirm this is intended.
void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_07()
{
#ifdef HAVE_VC
    using Compositor = OverCompositor128<float, float, false, true>;
    checkRounding<Compositor>(0.5, 0.7, -1, 16);
#endif
}

// Rounding check on 16-byte float pixels, opacity 0.5, flow 1.0.
// NOTE(review): the test is named after alpha-darken but instantiates
// OverCompositor128 - confirm this is intended.
void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_10()
{
#ifdef HAVE_VC
    using Compositor = OverCompositor128<float, float, false, true>;
    checkRounding<Compositor>(0.5, 1.0, -1, 16);
#endif
}

// Rounding check on 16-byte float pixels, opacity 0.5, flow 1.0,
// average opacity 0.8.
// NOTE(review): the test is named after alpha-darken but instantiates
// OverCompositor128 - confirm this is intended.
void KisCompositionBenchmark::checkRoundingAlphaDarkenF32_05_10_08()
{
#ifdef HAVE_VC
    using Compositor = OverCompositor128<float, float, false, true>;
    checkRounding<Compositor>(0.5, 1.0, 0.8, 16);
#endif
}

626 627 628
// Rounding check: 8-bit "over" compositor, opacity 0.5, flow 0.3.
// Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingOver()
{
#ifdef HAVE_VC
    using Compositor = OverCompositor32<quint8, quint32, false, true>;
    checkRounding<Compositor>(0.5, 0.3);
#endif
}

633 634 635 636 637 638 639
// Rounding check: float "over" compositor on 16-byte pixels,
// opacity 0.5, flow 0.3. Does nothing when built without Vc.
void KisCompositionBenchmark::checkRoundingOverRgbaF32()
{
#ifdef HAVE_VC
    using Compositor = OverCompositor128<float, float, false, true>;
    checkRounding<Compositor>(0.5, 0.3, -1, 16);
#endif
}

640 641 642
void KisCompositionBenchmark::compareAlphaDarkenOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
643
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy32(cs);
644
    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits, KoAlphaDarkenParamsWrapperCreamy>(cs);
645

646
    QVERIFY(compareTwoOps(true, opAct, opExp));
647 648 649 650 651

    delete opExp;
    delete opAct;
}

652 653 654
void KisCompositionBenchmark::compareRgbF32AlphaDarkenOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
655
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy128(cs);
656
    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoRgbF32Traits, KoAlphaDarkenParamsWrapperCreamy>(cs);
657 658 659 660 661 662 663

    QVERIFY(compareTwoOps(true, opAct, opExp));

    delete opExp;
    delete opAct;
}

664 665 666
void KisCompositionBenchmark::compareAlphaDarkenOpsNoMask()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
667
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy32(cs);
668
    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits, KoAlphaDarkenParamsWrapperCreamy>(cs);
669 670 671 672 673 674 675

    QVERIFY(compareTwoOps(false, opAct, opExp));

    delete opExp;
    delete opAct;
}

676 677 678 679 680 681 682 683 684
// Compares the factory-created 8-bit "over" op against the
// KoCompositeOpOver template, with a mask.
void KisCompositionBenchmark::compareOverOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs);

    const bool opsMatch = compareTwoOps(true, opAct, opExp);

    // QVERIFY returns from the function on failure, so release the ops
    // before asserting; otherwise a failed comparison would leak them.
    delete opExp;
    delete opAct;

    QVERIFY(opsMatch);
}

// Compares the factory-created 8-bit "over" op against the
// KoCompositeOpOver template, without a mask.
void KisCompositionBenchmark::compareOverOpsNoMask()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs);

    const bool opsMatch = compareTwoOps(false, opAct, opExp);

    // QVERIFY returns from the function on failure, so release the ops
    // before asserting; otherwise a failed comparison would leak them.
    delete opExp;
    delete opAct;

    QVERIFY(opsMatch);
}

700 701 702 703 704 705 706 707 708 709 710 711
// Compares the factory-created float "over" op against the
// KoCompositeOpOver template on RGBA/F32, without a mask.
void KisCompositionBenchmark::compareRgbF32OverOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp128(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoRgbF32Traits>(cs);

    const bool opsMatch = compareTwoOps(false, opAct, opExp);

    // QVERIFY returns from the function on failure, so release the ops
    // before asserting; otherwise a failed comparison would leak them.
    delete opExp;
    delete opAct;

    QVERIFY(opsMatch);
}

712
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenLegacy()
713 714
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
715
    KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoBgrU8Traits, KoAlphaDarkenParamsWrapperCreamy>(cs);
716
    benchmarkCompositeOp(op, "Legacy");
717 718 719
    delete op;
}

720
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenOptimized()
721 722
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
723
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy32(cs);
724
    benchmarkCompositeOp(op, "Optimized");
725 726 727
    delete op;
}

728
void KisCompositionBenchmark::testRgb8CompositeOverLegacy()
729 730 731
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = new KoCompositeOpOver<KoBgrU8Traits>(cs);
732
    benchmarkCompositeOp(op, "Legacy");
733 734 735
    delete op;
}

736
void KisCompositionBenchmark::testRgb8CompositeOverOptimized()
737 738 739
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp32(cs);
740
    benchmarkCompositeOp(op, "Optimized");
741 742 743
    delete op;
}

744 745 746
void KisCompositionBenchmark::testRgbF32CompositeAlphaDarkenLegacy()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
747
    KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoRgbF32Traits, KoAlphaDarkenParamsWrapperCreamy>(cs);
748 749 750 751 752 753 754
    benchmarkCompositeOp(op, "Legacy");
    delete op;
}

void KisCompositionBenchmark::testRgbF32CompositeAlphaDarkenOptimized()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
755
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOpCreamy128(cs);
756 757 758 759
    benchmarkCompositeOp(op, "Optimized");
    delete op;
}

760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775
void KisCompositionBenchmark::testRgbF32CompositeOverLegacy()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *op = new KoCompositeOpOver<KoRgbF32Traits>(cs);
    benchmarkCompositeOp(op, "RGBF32 Legacy");
    delete op;
}

void KisCompositionBenchmark::testRgbF32CompositeOverOptimized()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp128(cs);
    benchmarkCompositeOp(op, "RGBF32 Optimized");
    delete op;
}

776
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenReal_Aligned()
777 778
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
779 780
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_ALPHA_DARKEN);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
781 782
}

783
void KisCompositionBenchmark::testRgb8CompositeOverReal_Aligned()
784 785
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
786 787
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_OVER);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
788 789
}

790 791 792 793 794 795 796
void KisCompositionBenchmark::testRgb8CompositeCopyLegacy()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_COPY);
    benchmarkCompositeOp(op, "Copy");
}

797
void KisCompositionBenchmark::benchmarkMemcpy()
798
{
799
    QVector<Tile> tiles =
800
        generateTiles(numTiles, 0, 0, ALPHA_UNIT, ALPHA_UNIT, 4);
801

802
    QBENCHMARK_ONCE {
803
        Q_FOREACH (const Tile &tile, tiles) {
804 805 806
            memcpy(tile.dst, tile.src, 4 * numPixels);
        }
    }
807

808
    freeTiles(tiles, 0, 0);
809 810
}

811
#ifdef HAVE_VC
    // Alignments used for the buffers of the conversion benchmarks below:
    // one full vector of the element type, but never less than the size of
    // a pointer.
    const int vecSize = Vc::float_v::size();
    const size_t uint8VecAlignment = qMax(sizeof(void*), sizeof(quint8) * vecSize);
    const size_t uint32VecAlignment = qMax(sizeof(void*), sizeof(quint32) * vecSize);
    const size_t floatVecAlignment = qMax(sizeof(void*), sizeof(float) * vecSize);
#endif
817

818 819 820
// Measures the direct unsigned-int -> float vector conversion: the input
// is loaded from a byte buffer into an unsigned vector and converted
// straight to a float vector. Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkUintFloat()
{
#ifdef HAVE_VC
    using uint_v = Vc::SimdArray<unsigned int, Vc::float_v::size()>;

    const int dataSize = 4096;

    void *intBuffer = 0;
    const int intError = MEMALIGN_ALLOC(&intBuffer, uint8VecAlignment, dataSize);
    if (intError) {
        qFatal("posix_memalign failed: %d", intError);
    }
    quint8 *iData = static_cast<quint8*>(intBuffer);

    void *floatBuffer = 0;
    const int floatError = MEMALIGN_ALLOC(&floatBuffer, floatVecAlignment, dataSize * sizeof(float));
    if (floatError) {
        qFatal("posix_memalign failed: %d", floatError);
    }
    float *fData = static_cast<float*>(floatBuffer);

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::size()) {
            // convert uint -> float directly, this causes
            // static_cast helper be called
            Vc::float_v converted(uint_v(iData + offset));
            converted.store(fData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

// Measures the unsigned-int -> int -> float vector conversion, i.e. the
// same work as benchmarkUintFloat() with an intermediate signed step.
// Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkUintIntFloat()
{
#ifdef HAVE_VC
    using int_v = Vc::SimdArray<int, Vc::float_v::size()>;
    using uint_v = Vc::SimdArray<unsigned int, Vc::float_v::size()>;

    const int dataSize = 4096;

    void *intBuffer = 0;
    const int intError = MEMALIGN_ALLOC(&intBuffer, uint8VecAlignment, dataSize);
    if (intError) {
        qFatal("posix_memalign failed: %d", intError);
    }
    quint8 *iData = static_cast<quint8*>(intBuffer);

    void *floatBuffer = 0;
    const int floatError = MEMALIGN_ALLOC(&floatBuffer, floatVecAlignment, dataSize * sizeof(float));
    if (floatError) {
        qFatal("posix_memalign failed: %d", floatError);
    }
    float *fData = static_cast<float*>(floatBuffer);

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::size()) {
            // convert uint->int->float, that avoids special sign
            // treating, and gives 2.6 times speedup
            Vc::float_v converted(int_v(uint_v(iData + offset)));
            converted.store(fData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

// Measures the direct float -> unsigned-int vector conversion.
// Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkFloatUint()
{
#ifdef HAVE_VC
    using uint_v = Vc::SimdArray<unsigned int, Vc::float_v::size()>;

    const int dataSize = 4096;

    void *intBuffer = 0;
    const int intError = MEMALIGN_ALLOC(&intBuffer, uint32VecAlignment, dataSize * sizeof(quint32));
    if (intError) {
        qFatal("posix_memalign failed: %d", intError);
    }
    quint32 *iData = static_cast<quint32*>(intBuffer);

    void *floatBuffer = 0;
    const int floatError = MEMALIGN_ALLOC(&floatBuffer, floatVecAlignment, dataSize * sizeof(float));
    if (floatError) {
        qFatal("posix_memalign failed: %d", floatError);
    }
    float *fData = static_cast<float*>(floatBuffer);

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::size()) {
            // conversion float -> uint
            uint_v converted(Vc::float_v(fData + offset));

            converted.store(iData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

// Measures the float -> int -> unsigned-int vector conversion, i.e. the
// same work as benchmarkFloatUint() with an intermediate signed step.
// Does nothing when built without Vc.
void KisCompositionBenchmark::benchmarkFloatIntUint()
{
#ifdef HAVE_VC
    using int_v = Vc::SimdArray<int, Vc::float_v::size()>;
    using uint_v = Vc::SimdArray<unsigned int, Vc::float_v::size()>;

    const int dataSize = 4096;

    void *intBuffer = 0;
    const int intError = MEMALIGN_ALLOC(&intBuffer, uint32VecAlignment, dataSize * sizeof(quint32));
    if (intError) {
        qFatal("posix_memalign failed: %d", intError);
    }
    quint32 *iData = static_cast<quint32*>(intBuffer);

    void *floatBuffer = 0;
    const int floatError = MEMALIGN_ALLOC(&floatBuffer, floatVecAlignment, dataSize * sizeof(float));
    if (floatError) {
        qFatal("posix_memalign failed: %d", floatError);
    }
    float *fData = static_cast<float*>(floatBuffer);

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::size()) {
            // conversion float -> int -> uint
            uint_v converted(int_v(Vc::float_v(fData + offset)));

            converted.store(iData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

948
QTEST_MAIN(KisCompositionBenchmark)
949