kis_composition_benchmark.cpp 26.7 KB
Newer Older
1 2
/*
 *  Copyright (c) 2012 Dmitry Kazakov <dimula73@gmail.com>
3
 *  Copyright (c) 2015 Thorsten Zachmann <zachmann@kde.org>
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

// for calculation of the needed alignment
Yuri Chornoivan's avatar
Yuri Chornoivan committed
21
#include <config-vc.h>
22
#ifdef HAVE_VC
23 24 25 26 27 28
#if defined _MSC_VER
// Let's silence the "possible loss of data" and "forcing value to bool 'true' or 'false'" warnings
#pragma warning ( push )
#pragma warning ( disable : 4244 )
#pragma warning ( disable : 4800 )
#endif
29 30
#include <Vc/Vc>
#include <Vc/IO>
31 32 33
#if defined _MSC_VER
#pragma warning ( pop )
#endif
34

35 36 37 38 39 40 41 42 43 44 45 46 47
#include "kis_composition_benchmark.h"
#include <QTest>

#include <KoColorSpace.h>
#include <KoCompositeOp.h>
#include <KoColorSpaceRegistry.h>

#include <KoColorSpaceTraits.h>
#include <KoCompositeOpAlphaDarken.h>
#include <KoCompositeOpOver.h>
#include "KoOptimizedCompositeOpFactory.h"


48
#include <KoOptimizedCompositeOpOver32.h>
49
#include <KoOptimizedCompositeOpOver128.h>
50
#include <KoOptimizedCompositeOpAlphaDarken32.h>
51 52
#endif

53 54
// for posix_memalign()
#include <stdlib.h>
55

56 57
#include <kis_debug.h>

58 59 60 61 62 63 64 65
// Portable aligned-allocation helpers.
//   MEMALIGN_ALLOC(p, a, s): stores a block of s bytes aligned to a in *p and
//                            evaluates to 0 on success, non-zero on failure.
//   MEMALIGN_FREE(p):        releases a block obtained from MEMALIGN_ALLOC.
#if defined _MSC_VER
// MSVC branch: mimic the posix_memalign() return convention (0 or errno).
#define MEMALIGN_ALLOC(p, a, s) ((*(p)) = _aligned_malloc((s), (a)), *(p) ? 0 : errno)
#define MEMALIGN_FREE(p) _aligned_free((p))
#else
#define MEMALIGN_ALLOC(p, a, s) posix_memalign((p), (a), (s))
#define MEMALIGN_FREE(p) free((p))
#endif

66 67
// Presumably the index of the alpha channel inside a 4-channel pixel.
// NOTE(review): nothing in the visible code reads this constant (index 3 is
// hard-coded instead) — confirm whether it is still needed.
const int alpha_pos = 3;

68 69 70 71 72 73
// Selects how generateAlphaValue() fills the alpha channel of generated pixels.
enum AlphaRange {
    ALPHA_ZERO,   // alpha is always 0
    ALPHA_UNIT,   // alpha is the generator's unit() value
    ALPHA_RANDOM  // alpha is drawn from the random generator
};

74 75 76 77

/**
 * Produces one alpha value according to \p range.
 *
 * \param range  ALPHA_ZERO yields 0, ALPHA_UNIT yields rnd.unit(),
 *               ALPHA_RANDOM yields the next rnd() sample.
 * \param rnd    generator providing unit() and operator()(); it is only
 *               advanced for ALPHA_RANDOM.
 */
template <typename channel_type, class RandomGenerator>
inline channel_type generateAlphaValue(AlphaRange range, RandomGenerator &rnd) {
    if (range == ALPHA_UNIT) {
        return rnd.unit();
    }

    if (range == ALPHA_RANDOM) {
        return rnd();
    }

    // ALPHA_ZERO (and any unexpected value) maps to zero.
    return channel_type(0);
}

93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_smallint.hpp>
#include <boost/random/uniform_real.hpp>

/**
 * Generic fallback of the per-channel-type random generator.
 *
 * Both members abort through qFatal(): only the explicit specializations
 * of this template provide working implementations.
 */
template <typename channel_type>
struct RandomGenerator {
    /// Fallback for the unit value; always aborts.
    channel_type unit() {
        qFatal("Wrong template instantiation");
        return channel_type(0);
    }

    /// Fallback for drawing a sample; always aborts.
    channel_type operator() () {
        qFatal("Wrong template instantiation");
        return channel_type(0);
    }
};

template <>
struct RandomGenerator<quint8>
112
{
113 114 115 116 117 118 119 120 121 122 123 124 125
    RandomGenerator(int seed)
        : m_smallint(0,255),
          m_rnd(seed)
    {
    }

    quint8 operator() () {
        return m_smallint(m_rnd);
    }

    quint8 unit() {
        return KoColorSpaceMathsTraits<quint8>::unitValue;
    }
126

127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
    boost::uniform_smallint<int> m_smallint;
    boost::mt11213b m_rnd;
};

/**
 * Random generator for float channels: samples come from a
 * default-constructed boost::uniform_real<float> distribution.
 */
template <>
struct RandomGenerator<float>
{
    /// Seeds the underlying engine with \p seed.
    RandomGenerator(int seed)
        : m_rnd(seed)
    {
    }

    /// Unit value of the float channel type.
    float unit() {
        return KoColorSpaceMathsTraits<float>::unitValue;
    }

    /// Next pseudo-random channel value.
    float operator() () {
        // Alternative kept for reference:
        // return float(m_rnd()) / float(m_rnd.max());
        return m_smallfloat(m_rnd);
    }

    boost::uniform_real<float> m_smallfloat;
    boost::mt11213b m_rnd;
};
151

152 153 154 155 156 157
/**
 * Random generator for double channels; it simply reuses the float
 * generator, so samples and the unit value have float precision.
 */
template <>
struct RandomGenerator<double> : RandomGenerator<float>
{
    /// Forwards \p seed to the float generator.
    RandomGenerator(int seed) : RandomGenerator<float>(seed) {}
};


/**
 * Fills \p numPixels four-channel source/destination pixels and their mask
 * bytes with pseudo-random data.
 *
 * Channels 0..2 of every pixel are random; channel 3 is produced by
 * generateAlphaValue() using \p srcAlphaRange / \p dstAlphaRange.
 * Pixel data is seeded with \p seed, the mask with \p seed + 1, so equal
 * arguments always produce equal buffers.
 *
 * \param srcPixels, dstPixels  buffers holding numPixels * 4 channel_type values
 * \param mask                  buffer holding numPixels bytes
 */
template <typename channel_type>
void generateDataLine(uint seed, int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask, AlphaRange srcAlphaRange, AlphaRange dstAlphaRange)
{
    Q_ASSERT(numPixels >= 4);

    RandomGenerator<channel_type> rnd(seed);
    RandomGenerator<quint8> maskRnd(seed + 1);

    channel_type *srcChannels = reinterpret_cast<channel_type*>(srcPixels);
    channel_type *dstChannels = reinterpret_cast<channel_type*>(dstPixels);

    for (int pixel = 0; pixel < numPixels; ++pixel) {
        channel_type *src = srcChannels + 4 * pixel;
        channel_type *dst = dstChannels + 4 * pixel;

        // Color channels: the source sample is drawn before the destination one.
        for (int channel = 0; channel < 3; ++channel) {
            src[channel] = rnd();
            dst[channel] = rnd();
        }

        // Alpha channel: source first, then destination.
        src[3] = generateAlphaValue<channel_type>(srcAlphaRange, rnd);
        dst[3] = generateAlphaValue<channel_type>(dstAlphaRange, rnd);

        mask[pixel] = maskRnd();
    }
}

void printData(int numPixels, quint8 *srcPixels, quint8 *dstPixels, quint8 *mask)
{
    for (int i = 0; i < numPixels; i++) {
193
        dbgKrita << "Src: "
194 195 196 197 198 199
                 << srcPixels[i*4] << "\t"
                 << srcPixels[i*4+1] << "\t"
                 << srcPixels[i*4+2] << "\t"
                 << srcPixels[i*4+3] << "\t"
                 << "Msk:" << mask[i];

200
        dbgKrita << "Dst: "
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
                 << dstPixels[i*4] << "\t"
                 << dstPixels[i*4+1] << "\t"
                 << dstPixels[i*4+2] << "\t"
                 << dstPixels[i*4+3];
    }
}

// Geometry shared by all generated tiles.
const int rowStride = 64;                       // pixels per tile row
const int totalRows = 64;                       // rows per tile
const QRect processRect(0,0,64,64);             // area handed to the composite ops
const int numPixels = rowStride * totalRows;    // pixels per tile
const int numTiles = 1024;                      // tiles processed per benchmark run


// One benchmark tile: raw pointers into buffers allocated by generateTiles()
// and released by freeTiles().
struct Tile {
    quint8 *src;   // source pixels (may be shifted off the aligned allocation)
    quint8 *dst;   // destination pixels (may be shifted off the aligned allocation)
    quint8 *mask;  // one mask byte per pixel
};
220 221 222
#include <stdint.h>
QVector<Tile> generateTiles(int size,
                            const int srcAlignmentShift,
223 224
                            const int dstAlignmentShift,
                            AlphaRange srcAlphaRange,
225 226
                            AlphaRange dstAlphaRange,
                            const quint32 pixelSize)
227
{
228
    QVector<Tile> tiles(size);
229 230 231 232 233 234 235

#ifdef HAVE_VC
    const int vecSize = Vc::float_v::Size;
#else
    const int vecSize = 1;
#endif

236
    // the 256 are used to make sure that we have a good alignment no matter what build options are used.
237 238
    const size_t pixelAlignment = qMax(size_t(vecSize * sizeof(float)), size_t(256));
    const size_t maskAlignment = qMax(size_t(vecSize), size_t(256));
239
    for (int i = 0; i < size; i++) {
240
        void *ptr = NULL;
241
        int error = MEMALIGN_ALLOC(&ptr, pixelAlignment, numPixels * pixelSize + srcAlignmentShift);
242 243 244
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
245
        tiles[i].src = (quint8*)ptr + srcAlignmentShift;
246
        error = MEMALIGN_ALLOC(&ptr, pixelAlignment, numPixels * pixelSize + dstAlignmentShift);
247 248 249
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
250
        tiles[i].dst = (quint8*)ptr + dstAlignmentShift;
251
        error = MEMALIGN_ALLOC(&ptr, maskAlignment, numPixels);
252 253 254
        if (error) {
            qFatal("posix_memalign failed: %d", error);
        }
255
        tiles[i].mask = (quint8*)ptr;
256 257 258 259 260 261 262 263

        if (pixelSize == 4) {
            generateDataLine<quint8>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange);
        } else if (pixelSize == 16) {
            generateDataLine<float>(1, numPixels, tiles[i].src, tiles[i].dst, tiles[i].mask, srcAlphaRange, dstAlphaRange);
        } else {
            qFatal("Pixel size %i is not implemented", pixelSize);
        }
264 265
    }

266 267 268 269 270 271 272
    return tiles;
}

/**
 * Releases the buffers of all \p tiles.
 *
 * \p srcAlignmentShift and \p dstAlignmentShift must equal the values given
 * to generateTiles(): they are subtracted to recover the originally
 * allocated pointers before freeing.
 */
void freeTiles(QVector<Tile> tiles,
               const int srcAlignmentShift,
               const int dstAlignmentShift)
{
    for (int i = 0; i < tiles.size(); ++i) {
        const Tile &current = tiles.at(i);

        MEMALIGN_FREE(current.src - srcAlignmentShift);
        MEMALIGN_FREE(current.dst - dstAlignmentShift);
        MEMALIGN_FREE(current.mask);
    }
}

280 281
/**
 * Returns true when \p a and \p b differ by at most \p prec.
 *
 * The difference is always taken as "larger minus smaller", so the result
 * is also correct for unsigned channel types that are not promoted to int
 * (a plain a - b would wrap around there).
 */
template <typename channel_type>
inline bool fuzzyCompare(channel_type a, channel_type b, channel_type prec) {
    return (a > b ? a - b : b - a) <= prec;
}

285 286
/**
 * Compares two four-channel pixels with tolerance \p prec.
 *
 * Pixels whose fourth channel is zero in both operands are considered
 * equal regardless of the other channels; otherwise every channel has to
 * match within \p prec.
 */
template <typename channel_type>
inline bool comparePixels(channel_type *p1, channel_type *p2, channel_type prec) {
    if (p1[3] == 0 && p2[3] == 0) {
        return true;
    }

    for (int channel = 0; channel < 4; ++channel) {
        if (!fuzzyCompare(p1[channel], p2[channel], prec)) {
            return false;
        }
    }

    return true;
}

/**
 * Compares the destination buffers of tiles[0] ("actual") and tiles[1]
 * ("expected") pixel by pixel with tolerance \p prec.
 *
 * On the first mismatch the pixel index, both results, their difference and
 * the corresponding source and mask values are logged and false is returned.
 */
template <typename channel_type>
bool compareTwoOpsPixels(QVector<Tile> &tiles, channel_type prec) {
    channel_type *actualDst = reinterpret_cast<channel_type*>(tiles[0].dst);
    channel_type *expectedDst = reinterpret_cast<channel_type*>(tiles[1].dst);

    channel_type *actualSrc = reinterpret_cast<channel_type*>(tiles[0].src);
    channel_type *expectedSrc = reinterpret_cast<channel_type*>(tiles[1].src);

    for (int i = 0; i < numPixels; i++) {
        channel_type *actual = actualDst + 4 * i;
        channel_type *expected = expectedDst + 4 * i;

        if (comparePixels<channel_type>(actual, expected, prec)) {
            continue;
        }

        dbgKrita << "Wrong result:" << i;
        dbgKrita << "Act: " << actual[0] << actual[1] << actual[2] << actual[3];
        dbgKrita << "Exp: " << expected[0] << expected[1] << expected[2] << expected[3];
        dbgKrita << "Dif: " << actual[0] - expected[0] << actual[1] - expected[1] << actual[2] - expected[2] << actual[3] - expected[3];

        channel_type *s1 = actualSrc + 4 * i;
        channel_type *s2 = expectedSrc + 4 * i;

        dbgKrita << "SrcA:" << s1[0] << s1[1] << s1[2] << s1[3];
        dbgKrita << "SrcE:" << s2[0] << s2[1] << s2[2] << s2[3];

        dbgKrita << "MskA:" << tiles[0].mask[i];
        dbgKrita << "MskE:" << tiles[1].mask[i];

        return false;
    }

    return true;
}

326 327
bool compareTwoOps(bool haveMask, const KoCompositeOp *op1, const KoCompositeOp *op2)
{
328 329 330 331
    Q_ASSERT(op1->colorSpace()->pixelSize() == op2->colorSpace()->pixelSize());
    const quint32 pixelSize = op1->colorSpace()->pixelSize();
    const int alignment = 16;
    QVector<Tile> tiles = generateTiles(2, alignment, alignment, ALPHA_RANDOM, ALPHA_RANDOM, op1->colorSpace()->pixelSize());
332 333 334 335 336 337 338

    KoCompositeOp::ParameterInfo params;
    params.dstRowStride  = 4 * rowStride;
    params.srcRowStride  = 4 * rowStride;
    params.maskRowStride = rowStride;
    params.rows          = processRect.height();
    params.cols          = processRect.width();
339 340
    // This is a hack as in the old version we get a rounding of opacity to this value
    params.opacity       = float(Arithmetic::scale<quint8>(0.5*1.0f))/255.0;
341 342 343 344 345 346 347 348 349 350 351 352 353
    params.flow          = 0.3*1.0f;
    params.channelFlags  = QBitArray();

    params.dstRowStart   = tiles[0].dst;
    params.srcRowStart   = tiles[0].src;
    params.maskRowStart  = haveMask ? tiles[0].mask : 0;
    op1->composite(params);

    params.dstRowStart   = tiles[1].dst;
    params.srcRowStart   = tiles[1].src;
    params.maskRowStart  = haveMask ? tiles[1].mask : 0;
    op2->composite(params);

354 355 356 357 358 359 360 361 362
    bool compareResult = true;
    if (pixelSize == 4) {
        compareResult = compareTwoOpsPixels<quint8>(tiles, 10);
    }
    else if (pixelSize == 16) {
        compareResult = compareTwoOpsPixels<float>(tiles, 0);
    }
    else {
        qFatal("Pixel size %i is not implemented", pixelSize);
363 364
    }

365
    freeTiles(tiles, alignment, alignment);
366

367
    return compareResult;
368 369
}

370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
/**
 * Builds the human-readable label printed with a benchmark result.
 *
 * The label consists of three fixed-width parts: buffer alignment
 * ("Aligned", "SrcUnalig", "DstUnalig", "Unaligned"), mask usage and the
 * alpha ranges of source and destination.
 *
 * A non-zero shift means the corresponding buffer is unaligned, i.e. a
 * non-zero \p srcAlignmentShift alone yields "SrcUnalig" and a non-zero
 * \p dstAlignmentShift alone yields "DstUnalig".
 */
QString getTestName(bool haveMask,
                    const int srcAlignmentShift,
                    const int dstAlignmentShift,
                    AlphaRange srcAlphaRange,
                    AlphaRange dstAlphaRange)
{
    const bool srcUnaligned = srcAlignmentShift != 0;
    const bool dstUnaligned = dstAlignmentShift != 0;

    QString testName;
    testName +=
        !srcUnaligned && !dstUnaligned ? "Aligned   " :
         srcUnaligned && !dstUnaligned ? "SrcUnalig " :
        !srcUnaligned &&  dstUnaligned ? "DstUnalig " :
                                         "Unaligned ";

    testName += haveMask ? "Mask   " : "NoMask ";

    testName +=
        srcAlphaRange == ALPHA_RANDOM ? "SrcRand " :
        srcAlphaRange == ALPHA_ZERO   ? "SrcZero " :
        srcAlphaRange == ALPHA_UNIT   ? "SrcUnit " : "###";

    testName +=
        dstAlphaRange == ALPHA_RANDOM ? "DstRand" :
        dstAlphaRange == ALPHA_ZERO   ? "DstZero" :
        dstAlphaRange == ALPHA_UNIT   ? "DstUnit" : "###";

    return testName;
}

399 400
void benchmarkCompositeOp(const KoCompositeOp *op,
                          bool haveMask,
401 402
                          qreal opacity,
                          qreal flow,
403
                          const int srcAlignmentShift,
404 405 406
                          const int dstAlignmentShift,
                          AlphaRange srcAlphaRange,
                          AlphaRange dstAlphaRange)
407
{
408 409
    QString testName = getTestName(haveMask, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange);

410
    QVector<Tile> tiles =
411
        generateTiles(numTiles, srcAlignmentShift, dstAlignmentShift, srcAlphaRange, dstAlphaRange, op->colorSpace()->pixelSize());
412 413 414 415 416 417 418 419 420

    const int tileOffset = 4 * (processRect.y() * rowStride + processRect.x());

    KoCompositeOp::ParameterInfo params;
    params.dstRowStride  = 4 * rowStride;
    params.srcRowStride  = 4 * rowStride;
    params.maskRowStride = rowStride;
    params.rows          = processRect.height();
    params.cols          = processRect.width();
421 422
    params.opacity       = opacity;
    params.flow          = flow;
423 424
    params.channelFlags  = QBitArray();

425 426 427
    QTime timer;
    timer.start();

428
    Q_FOREACH (const Tile &tile, tiles) {
429 430 431 432
        params.dstRowStart   = tile.dst + tileOffset;
        params.srcRowStart   = tile.src + tileOffset;
        params.maskRowStart  = haveMask ? tile.mask : 0;
        op->composite(params);
433 434
    }

435
    dbgKrita << testName << "RESULT:" << timer.elapsed() << "msec";
436

437 438 439
    freeTiles(tiles, srcAlignmentShift, dstAlignmentShift);
}

440 441
void benchmarkCompositeOp(const KoCompositeOp *op, const QString &postfix)
{
442
    dbgKrita << "Testing Composite Op:" << op->id() << "(" << postfix << ")";
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466

    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 8, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 8, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 4, 8, ALPHA_RANDOM, ALPHA_RANDOM);

/// --- Vary the content of the source and destination

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_RANDOM);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_RANDOM);

/// ---

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_ZERO);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_ZERO);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_ZERO);

/// ---

    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_RANDOM, ALPHA_UNIT);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_ZERO, ALPHA_UNIT);
    benchmarkCompositeOp(op, false, 1.0, 1.0, 0, 0, ALPHA_UNIT, ALPHA_UNIT);
}
467 468 469 470

#ifdef HAVE_VC

template<class Compositor>
471
void checkRounding(qreal opacity, qreal flow, qreal averageOpacity = -1, quint32 pixelSize = 4)
472 473
{
    QVector<Tile> tiles =
474
        generateTiles(2, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM, pixelSize);
475 476 477 478 479 480 481 482 483 484 485 486 487

    const int vecSize = Vc::float_v::Size;

    const int numBlocks = numPixels / vecSize;

    quint8 *src1 = tiles[0].src;
    quint8 *dst1 = tiles[0].dst;
    quint8 *msk1 = tiles[0].mask;

    quint8 *src2 = tiles[1].src;
    quint8 *dst2 = tiles[1].dst;
    quint8 *msk2 = tiles[1].mask;

488 489 490 491 492 493 494 495 496 497 498 499
    KoCompositeOp::ParameterInfo params;
    params.opacity = opacity;
    params.flow = flow;

    if (averageOpacity >= 0.0) {
        params._lastOpacityData = averageOpacity;
        params.lastOpacity = &params._lastOpacityData;
    }

    params.channelFlags = QBitArray();
    typename Compositor::OptionalParams optionalParams(params);

500 501
    // The error count is needed as 38.5 gets rounded to 38 instead of 39 in the vc version.
    int errorcount = 0;
502
    for (int i = 0; i < numBlocks; i++) {
503
        Compositor::template compositeVector<true,true, VC_IMPL>(src1, dst1, msk1, params.opacity, optionalParams);
504 505
        for (int j = 0; j < vecSize; j++) {

506
            //if (8 * i + j == 7080) {
507 508 509
            //    dbgKrita << "src: " << src2[0] << src2[1] << src2[2] << src2[3];
            //    dbgKrita << "dst: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];
            //    dbgKrita << "msk:" << msk2[0];
510 511 512
            //}

            Compositor::template compositeOnePixelScalar<true, VC_IMPL>(src2, dst2, msk2, params.opacity, optionalParams);
513

514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
            bool compareResult = true;
            if (pixelSize == 4) {
                compareResult = comparePixels<quint8>(dst1, dst2, 0);
                if (!compareResult) {
                    ++errorcount;
                    compareResult = comparePixels<quint8>(dst1, dst2, 1);
                    if (!compareResult) {
                        ++errorcount;
                    }
                }
            }
            else if (pixelSize == 16) {
                compareResult = comparePixels<float>(reinterpret_cast<float*>(dst1), reinterpret_cast<float*>(dst2), 0);
            }
            else {
                qFatal("Pixel size %i is not implemented", pixelSize);
            }

            if(!compareResult || errorcount > 1) {
533 534 535
                dbgKrita << "Wrong rounding in pixel:" << 8 * i + j;
                dbgKrita << "Vector version: " << dst1[0] << dst1[1] << dst1[2] << dst1[3];
                dbgKrita << "Scalar version: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];
536

537 538
                dbgKrita << "src:" << src1[0] << src1[1] << src1[2] << src1[3];
                dbgKrita << "msk:" << msk1[0];
539 540 541 542

                QFAIL("Wrong rounding");
            }

543 544 545 546
            src1 += pixelSize;
            dst1 += pixelSize;
            src2 += pixelSize;
            dst2 += pixelSize;
547 548 549 550 551 552 553 554 555 556 557
            msk1++;
            msk2++;
        }
    }

    freeTiles(tiles, 0, 0);
}

#endif


558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586
/// Rounding check of the 8-bit alpha-darken compositor (opacity 0.5, flow 0.3).
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_03()
{
#ifdef HAVE_VC
    const qreal opacity = 0.5;
    const qreal flow = 0.3;
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(opacity, flow);
#endif
}

/// Rounding check of the 8-bit alpha-darken compositor (opacity 0.5, flow 0.5).
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_05()
{
#ifdef HAVE_VC
    const qreal opacity = 0.5;
    const qreal flow = 0.5;
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(opacity, flow);
#endif
}

/// Rounding check of the 8-bit alpha-darken compositor (opacity 0.5, flow 0.7).
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_07()
{
#ifdef HAVE_VC
    const qreal opacity = 0.5;
    const qreal flow = 0.7;
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(opacity, flow);
#endif
}

/// Rounding check of the 8-bit alpha-darken compositor (opacity 0.5, flow 1.0).
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10()
{
#ifdef HAVE_VC
    const qreal opacity = 0.5;
    const qreal flow = 1.0;
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(opacity, flow);
#endif
}

/// Rounding check of the 8-bit alpha-darken compositor (opacity 0.5,
/// flow 1.0) with an explicit average opacity of 0.8.
void KisCompositionBenchmark::checkRoundingAlphaDarken_05_10_08()
{
#ifdef HAVE_VC
    const qreal opacity = 0.5;
    const qreal flow = 1.0;
    const qreal averageOpacity = 0.8;
    checkRounding<AlphaDarkenCompositor32<quint8, quint32> >(opacity, flow, averageOpacity);
#endif
}

/// Rounding check of the 8-bit "over" compositor (opacity 0.5, flow 0.3).
void KisCompositionBenchmark::checkRoundingOver()
{
#ifdef HAVE_VC
    const qreal opacity = 0.5;
    const qreal flow = 0.3;
    checkRounding<OverCompositor32<quint8, quint32, false, true> >(opacity, flow);
#endif
}

600 601 602 603 604 605 606
/// Rounding check of the float "over" compositor (opacity 0.5, flow 0.3)
/// on 16-byte pixels; the negative average opacity leaves "last opacity" unset.
void KisCompositionBenchmark::checkRoundingOverRgbaF32()
{
#ifdef HAVE_VC
    const qreal opacity = 0.5;
    const qreal flow = 0.3;
    const qreal averageOpacity = -1;
    const quint32 pixelSize = 16;
    checkRounding<OverCompositor128<float, float, false, true> >(opacity, flow, averageOpacity, pixelSize);
#endif
}

607 608 609 610 611 612
void KisCompositionBenchmark::compareAlphaDarkenOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits>(cs);

613
    QVERIFY(compareTwoOps(true, opAct, opExp));
614 615 616 617 618

    delete opExp;
    delete opAct;
}

619 620 621 622 623 624 625 626 627 628 629 630
/// Compares the optimized and the legacy 8-bit alpha-darken ops without a mask.
void KisCompositionBenchmark::compareAlphaDarkenOpsNoMask()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits>(cs);

    const bool opsMatch = compareTwoOps(false, opAct, opExp);

    // Release the ops before verifying: QVERIFY returns early on failure
    // and would otherwise leak them.
    delete opExp;
    delete opAct;

    QVERIFY(opsMatch);
}

631 632 633 634 635 636 637 638 639
/// Compares the optimized and the legacy 8-bit "over" ops with a mask.
void KisCompositionBenchmark::compareOverOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs);

    const bool opsMatch = compareTwoOps(true, opAct, opExp);

    // Release the ops before verifying: QVERIFY returns early on failure
    // and would otherwise leak them.
    delete opExp;
    delete opAct;

    QVERIFY(opsMatch);
}

/// Compares the optimized and the legacy 8-bit "over" ops without a mask.
void KisCompositionBenchmark::compareOverOpsNoMask()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp32(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoBgrU8Traits>(cs);

    const bool opsMatch = compareTwoOps(false, opAct, opExp);

    // Release the ops before verifying: QVERIFY returns early on failure
    // and would otherwise leak them.
    delete opExp;
    delete opAct;

    QVERIFY(opsMatch);
}

655 656 657 658 659 660 661 662 663 664 665 666
/// Compares the optimized and the legacy RGBA F32 "over" ops without a mask.
void KisCompositionBenchmark::compareRgbF32OverOps()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createOverOp128(cs);
    KoCompositeOp *opExp = new KoCompositeOpOver<KoRgbF32Traits>(cs);

    const bool opsMatch = compareTwoOps(false, opAct, opExp);

    // Release the ops before verifying: QVERIFY returns early on failure
    // and would otherwise leak them.
    delete opExp;
    delete opAct;

    QVERIFY(opsMatch);
}

667
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenLegacy()
668 669 670
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = new KoCompositeOpAlphaDarken<KoBgrU8Traits>(cs);
671
    benchmarkCompositeOp(op, "Legacy");
672 673 674
    delete op;
}

675
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenOptimized()
676 677 678
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createAlphaDarkenOp32(cs);
679
    benchmarkCompositeOp(op, "Optimized");
680 681 682
    delete op;
}

683
void KisCompositionBenchmark::testRgb8CompositeOverLegacy()
684 685 686
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = new KoCompositeOpOver<KoBgrU8Traits>(cs);
687
    benchmarkCompositeOp(op, "Legacy");
688 689 690
    delete op;
}

691
void KisCompositionBenchmark::testRgb8CompositeOverOptimized()
692 693 694
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp32(cs);
695
    benchmarkCompositeOp(op, "Optimized");
696 697 698
    delete op;
}

699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714
void KisCompositionBenchmark::testRgbF32CompositeOverLegacy()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *op = new KoCompositeOpOver<KoRgbF32Traits>(cs);
    benchmarkCompositeOp(op, "RGBF32 Legacy");
    delete op;
}

void KisCompositionBenchmark::testRgbF32CompositeOverOptimized()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->colorSpace("RGBA", "F32", "");
    KoCompositeOp *op = KoOptimizedCompositeOpFactory::createOverOp128(cs);
    benchmarkCompositeOp(op, "RGBF32 Optimized");
    delete op;
}

715
void KisCompositionBenchmark::testRgb8CompositeAlphaDarkenReal_Aligned()
716 717
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
718 719
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_ALPHA_DARKEN);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
720 721
}

722
void KisCompositionBenchmark::testRgb8CompositeOverReal_Aligned()
723 724
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
725 726
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_OVER);
    benchmarkCompositeOp(op, true, 0.5, 0.3, 0, 0, ALPHA_RANDOM, ALPHA_RANDOM);
727 728
}

729 730 731 732 733 734 735
void KisCompositionBenchmark::testRgb8CompositeCopyLegacy()
{
    const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
    const KoCompositeOp *op = cs->compositeOp(COMPOSITE_COPY);
    benchmarkCompositeOp(op, "Copy");
}

736
void KisCompositionBenchmark::benchmarkMemcpy()
737
{
738
    QVector<Tile> tiles =
739
        generateTiles(numTiles, 0, 0, ALPHA_UNIT, ALPHA_UNIT, 4);
740

741
    QBENCHMARK_ONCE {
742
        Q_FOREACH (const Tile &tile, tiles) {
743 744 745
            memcpy(tile.dst, tile.src, 4 * numPixels);
        }
    }
746

747
    freeTiles(tiles, 0, 0);
748 749
}

750 751
#ifdef HAVE_VC
    // Alignments for the buffers of the Vc conversion benchmarks below: one
    // full Vc vector of the element type, but never less than sizeof(void*).
    const int vecSize = Vc::float_v::Size;

    const size_t uint8VecAlignment =
        qMax(vecSize * sizeof(quint8), sizeof(void*));
    const size_t uint32VecAlignment =
        qMax(vecSize * sizeof(quint32), sizeof(void*));
    const size_t floatVecAlignment =
        qMax(vecSize * sizeof(float), sizeof(void*));
#endif
756

757 758 759
/// Measures the direct Vc conversion uint_v -> float_v over a buffer of
/// 4096 elements. The input buffer is left uninitialized; only the
/// conversion time is measured.
void KisCompositionBenchmark::benchmarkUintFloat()
{
#ifdef HAVE_VC
    const int dataSize = 4096;

    void *rawInput = NULL;
    int error = MEMALIGN_ALLOC(&rawInput, uint8VecAlignment, dataSize);
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    void *rawOutput = NULL;
    error = MEMALIGN_ALLOC(&rawOutput, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    quint8 *iData = (quint8*)rawInput;
    float *fData = (float*)rawOutput;

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::Size) {
            // convert uint -> float directly, this causes
            // static_cast helper be called
            Vc::float_v converted(Vc::uint_v(iData + offset));
            converted.store(fData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

/// Measures the two-step Vc conversion uint_v -> int_v -> float_v over a
/// buffer of 4096 elements. The input buffer is left uninitialized; only
/// the conversion time is measured.
void KisCompositionBenchmark::benchmarkUintIntFloat()
{
#ifdef HAVE_VC
    const int dataSize = 4096;

    void *rawInput = NULL;
    int error = MEMALIGN_ALLOC(&rawInput, uint8VecAlignment, dataSize);
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    void *rawOutput = NULL;
    error = MEMALIGN_ALLOC(&rawOutput, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    quint8 *iData = (quint8*)rawInput;
    float *fData = (float*)rawOutput;

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::Size) {
            // convert uint->int->float, that avoids special sign
            // treating, and gives 2.6 times speedup
            Vc::float_v converted(Vc::int_v(Vc::uint_v(iData + offset)));
            converted.store(fData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

/// Measures the direct Vc conversion float_v -> uint_v over a buffer of
/// 4096 elements. The input buffer is left uninitialized; only the
/// conversion time is measured.
void KisCompositionBenchmark::benchmarkFloatUint()
{
#ifdef HAVE_VC
    const int dataSize = 4096;

    void *rawOutput = NULL;
    int error = MEMALIGN_ALLOC(&rawOutput, uint32VecAlignment, dataSize * sizeof(quint32));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    void *rawInput = NULL;
    error = MEMALIGN_ALLOC(&rawInput, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    quint32 *iData = (quint32*)rawOutput;
    float *fData = (float*)rawInput;

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::Size) {
            // conversion float -> uint
            Vc::uint_v converted(Vc::float_v(fData + offset));

            converted.store(iData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

/// Measures the two-step Vc conversion float_v -> int_v -> uint_v over a
/// buffer of 4096 elements. The input buffer is left uninitialized; only
/// the conversion time is measured.
void KisCompositionBenchmark::benchmarkFloatIntUint()
{
#ifdef HAVE_VC
    const int dataSize = 4096;

    void *rawOutput = NULL;
    int error = MEMALIGN_ALLOC(&rawOutput, uint32VecAlignment, dataSize * sizeof(quint32));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    void *rawInput = NULL;
    error = MEMALIGN_ALLOC(&rawInput, floatVecAlignment, dataSize * sizeof(float));
    if (error) {
        qFatal("posix_memalign failed: %d", error);
    }

    quint32 *iData = (quint32*)rawOutput;
    float *fData = (float*)rawInput;

    QBENCHMARK {
        for (int offset = 0; offset < dataSize; offset += Vc::float_v::Size) {
            // conversion float -> int -> uint
            Vc::uint_v converted(Vc::int_v(Vc::float_v(fData + offset)));

            converted.store(iData + offset);
        }
    }

    MEMALIGN_FREE(iData);
    MEMALIGN_FREE(fData);
#endif
}

877
// Generates main() for this test executable and runs KisCompositionBenchmark.
QTEST_MAIN(KisCompositionBenchmark)
878