Commit 7e8cb259 authored by Dmitry Kazakov

The Vc implementation of the composite ops is ready for testing

All the known bugs are fixed.
parent 8e76b3b6
......@@ -29,6 +29,10 @@
#include <KoCompositeOpOver.h>
#include "KoOptimizedCompositeOpFactory.h"
#include <KoOptimizedCompositeOpOver32.h>
#include <KoOptimizedCompositeOpAlphaDarken32.h>
// for calculation of the needed alignment
#include "config-vc.h"
#ifdef HAVE_VC
......@@ -67,10 +71,10 @@ void generateDataLine(uint seed, int numPixels, quint8 *srcPixels, quint8 *dstPi
for (int i = 0; i < numPixels; i++) {
for (int j = 0; j < 4; j++) {
*(srcPixels++) = 1 + qrand() % 254;
*(dstPixels++) = 1 + qrand() % 254;
*(srcPixels++) = 0 + qrand() % 255;
*(dstPixels++) = 0 + qrand() % 255;
}
*(mask++) = 1 + qrand() % 254;
*(mask++) = 0 + qrand() % 255;
}
}
......@@ -143,6 +147,15 @@ inline bool fuzzyCompare(quint8 a, quint8 b, quint8 prec) {
return qAbs(a - b) <= prec;
}
/**
 * Compares two four-channel pixels with a per-channel tolerance.
 *
 * If the channel at index 3 is zero in both pixels they are reported as
 * equal without looking at the remaining channels. Otherwise all four
 * channels have to pass fuzzyCompare() with the precision \p prec.
 *
 * \param p1   first pixel (at least four quint8 values are read)
 * \param p2   second pixel (at least four quint8 values are read)
 * \param prec maximum allowed absolute difference per channel
 * \return true when the pixels are considered equal
 */
inline bool comparePixels(quint8 *p1, quint8*p2, quint8 prec) {
    const int lastChannel = 3;

    // Both pixels have a zero in the last channel: nothing else matters
    if (p1[lastChannel] == 0 && p2[lastChannel] == 0) {
        return true;
    }

    // Same order and same short-circuiting as a chained '&&' expression
    for (int channel = 0; channel <= lastChannel; ++channel) {
        if (!fuzzyCompare(p1[channel], p2[channel], prec)) {
            return false;
        }
    }

    return true;
}
bool compareTwoOps(bool haveMask, const KoCompositeOp *op1, const KoCompositeOp *op2)
{
QVector<Tile> tiles = generateTiles(2, 16, 16);
......@@ -170,10 +183,7 @@ bool compareTwoOps(bool haveMask, const KoCompositeOp *op1, const KoCompositeOp
quint8 *dst1 = tiles[0].dst;
quint8 *dst2 = tiles[1].dst;
for (int i = 0; i < numPixels; i++) {
if (!fuzzyCompare(dst1[0], dst2[0], 2) ||
!fuzzyCompare(dst1[1], dst2[1], 2) ||
!fuzzyCompare(dst1[2], dst2[2], 2) ||
!fuzzyCompare(dst1[3], dst2[3], 2)) {
if (!comparePixels(dst1, dst2, 7)) {
qDebug() << "Wrong result:" << i;
qDebug() << "Act: " << dst1[0] << dst1[1] << dst1[2] << dst1[3];
......@@ -237,13 +247,80 @@ void benchmarkCompositeOp(const KoCompositeOp *op,
freeTiles(tiles, srcAlignmentShift, dstAlignmentShift);
}
#ifdef HAVE_VC
/**
 * Verifies that the vectorized and the scalar floating-point code paths of
 * a compositor produce exactly the same destination pixels.
 *
 * Two tiles are requested from generateTiles(). The first one is processed
 * block-by-block with Compositor::compositeVector(), the second one
 * pixel-by-pixel with Compositor::compositeOnePixelFloat(). After every
 * scalar step the two destination pixels are compared with zero tolerance
 * (see comparePixels()). The first mismatch is dumped to the debug output
 * and reported through QFAIL, which leaves this function immediately.
 *
 * Only whole blocks of Vc::float_v::Size pixels are checked; a remainder of
 * numPixels that does not fill a block is not visited.
 *
 * NOTE(review): the check presumes generateTiles() fills both tiles with
 * identical data -- confirm against its implementation.
 *
 * \tparam Compositor type providing the static compositeVector<>() and
 *                    compositeOnePixelFloat<>() templates
 */
template<class Compositor>
void checkRounding()
{
    QVector<Tile> tiles =
        generateTiles(2, 0, 0);

    const int vecSize = Vc::float_v::Size;
    const int numBlocks = numPixels / vecSize;

    // The pointers below are advanced by four quint8 values per pixel
    const int pixelSize = 4;

    quint8 *src1 = tiles[0].src;
    quint8 *dst1 = tiles[0].dst;
    quint8 *msk1 = tiles[0].mask;

    quint8 *src2 = tiles[1].src;
    quint8 *dst2 = tiles[1].dst;
    quint8 *msk2 = tiles[1].mask;

    for (int i = 0; i < numBlocks; i++) {
        // One vector call handles vecSize pixels of the first tile at once...
        Compositor::template compositeVector<true,true>(src1, dst1, msk1, 0.5, 0.3);

        // ...which are then matched one by one against the scalar result
        for (int j = 0; j < vecSize; j++) {
            Compositor::template compositeOnePixelFloat<true>(src2, dst2, msk2, 0.5, 0.3, QBitArray());

            if(!comparePixels(dst1, dst2, 0)) {
                // The block width is vecSize, so the pixel index must be
                // derived from it rather than from a hard-coded width
                qDebug() << "Wrong rounding in pixel:" << vecSize * i + j;
                qDebug() << "Vector version: " << dst1[0] << dst1[1] << dst1[2] << dst1[3];
                qDebug() << "Scalar version: " << dst2[0] << dst2[1] << dst2[2] << dst2[3];

                qDebug() << "src:" << src1[0] << src1[1] << src1[2] << src1[3];
                qDebug() << "msk:" << msk1[0];

                // QFAIL returns from this function, so the tiles have to
                // be released before it (and after the last debug read)
                freeTiles(tiles, 0, 0);
                QFAIL("Wrong rounding");
            }

            src1 += pixelSize;
            dst1 += pixelSize;
            src2 += pixelSize;
            dst2 += pixelSize;
            msk1++;
            msk2++;
        }
    }

    freeTiles(tiles, 0, 0);
}
#endif
/**
 * Runs checkRounding() for the 32-bit alpha-darken compositor.
 * The body is compiled out when HAVE_VC is not defined.
 */
void KisCompositionBenchmark::checkRoundingAlphaDarken()
{
#ifdef HAVE_VC
    typedef AlphaDarkenCompositor32<quint8, quint32> CompositorUnderTest;
    checkRounding<CompositorUnderTest>();
#endif
}
/**
 * Runs checkRounding() for the 32-bit "over" compositor, instantiated with
 * the flags <false, true>.
 * The body is compiled out when HAVE_VC is not defined.
 */
void KisCompositionBenchmark::checkRoundingOver()
{
#ifdef HAVE_VC
    typedef OverCompositor32<quint8, quint32, false, true> CompositorUnderTest;
    checkRounding<CompositorUnderTest>();
#endif
}
void KisCompositionBenchmark::compareAlphaDarkenOps()
{
const KoColorSpace *cs = KoColorSpaceRegistry::instance()->rgb8();
KoCompositeOp *opAct = KoOptimizedCompositeOpFactory::createAlphaDarkenOp32(cs);
KoCompositeOp *opExp = new KoCompositeOpAlphaDarken<KoBgrU8Traits>(cs);
QVERIFY(compareTwoOps(false, opAct, opExp));
QVERIFY(compareTwoOps(true, opAct, opExp));
delete opExp;
delete opAct;
......@@ -370,5 +447,5 @@ void KisCompositionBenchmark::testRgb8CompositeOverReal_Aligned()
benchmarkCompositeOp(op, true, 0, 0);
}
QTEST_KDEMAIN(KisCompositionBenchmark, GUI)
......@@ -25,6 +25,9 @@ class KisCompositionBenchmark : public QObject
{
Q_OBJECT
private slots:
void checkRoundingAlphaDarken();
void checkRoundingOver();
void compareAlphaDarkenOps();
void compareOverOps();
......
......@@ -71,7 +71,6 @@ struct AlphaDarkenCompositor32 {
dst_alpha = KoStreamedMath::fetch_alpha_32<true>(dst);
src_alpha = msk_norm_alpha * opacity_vec;
Vc::float_v src_c1;
Vc::float_v src_c2;
Vc::float_v src_c3;
......@@ -125,6 +124,51 @@ struct AlphaDarkenCompositor32 {
#endif /* HAVE_VC */
/**
 * Composes one pixel of the source into the destination using
 * floating-point arithmetic.
 *
 * The three color channels (indices 0..2) are interpolated towards the
 * source with KoStreamedMath::lerp_mixed_u8_float(); if the destination
 * alpha is zero the whole source pixel is copied instead. The alpha
 * channel (index 3) is then recomputed from the flow-weighted mix of the
 * "0% flow" and "100% flow" alpha values and written back as quint8.
 *
 * \tparam haveMask     when true, \p mask is dereferenced and scales the
 *                      source alpha; when false, \p mask is not read
 * \param src           source pixel (four channels are read)
 * \param dst           destination pixel (four channels are written)
 * \param mask          pointer to one mask byte, used only if haveMask
 * \param opacity       opacity; it is multiplied by \p flow before use
 * \param flow          flow; also the weight of the final alpha mix
 * \param channelFlags  ignored by this compositor
 */
template <bool haveMask>
static ALWAYS_INLINE void compositeOnePixelFloat(const channels_type *src, channels_type *dst, const quint8 *mask, float opacity, float flow, const QBitArray &channelFlags)
{
    Q_UNUSED(channelFlags);

    using namespace Arithmetic;
    const qint32 alpha_pos = 3;

    const float uint8Rec1 = 1.0 / 255.0;
    const float uint8Rec2 = 1.0 / (255.0 * 255.0);
    const float uint8Max = 255.0;

    float dstAlphaNorm = dst[alpha_pos] * uint8Rec1;
    float srcAlphaNorm;
    float mskAlphaNorm;

    opacity *= flow;

    if (haveMask) {
        // mask and source alpha are both 0..255, hence the 1/255^2 factor
        mskAlphaNorm = float(*mask) * uint8Rec2 * src[alpha_pos];
    } else {
        mskAlphaNorm = src[alpha_pos] * uint8Rec1;
    }

    // Assigned on every path: the source alpha is the (masked) alpha
    // scaled by the effective opacity, with or without a mask
    srcAlphaNorm = mskAlphaNorm * opacity;

    if (dstAlphaNorm != 0.0) {
        dst[0] = KoStreamedMath::lerp_mixed_u8_float(dst[0], src[0], srcAlphaNorm);
        dst[1] = KoStreamedMath::lerp_mixed_u8_float(dst[1], src[1], srcAlphaNorm);
        dst[2] = KoStreamedMath::lerp_mixed_u8_float(dst[2], src[2], srcAlphaNorm);
    }
    else {
        // Nothing to blend with: take the source pixel as a whole. The
        // alpha copied here is overwritten by the assignment below.
        const pixel_type *s = reinterpret_cast<const pixel_type*>(src);
        pixel_type *d = reinterpret_cast<pixel_type*>(dst);
        *d = *s;
    }

    float alpha1 = unionShapeOpacity(srcAlphaNorm, dstAlphaNorm);                                       // alpha with 0% flow
    float alpha2 = (opacity > dstAlphaNorm) ? lerp(dstAlphaNorm, opacity, mskAlphaNorm) : dstAlphaNorm; // alpha with 100% flow
    dst[alpha_pos] = quint8(lerp(alpha1, alpha2, flow) * uint8Max);
}
/**
* Composes one pixel of the source into the destination
*/
......@@ -140,6 +184,7 @@ struct AlphaDarkenCompositor32 {
channels_type dstAlpha = dst[alpha_pos];
channels_type mskAlpha = haveMask ? mul(scale<channels_type>(*mask), srcAlpha) : srcAlpha;
opacity = mul(opacity, flow);
srcAlpha = mul(mskAlpha, opacity);
if(dstAlpha != zeroValue<channels_type>()) {
......
......@@ -36,6 +36,8 @@ struct OverCompositor32 {
template<bool haveMask, bool src_aligned>
static ALWAYS_INLINE void compositeVector(const quint8 *src, quint8 *dst, const quint8 *mask, float opacity, float flow)
{
Q_UNUSED(flow);
Vc::float_v src_alpha;
Vc::float_v dst_alpha;
......@@ -68,19 +70,97 @@ struct OverCompositor32 {
KoStreamedMath::fetch_colors_32<src_aligned>(src, src_c1, src_c2, src_c3);
Vc::float_v src_blend = src_alpha / new_alpha;
KoStreamedMath::fetch_colors_32<true>(dst, dst_c1, dst_c2, dst_c3);
Vc::float_m empty_pixels_mask = new_alpha == Vc::float_v(Vc::Zero);
if (!empty_pixels_mask.isFull()) {
/**
* If new alpha is zero, then dst_alpha is already zero,
* and writing to these pixels will not change anything,
* so let's write there if it does any change to the image
*/
/**
* Division by zero here generates NaNs that
* results in zeros in all the channels, which
* is quite what we need.
*/
Vc::float_v src_blend = src_alpha / new_alpha;
KoStreamedMath::fetch_colors_32<true>(dst, dst_c1, dst_c2, dst_c3);
dst_c1 = src_blend * (src_c1 - dst_c1) + dst_c1;
dst_c2 = src_blend * (src_c2 - dst_c2) + dst_c2;
dst_c3 = src_blend * (src_c3 - dst_c3) + dst_c3;
dst_c1 = src_blend * (src_c1 - dst_c1) + dst_c1;
dst_c2 = src_blend * (src_c2 - dst_c2) + dst_c2;
dst_c3 = src_blend * (src_c3 - dst_c3) + dst_c3;
KoStreamedMath::write_channels_32(dst, new_alpha, dst_c1, dst_c2, dst_c3);
KoStreamedMath::write_channels_32(dst, new_alpha, dst_c1, dst_c2, dst_c3);
}
}
#endif /* HAVE_VC */
/**
 * Composes one pixel of the source into the destination using
 * floating-point arithmetic ("over" blending).
 *
 * The effective source alpha is src[3] * opacity (and * mask / 255 when
 * haveMask is set). If it is zero the destination is left untouched.
 * Otherwise the new destination alpha and the color blend factor are
 * computed, the color channels (indices 0..2) are blended, and the alpha
 * channel (index 3) is written only when alphaLocked is false.
 *
 * \tparam haveMask     when true, \p mask is dereferenced and scales the
 *                      source alpha; when false, \p mask is not read
 * \param src           source pixel
 * \param dst           destination pixel, modified in place
 * \param mask          pointer to one mask byte, used only if haveMask
 * \param opacity       factor applied to the source alpha
 * \param flow          ignored by this compositor
 * \param channelFlags  consulted per color channel when allChannelsFlag
 *                      is false
 */
template <bool haveMask>
static ALWAYS_INLINE void compositeOnePixelFloat(const channels_type *src, channels_type *dst, const quint8 *mask, float opacity, float flow, const QBitArray &channelFlags)
{
    Q_UNUSED(flow);

    using namespace Arithmetic;
    const qint32 alpha_pos = 3;

    const float uint8Rec1 = 1.0 / 255.0;
    const float uint8Max = 255.0;

    // Source alpha stays in the 0..255 range here
    float srcAlpha = src[alpha_pos];
    srcAlpha *= opacity;

    if (haveMask) {
        srcAlpha *= float(*mask) * uint8Rec1;
    }

    if (srcAlpha != 0.0) {
        float dstAlpha = dst[alpha_pos];
        float srcBlendNorm;

        if (dstAlpha == uint8Max) {
            // Opaque destination: alpha cannot grow, blend by source alpha
            srcBlendNorm = srcAlpha * uint8Rec1;
        } else {
            dstAlpha += (uint8Max - dstAlpha) * srcAlpha * uint8Rec1;
            if (dstAlpha != 0.0) {
                srcBlendNorm = srcAlpha / dstAlpha;
            } else {
                srcBlendNorm = 0.0;
            }
        }

        if(allChannelsFlag) {
            if (srcBlendNorm == 1.0) {
                if (!alphaLocked) {
                    // Whole-pixel copy; the alpha copied here is replaced
                    // by the computed value at the end of the function
                    const pixel_type *s = reinterpret_cast<const pixel_type*>(src);
                    pixel_type *d = reinterpret_cast<pixel_type*>(dst);
                    *d = *s;
                } else {
                    // With a locked alpha only the color channels may be
                    // taken over; a whole-pixel copy would overwrite the
                    // destination alpha with the source one
                    dst[0] = src[0];
                    dst[1] = src[1];
                    dst[2] = src[2];
                }
            } else if (srcBlendNorm != 0.0){
                dst[0] = KoStreamedMath::lerp_mixed_u8_float(dst[0], src[0], srcBlendNorm);
                dst[1] = KoStreamedMath::lerp_mixed_u8_float(dst[1], src[1], srcBlendNorm);
                dst[2] = KoStreamedMath::lerp_mixed_u8_float(dst[2], src[2], srcBlendNorm);
            }
        } else {
            if (srcBlendNorm == 1.0) {
                if(channelFlags.at(0)) dst[0] = src[0];
                if(channelFlags.at(1)) dst[1] = src[1];
                if(channelFlags.at(2)) dst[2] = src[2];
            } else if (srcBlendNorm != 0.0) {
                if(channelFlags.at(0)) dst[0] = KoStreamedMath::lerp_mixed_u8_float(dst[0], src[0], srcBlendNorm);
                if(channelFlags.at(1)) dst[1] = KoStreamedMath::lerp_mixed_u8_float(dst[1], src[1], srcBlendNorm);
                if(channelFlags.at(2)) dst[2] = KoStreamedMath::lerp_mixed_u8_float(dst[2], src[2], srcBlendNorm);
            }
        }

        if (!alphaLocked) {
            dst[alpha_pos] = quint8(dstAlpha);
        }
    }
}
template <bool haveMask>
static ALWAYS_INLINE void compositeOnePixel(const channels_type *src, channels_type *dst, const quint8 *mask, channels_type opacity, channels_type flow, const QBitArray &channelFlags)
{
......@@ -108,29 +188,30 @@ struct OverCompositor32 {
if (dstAlpha != zeroValue<channels_type>()) {
srcBlend = div<channels_type>(srcAlpha, dstAlpha);
} else {
srcBlend = srcAlpha;
srcBlend = zeroValue<channels_type>();
}
}
if(allChannelsFlag) {
if (srcBlend != zeroValue<channels_type>()) {
dst[0] = lerp(dst[0], src[0], srcBlend);
dst[1] = lerp(dst[1], src[1], srcBlend);
dst[2] = lerp(dst[2], src[2], srcBlend);
} else {
if (srcBlend == unitValue<channels_type>()) {
const pixel_type *s = reinterpret_cast<const pixel_type*>(src);
pixel_type *d = reinterpret_cast<pixel_type*>(dst);
*d = *s;
} else if (srcBlend != zeroValue<channels_type>()) {
dst[0] = lerp(dst[0], src[0], srcBlend);
dst[1] = lerp(dst[1], src[1], srcBlend);
dst[2] = lerp(dst[2], src[2], srcBlend);
}
} else {
if (srcBlend != zeroValue<channels_type>()) {
if(channelFlags.at(0)) dst[0] = lerp(dst[0], src[0], srcBlend);
if(channelFlags.at(1)) dst[1] = lerp(dst[1], src[1], srcBlend);
if(channelFlags.at(2)) dst[2] = lerp(dst[2], src[2], srcBlend);
} else {
if (srcBlend == unitValue<channels_type>()) {
if(channelFlags.at(0)) dst[0] = src[0];
if(channelFlags.at(1)) dst[1] = src[1];
if(channelFlags.at(2)) dst[2] = src[2];
} else if (srcBlend != zeroValue<channels_type>()) {
if(channelFlags.at(0)) dst[0] = lerp(dst[0], src[0], srcBlend);
if(channelFlags.at(1)) dst[1] = lerp(dst[1], src[1], srcBlend);
if(channelFlags.at(2)) dst[2] = lerp(dst[2], src[2], srcBlend);
}
}
......
......@@ -264,7 +264,7 @@ template<bool useMask, bool useFlow, class Compositor>
}
for(int i = 0; i < blockAlign; i++) {
Compositor::template compositeOnePixel<useMask>(src, dst, mask, opacity, flow, params.channelFlags);
Compositor::template compositeOnePixelFloat<useMask>(src, dst, mask, params.opacity, params.flow, params.channelFlags);
src += srcLinearInc;
dst += linearInc;
......@@ -295,7 +295,7 @@ template<bool useMask, bool useFlow, class Compositor>
for(int i = 0; i < blockRest; i++) {
Compositor::template compositeOnePixel<useMask>(src, dst, mask, opacity, flow, params.channelFlags);
Compositor::template compositeOnePixelFloat<useMask>(src, dst, mask, params.opacity, params.flow, params.channelFlags);
src += srcLinearInc;
dst += linearInc;
......@@ -317,6 +317,10 @@ template<bool useMask, bool useFlow, class Compositor>
}
}
/**
 * Linear interpolation between two 8-bit values with a float weight.
 *
 * Evaluates (b - a) * alpha + a in floating point and converts the result
 * to quint8. The conversion is a plain cast, so no rounding to nearest is
 * applied.
 *
 * \param a     value returned for alpha == 0
 * \param b     value approached as alpha grows
 * \param alpha interpolation weight
 */
static inline quint8 lerp_mixed_u8_float(quint8 a, quint8 b, float alpha) {
    // The difference may be negative, so it is kept in a signed 16-bit value
    const qint16 delta = static_cast<qint16>(b - a);
    return static_cast<quint8>(delta * alpha + a);
}
#else /* if ! defined HAVE_VC */
template<bool useMask, bool useFlow, class Compositor>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment