Commit 7669cce6 authored by Sven Langkamp's avatar Sven Langkamp

initial implementation of brush mask computation with vc

parent 193bf20e
......@@ -12,6 +12,8 @@ if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj")
endif(MSVC)
# Enable SSE2 code generation. -msse2 is a GCC/Clang-style option, so it is
# only appended for non-MSVC compilers; cl.exe does not understand it and
# would report it as an unknown option.
if(NOT MSVC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
endif()
include(CheckFunctionExists)
if (NOT WIN32)
......@@ -42,6 +44,7 @@ macro_log_feature(OIIO_FOUND "OIIO" "The OpenImageIO Lbirary" "https://sites.goo
macro_bool_to_01(OIIO_FOUND HAVE_OIIO)
configure_file(config-oiio.h.cmake ${CMAKE_CURRENT_BINARY_DIR}/config-oiio.h )
find_package(Vc REQUIRED)
if(UNIX)
set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES};m")
......
......@@ -47,4 +47,4 @@ target_link_libraries(KisStrokeBenchmark ${KDE4_KDEUI_LIBS} kritaimage ${QT_QTTE
target_link_libraries(KisFastMathBenchmark ${KDE4_KDEUI_LIBS} kritaimage ${QT_QTTEST_LIBRARY})
target_link_libraries(KisFloodfillBenchmark ${KDE4_KDEUI_LIBS} kritaimage ${QT_QTTEST_LIBRARY})
target_link_libraries(KisGradientBenchmark ${KDE4_KDEUI_LIBS} kritaimage ${QT_QTTEST_LIBRARY})
target_link_libraries(KisMaskGeneratorBenchmark ${KDE4_KDEUI_LIBS} kritaimage ${QT_QTTEST_LIBRARY})
target_link_libraries(KisMaskGeneratorBenchmark ${KDE4_KDEUI_LIBS} kritaimage ${QT_QTTEST_LIBRARY} ${Vc_LIBRARIES})
......@@ -18,6 +18,8 @@
#include <qtest_kde.h>
#include <Vc/Vc>
#include <Vc/IO>
#include "kis_mask_generator_benchmark.h"
#include "kis_circle_mask_generator.h"
......@@ -37,6 +39,21 @@ void KisMaskGeneratorBenchmark::benchmarkCircle()
}
}
/**
 * Benchmarks KisCircleMaskGenerator::processRowFast(): every benchmark
 * iteration generates 1000 rows of 1000 values each into a float buffer
 * obtained from Vc::malloc with Vc::AlignOnVector.
 */
void KisMaskGeneratorBenchmark::benchmarkSIMD()
{
    const int rowWidth = 1000;
    const int rowCount = 1000;

    float *rowBuffer = Vc::malloc<float, Vc::AlignOnVector>(rowWidth);
    KisCircleMaskGenerator gen(1000, 0.5, 0.5, 0.5, 2);

    QBENCHMARK {
        int row = 0;
        while (row < rowCount) {
            gen.processRowFast(rowBuffer, rowWidth, row, 0.0f, 1.0f, 500.0f, 500.0f, 0.5f, 0.5f);
            ++row;
        }
    }

    // Memory from Vc::malloc has to be returned with Vc::free.
    Vc::free(rowBuffer);
}
void KisMaskGeneratorBenchmark::benchmarkSquare()
{
KisRectangleMaskGenerator gen(1000, 0.5, 0.5, 0.5, 3);
......
......@@ -26,6 +26,7 @@ class KisMaskGeneratorBenchmark : public QObject
Q_OBJECT
private slots:
void benchmarkCircle();
void benchmarkSIMD();
void benchmarkSquare();
};
......
......@@ -232,7 +232,7 @@ set(kritaimage_LIB_SRCS
kde4_add_library(kritaimage SHARED ${kritaimage_LIB_SRCS})
target_link_libraries(kritaimage ${KDE4_KPARTS_LIBS} komain pigmentcms ${KDE4_THREADWEAVER_LIBRARIES} )
target_link_libraries(kritaimage ${KDE4_KPARTS_LIBS} komain pigmentcms ${KDE4_THREADWEAVER_LIBRARIES} ${Vc_LIBRARIES})
target_link_libraries(kritaimage LINK_INTERFACE_LIBRARIES pigmentcms komain ${KDE4_KPARTS_LIBS} )
if(OPENEXR_FOUND)
......
......@@ -68,6 +68,9 @@ public:
*/
virtual quint8 valueAt(qreal x, qreal y) const = 0;
virtual void processRowFast(float* buffer, int width, float y, float cosa, float sina,
float centerX, float centerY, float invScaleX, float invScaleY) {}
virtual bool shouldSupersample() const;
virtual void toXML(QDomDocument& , QDomElement&) const;
......
......@@ -18,6 +18,8 @@
*/
#include <cmath>
#include <Vc/Vc>
#include <Vc/IO>
#include <QDomDocument>
......@@ -25,6 +27,10 @@
#include "kis_circle_mask_generator.h"
#include "kis_base_mask_generator.h"
/**
 * Returns, per vector lane, the sum of squares a*a + b*b.
 * No square root is taken.
 */
inline Vc::float_v normeSIMD(Vc::float_v a, Vc::float_v b) {
    const Vc::float_v aSquared = a * a;
    const Vc::float_v bSquared = b * b;
    return aSquared + bSquared;
}
struct KisCircleMaskGenerator::Private {
double xcoef, ycoef;
double xfadecoef, yfadecoef;
......@@ -104,6 +110,66 @@ quint8 KisCircleMaskGenerator::valueAt(qreal x, qreal y) const
}
}
/**
 * Computes one row of the circle mask with Vc SIMD vectors and stores the
 * result as floats in buffer, Vc::float_v::Size values per loop iteration.
 *
 * For every x in [0, width) the coordinate is translated by
 * (centerX, centerY), scaled by (invScaleX, invScaleY) and rotated with
 * cosa/sina. The stored value is
 *     255 * n * (normFade - 1) / (normFade - n)
 * clamped to at most 255, and set to 0 where normFade < 1.
 *
 * @param buffer    destination; written with the default vector store
 * @param width     number of values requested for the row
 * @param y         row coordinate
 * @param cosa      cosine of the rotation angle
 * @param sina      sine of the rotation angle
 * @param centerX   x coordinate of the mask centre
 * @param centerY   y coordinate of the mask centre
 * @param invScaleX factor applied to (x - centerX)
 * @param invScaleY factor applied to (y - centerY)
 *
 * NOTE(review): the loop always stores a full vector, so when width is not
 * a multiple of Vc::float_v::Size it writes past buffer[width - 1]. The
 * callers visible here allocate the buffer with
 * Vc::malloc<float, Vc::AlignOnVector>; confirm that this provides the
 * required alignment and padding.
 */
void KisCircleMaskGenerator::processRowFast(float* buffer, int width, float y, float cosa, float sina,
                                            float centerX, float centerY, float invScaleX, float invScaleY)
{
    float y_ = (y - centerY) * invScaleY;
    float sinay_ = sina * y_;
    float cosay_ = cosa * y_;

    // Seed the x coordinates of the first vector: 0, 1, ..., Size - 1.
    float *initValues = Vc::malloc<float, Vc::AlignOnVector>(Vc::float_v::Size);
    for(int i = 0; i < Vc::float_v::Size; i++) {
        initValues[i] = (float)i;
    }

    float* bufferPointer = buffer;

    Vc::float_v currentIndices(initValues);
    // The values have been loaded into currentIndices, so release the
    // scratch array here; otherwise it would be leaked on every call.
    Vc::free(initValues);

    Vc::float_v increment((float)Vc::float_v::Size);
    Vc::float_v vCenterX(centerX);
    Vc::float_v vInvScaleX(invScaleX);

    Vc::float_v vCosa(cosa);
    Vc::float_v vSina(sina);
    Vc::float_v vCosaY_(cosay_);
    Vc::float_v vSinaY_(sinay_);

    Vc::float_v vXCoeff(d->xcoef);
    Vc::float_v vYCoeff(d->ycoef);

    Vc::float_v vTransformedFadeX(d->transformedFadeX);
    Vc::float_v vTransformedFadeY(d->transformedFadeY);

    Vc::float_v vOne(1.0f);
    Vc::float_v v255(255.0f);

    for (int i=0; i < width; i+= Vc::float_v::Size){
        // Translate, scale and rotate the x coordinates of this vector.
        Vc::float_v x_ = (currentIndices - vCenterX) * vInvScaleX;
        Vc::float_v xr = x_ * vCosa - vSinaY_;
        Vc::float_v yr = x_ * vSina + vCosaY_;

        Vc::float_v n = normeSIMD(xr * vXCoeff, yr * vYCoeff);
        Vc::float_v vNormFade = normeSIMD(xr * vTransformedFadeX, yr * vTransformedFadeY);

        // 255 * n * (normeFade - 1) / (normeFade - n)
        Vc::float_v vFade = v255 * n * (vNormFade - vOne) / (vNormFade - n);

        // Mask out the inner circle of the mask
        Vc::float_m mask = vNormFade < vOne;
        vFade.setZero(mask);
        vFade = Vc::min(vFade, v255);

        vFade.store(bufferPointer);

        currentIndices = currentIndices + increment;
        bufferPointer += Vc::float_v::Size;
    }
}
void KisCircleMaskGenerator::toXML(QDomDocument& d, QDomElement& e) const
{
KisMaskGenerator::toXML(d, e);
......
......@@ -37,6 +37,10 @@ public:
virtual ~KisCircleMaskGenerator();
virtual quint8 valueAt(qreal x, qreal y) const;
/// Fills buffer with the mask values of one row (row coordinate y), computed with Vc SIMD vectors.
virtual void processRowFast(float* buffer, int width, float y, float cosa, float sina,
float centerX, float centerY, float invScaleX, float invScaleY);
virtual bool shouldSupersample() const;
virtual void toXML(QDomDocument& , QDomElement&) const;
......
......@@ -26,7 +26,7 @@ set(kritalibbrush_LIB_SRCS
kde4_add_library(kritalibbrush SHARED ${kritalibbrush_LIB_SRCS} )
target_link_libraries(kritalibbrush kritaui )
target_link_libraries(kritalibbrush kritaui ${Vc_LIBRARIES})
target_link_libraries(kritalibbrush LINK_INTERFACE_LIBRARIES kritaui)
set_target_properties(kritalibbrush PROPERTIES
......
......@@ -44,6 +44,9 @@ inline double drand48() {
#include "kis_mask_generator.h"
#include "kis_boundary.h"
#include <Vc/Vc>
#include <Vc/IO>
// 3x3 supersampling
#define SUPERSAMPLING 3
......@@ -133,7 +136,94 @@ struct MaskProcessor
KisMaskGenerator* m_shape;
};
struct SIMDMaskProcessor
{
SIMDMaskProcessor(KisFixedPaintDeviceSP device, const KoColorSpace* cs, qreal randomness, qreal density,
double centerX, double centerY, double invScaleX, double invScaleY, double angle,
KisMaskGenerator* shape)
: m_device(device)
, m_cs(cs)
, m_randomness(randomness)
, m_density(density)
, m_pixelSize(cs->pixelSize())
, m_centerX(centerX)
, m_centerY(centerY)
, m_invScaleX(invScaleX)
, m_invScaleY(invScaleY)
, m_shape(shape)
{
m_cosa = cos(angle);
m_sina = sin(angle);
}
void operator()(QRect& rect)
{
process(rect);
}
void process(QRect& rect){
qreal random = 1.0;
quint8* dabPointer = m_device->data() + rect.y() * rect.width() * m_pixelSize;
quint8 alphaValue = OPACITY_TRANSPARENT_U8;
// this offset is needed when brush size is smaller then fixed device size
int offset = (m_device->bounds().width() - rect.width()) * m_pixelSize;
int width = rect.width();
float *buffer = Vc::malloc<float, Vc::AlignOnVector>(width);
for (int y = rect.y(); y < rect.y() + rect.height(); y++) {
m_shape->processRowFast(buffer, width, y, m_cosa, m_sina, m_centerX, m_centerY, m_invScaleX, m_invScaleY);
if (m_randomness != 0.0 || m_density != 1.0) {
for (int x = 0; x < width; x++) {
if (m_randomness!= 0.0){
random = (1.0 - m_randomness) + m_randomness * float(rand()) / RAND_MAX;
}
alphaValue = quint8( (OPACITY_OPAQUE_U8 - buffer[x]) * random);
// avoid computation of random numbers if density is full
if (m_density != 1.0){
// compute density only for visible pixels of the mask
if (alphaValue != OPACITY_TRANSPARENT_U8){
if ( !(m_density >= drand48()) ){
alphaValue = OPACITY_TRANSPARENT_U8;
}
}
}
m_cs->applyAlphaU8Mask(dabPointer, &alphaValue, 1);
dabPointer += m_pixelSize;
}
} else {
for (int x = 0; x < width; x++) {
alphaValue = quint8( (OPACITY_OPAQUE_U8 - buffer[x]));
m_cs->applyAlphaU8Mask(dabPointer, &alphaValue, 1);
dabPointer += m_pixelSize;
}
}//endfor x
dabPointer += offset;
}//endfor y
Vc::free(buffer);
}
KisFixedPaintDeviceSP m_device;
const KoColorSpace* m_cs;
qreal m_randomness;
qreal m_density;
quint32 m_pixelSize;
double m_centerX;
double m_centerY;
double m_invScaleX;
double m_invScaleY;
double m_cosa;
double m_sina;
KisMaskGenerator* m_shape;
};
struct KisAutoBrush::Private {
KisMaskGenerator* shape;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment