Commit 8dc92f7d authored by Daria Shcherbatyuk, committed by Dmitry Kazakov

Using Vc::reciprocal() and SSE instead of division in OverCompositor32

Summary:
Instead of performing an ordinary division, we can compute the reciprocal
of the divisor (1 divided by the number) and multiply it by the dividend
of the original fraction. Together, the use of Vc::reciprocal() and SSE
intrinsics gives a speedup of up to 30%.

{F3862276}
{F3862275}.

Reviewers: dkazakov, #krita
Reviewed By: dkazakov, #krita
Subscribers: alvinhochun, woltherav
Tags: #krita

Differential Revision: https://phabricator.kde.org/D7314
parent bd864d90
......@@ -13,7 +13,7 @@ include_directories(SYSTEM
set(LINK_VC_LIB)
if(HAVE_VC)
include_directories(${Vc_INCLUDE_DIR})
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Vc_DEFINITIONS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Vc_DEFINITIONS}")
set(LINK_VC_LIB ${Vc_LIBRARIES})
endif()
......@@ -38,7 +38,7 @@ set(kis_mask_generator_benchmark_SRCS kis_mask_generator_benchmark.cpp)
set(kis_low_memory_benchmark_SRCS kis_low_memory_benchmark.cpp)
set(kis_filter_selections_benchmark_SRCS kis_filter_selections_benchmark.cpp)
if (UNIX)
#set(kis_composition_benchmark_SRCS kis_composition_benchmark.cpp)
set(kis_composition_benchmark_SRCS kis_composition_benchmark.cpp)
endif()
set(kis_thumbnail_benchmark_SRCS kis_thumbnail_benchmark.cpp)
......@@ -59,7 +59,7 @@ krita_add_benchmark(KisMaskGeneratorBenchmark TESTNAME krita-benchmarks-KisMaskG
krita_add_benchmark(KisLowMemoryBenchmark TESTNAME krita-benchmarks-KisLowMemory ${kis_low_memory_benchmark_SRCS})
krita_add_benchmark(KisFilterSelectionsBenchmark TESTNAME krita-image-KisFilterSelectionsBenchmark ${kis_filter_selections_benchmark_SRCS})
if(UNIX)
#krita_add_benchmark(KisCompositionBenchmark TESTNAME krita-benchmarks-KisComposition ${kis_composition_benchmark_SRCS})
krita_add_benchmark(KisCompositionBenchmark TESTNAME krita-benchmarks-KisComposition ${kis_composition_benchmark_SRCS})
endif()
krita_add_benchmark(KisThumbnailBenchmark TESTNAME krita-benchmarks-KisThumbnail ${kis_thumbnail_benchmark_SRCS})
......@@ -80,10 +80,10 @@ target_link_libraries(KisLowMemoryBenchmark kritaimage Qt5::Test)
target_link_libraries(KisFilterSelectionsBenchmark kritaimage Qt5::Test)
if(UNIX)
#target_link_libraries(KisCompositionBenchmark kritaimage Qt5::Test ${LINK_VC_LIB})
#if(HAVE_VC)
# set_property(TARGET KisCompositionBenchmark APPEND PROPERTY COMPILE_OPTIONS "${Vc_ARCHITECTURE_FLAGS}")
#endif()
target_link_libraries(KisCompositionBenchmark kritaimage Qt5::Test ${LINK_VC_LIB})
if(HAVE_VC)
set_property(TARGET KisCompositionBenchmark APPEND PROPERTY COMPILE_OPTIONS "${Vc_ARCHITECTURE_FLAGS}")
endif()
endif()
target_link_libraries(KisMaskGeneratorBenchmark kritaimage Qt5::Test)
target_link_libraries(KisThumbnailBenchmark kritaimage Qt5::Test)
......
......@@ -26,6 +26,38 @@
#include "KoStreamedMath.h"
/**
 * Division helpers that trade the `/` operator for a multiplication by the
 * reciprocal of the divisor.
 *
 * Both helpers take the reciprocal-based path only when the compiler defines
 * __SSE__; otherwise they fall back to an ordinary division. The _impl
 * template parameter is not referenced inside the struct body.
 *
 * NOTE(review): _mm_rcp_ss is documented by Intel as an approximate
 * reciprocal, so the SSE path is presumably not bit-identical to a true
 * division -- confirm that the callers tolerate the reduced precision.
 */
template<Vc::Implementation _impl>
struct OptiDiv {
    /**
     * Scalar variant: returns divident / divisor for a single float pair.
     *
     * On the SSE path only the lowest lane of the intermediate registers is
     * meaningful; that lane is the one written back to the result.
     */
    static ALWAYS_INLINE float divScalar(const float& divident, const float& divisor) {
#ifndef __SSE__
        return divident / divisor;
#else
        const __m128 numerator = _mm_set_ss(divident);
        // Reciprocal of the divisor, computed in the lowest lane.
        const __m128 inverse = _mm_rcp_ss(_mm_set_ss(divisor));

        float quotient;
        _mm_store_ss(&quotient, _mm_mul_ss(inverse, numerator));
        return quotient;
#endif
    }

    /**
     * Vector variant: returns divident / divisor lane by lane.
     *
     * On the SSE path the quotient is formed as divident multiplied by
     * Vc::reciprocal(divisor).
     */
    static ALWAYS_INLINE Vc::float_v divVector(Vc::float_v::AsArg divident, Vc::float_v::AsArg divisor) {
#ifndef __SSE__
        return divident / divisor;
#else
        const Vc::float_v inverse = Vc::reciprocal(divisor);
        return divident * inverse;
#endif
    }
};
template<typename channels_type, typename pixel_type, bool alphaLocked, bool allChannelsFlag>
struct OverCompositor32 {
struct OptionalParams {
......@@ -97,7 +129,11 @@ struct OverCompositor32 {
* be converted to zeroes, which is exactly what we need
*/
new_alpha = dst_alpha + (uint8Max - dst_alpha) * src_alpha * uint8MaxRec1;
src_blend = src_alpha / new_alpha;
// Optimized version of:
// src_blend = src_alpha / new_alpha;
src_blend = OptiDiv<_impl>::divVector(src_alpha, new_alpha);
}
if (!(src_blend == oneValue).isFull()) {
......@@ -156,7 +192,10 @@ struct OverCompositor32 {
}
} else {
dstAlpha += (uint8Max - dstAlpha) * srcAlpha * uint8Rec1;
srcBlendNorm = srcAlpha / dstAlpha;
// Optimized version of:
// srcBlendNorm = srcAlpha / dstAlpha;
srcBlendNorm = OptiDiv<_impl>::divScalar(srcAlpha, dstAlpha);
}
if(allChannelsFlag) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment