Commit e8f75936 authored by Simon Eugster

FFT correlation is now used for larger audio samples when aligning.

Additional changes:
* FFTTools class moved to lib/
* FFT correlation usage simplified
* audioOffset executables combined, FFT can be used with --fft
* Debug output defined away
* Comments added
parent 69774d0f
......@@ -2,7 +2,6 @@ set(kdenlive_SRCS
${kdenlive_SRCS}
audioscopes/abstractaudioscopewidget.cpp
audioscopes/audiospectrum.cpp
audioscopes/ffttools.cpp
audioscopes/spectrogram.cpp
PARENT_SCOPE
)
......@@ -11,7 +11,7 @@
#include "audiospectrum.h"
#include "ffttools.h"
#include "lib/audio/fftTools.h"
#include "lib/external/kiss_fft/tools/kiss_fftr.h"
#include <QMenu>
......
......@@ -30,7 +30,7 @@
#include "abstractaudioscopewidget.h"
#include "ui_audiospectrum_ui.h"
#include "lib/external/kiss_fft/tools/kiss_fftr.h"
#include "ffttools.h"
#include "lib/audio/fftTools.h"
class AudioSpectrum_UI;
class AudioSpectrum : public AbstractAudioScopeWidget {
......
......@@ -29,7 +29,7 @@
#include "abstractaudioscopewidget.h"
#include "ui_spectrogram_ui.h"
#include "ffttools.h"
#include "lib/audio/fftTools.h"
class Spectrogram_UI;
class Spectrogram : public AbstractAudioScopeWidget {
......
......@@ -52,9 +52,9 @@ void MoveClipCommand::undo()
// Executes the clip move if m_doIt is set, and stores the result in m_success.
// m_endPos is passed both as the requested end position and as the output
// argument of moveClip(), so it may be overwritten with the position that
// was actually reached.
// NOTE(review): m_doIt presumably starts out false so that the first redo()
// call is a no-op -- confirm against the constructor.
void MoveClipCommand::redo()
{
if (m_doIt) {
qDebug() << "Executing move clip command. End now:" << m_endPos;
// qDebug() << "Executing move clip command. End now:" << m_endPos;
m_success = m_view->moveClip(m_startPos, m_endPos, m_refresh, &m_endPos);
qDebug() << "Move clip command executed. End now: " << m_endPos;
// qDebug() << "Move clip command executed. End now: " << m_endPos;
}
// From now on every call performs the move.
m_doIt = true;
}
......
......@@ -86,6 +86,8 @@
#include <QGraphicsDropShadowEffect>
#endif
//#define DEBUG
bool sortGuidesList(const Guide *g1 , const Guide *g2)
{
return (*g1).position() < (*g2).position();
......@@ -4303,10 +4305,12 @@ bool CustomTrackView::moveClip(const ItemInfo &start, const ItemInfo &end, bool
}
Mlt::Producer *prod = item->getProducer(end.track);
#ifdef DEBUG
qDebug() << "Moving item " << (long)item << " from .. to:";
qDebug() << item->info();
qDebug() << start;
qDebug() << end;
#endif
bool success = m_document->renderer()->mltMoveClip((int)(m_document->tracksCount() - start.track), (int)(m_document->tracksCount() - end.track),
(int) start.startPos.frames(m_document->fps()), (int)end.startPos.frames(m_document->fps()),
prod);
......@@ -4341,9 +4345,13 @@ bool CustomTrackView::moveClip(const ItemInfo &start, const ItemInfo &end, bool
if (refresh) m_document->renderer()->doRefresh();
if (out_actualEnd != NULL) {
*out_actualEnd = item->info();
#ifdef DEBUG
qDebug() << "Actual end position updated:" << *out_actualEnd;
#endif
}
#ifdef DEBUG
qDebug() << item->info();
#endif
return success;
}
......@@ -6024,7 +6032,10 @@ void CustomTrackView::alignAudio()
AudioEnvelope *envelope = new AudioEnvelope(clip->getProducer(clip->track()),
clip->info().cropStart.frames(m_document->fps()),
clip->info().cropDuration.frames(m_document->fps()));
int index = m_audioCorrelator->addChild(envelope);
// Use FFT only for larger vectors. We could use it all the time, but for small vectors
// the overhead (not noticeable anyway) is smaller with the nested for loop correlation.
int index = m_audioCorrelator->addChild(envelope, envelope->envelopeSize() > 200);
int shift = m_audioCorrelator->getShift(index);
counter++;
......@@ -6032,12 +6043,14 @@ void CustomTrackView::alignAudio()
envelope->drawEnvelope().save("kdenlive-audio-align-envelope.png");
envelope->dumpInfo();
#ifdef DEBUG
int targetPos = m_audioAlignmentReference->startPos().frames(m_document->fps()) + shift;
qDebug() << "Reference starts at " << m_audioAlignmentReference->startPos().frames(m_document->fps());
qDebug() << "We will start at " << targetPos;
qDebug() << "to shift by " << shift;
qDebug() << "(eventually)";
qDebug() << "(maybe)";
#endif
QUndoCommand *moveCommand = new QUndoCommand();
......@@ -6051,11 +6064,12 @@ void CustomTrackView::alignAudio()
if ( end.startPos.seconds() < 0 ) {
// Clip would start before 0, so crop it first
qDebug() << "Need to crop clip. " << start;
GenTime cropBy = -end.startPos;
#ifdef DEBUG
qDebug() << "Need to crop clip. " << start;
qDebug() << "end.startPos: " << end.startPos.toString() << ", cropBy: " << cropBy.toString();
#endif
ItemInfo resized = start;
resized.startPos += cropBy;
......@@ -6066,8 +6080,10 @@ void CustomTrackView::alignAudio()
start = clip->info();
end.startPos += cropBy;
#ifdef DEBUG
qDebug() << "Clip cropped. " << start;
qDebug() << "Moving to: " << end;
#endif
}
moveCommand->setText(i18n("Auto-align clip"));
......
......@@ -7,5 +7,6 @@ set(kdenlive_SRCS
lib/audio/audioInfo.cpp
lib/audio/audioStreamInfo.cpp
lib/audio/fftCorrelation.cpp
lib/audio/fftTools.cpp
PARENT_SCOPE
)
/***************************************************************************
* Copyright (C) 2012 by Simon Andreas Eugster (simon.eu@gmail.com) *
* This file is part of kdenlive. See www.kdenlive.org. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
***************************************************************************/
/*
Copyright (C) 2012 Simon A. Eugster (Granjow) <simon.eu@gmail.com>
This file is part of kdenlive. See www.kdenlive.org.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
*/
#include "audioCorrelation.h"
#include "fftCorrelation.h"
#include <QTime>
#include <cmath>
......@@ -29,7 +30,7 @@ AudioCorrelation::~AudioCorrelation()
std::cout << "Envelope deleted." << std::endl;
}
int AudioCorrelation::addChild(AudioEnvelope *envelope)
int AudioCorrelation::addChild(AudioEnvelope *envelope, bool useFFT)
{
envelope->normalizeEnvelope();
......@@ -42,6 +43,56 @@ int AudioCorrelation::addChild(AudioEnvelope *envelope)
const int64_t *envMain = m_mainTrackEnvelope->envelope();
const int64_t *envSub = envelope->envelope();
int64_t max = 0;
if (useFFT) {
FFTCorrelation::correlate(envMain, sizeMain,
envSub, sizeSub,
correlation);
} else {
correlate(envMain, sizeMain,
envSub, sizeSub,
correlation,
&max);
info->setMax(max);
}
m_children.append(envelope);
m_correlations.append(info);
Q_ASSERT(m_correlations.size() == m_children.size());
return m_children.indexOf(envelope);
}
/**
  Returns the shift for the child at \c childIndex: the index of the
  largest correlation value minus the size of the child's envelope.
 */
int AudioCorrelation::getShift(int childIndex) const
{
    Q_ASSERT(childIndex >= 0);
    Q_ASSERT(childIndex < m_correlations.size());

    const int peakIndex = m_correlations.at(childIndex)->maxIndex();
    const int childSize = m_children.at(childIndex)->envelopeSize();

    return peakIndex - childSize;
}
/**
  Returns the correlation info object stored for the child at \c childIndex.
  The index must be one that is valid for the list of correlations.
 */
AudioCorrelationInfo const* AudioCorrelation::info(int childIndex) const
{
    Q_ASSERT(childIndex >= 0);
    Q_ASSERT(childIndex < m_correlations.size());

    AudioCorrelationInfo const* const childInfo = m_correlations.at(childIndex);
    return childInfo;
}
void AudioCorrelation::correlate(const int64_t *envMain, int sizeMain,
const int64_t *envSub, int sizeSub,
int64_t *correlation,
int64_t *out_max)
{
Q_ASSERT(correlation != NULL);
int64_t const* left;
int64_t const* right;
int size;
......@@ -92,33 +143,9 @@ int AudioCorrelation::addChild(AudioEnvelope *envelope)
}
}
info->setMax(max);
std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." << std::endl;
m_children.append(envelope);
m_correlations.append(info);
Q_ASSERT(m_correlations.size() == m_children.size());
return m_children.indexOf(envelope);
}
/**
  Returns the shift for the child at \c childIndex: the index of the
  largest correlation value minus the size of the child's envelope.
 */
int AudioCorrelation::getShift(int childIndex) const
{
    Q_ASSERT(childIndex >= 0);
    Q_ASSERT(childIndex < m_correlations.size());

    const int peakIndex = m_correlations.at(childIndex)->maxIndex();
    const int childSize = m_children.at(childIndex)->envelopeSize();

    return peakIndex - childSize;
}
AudioCorrelationInfo const* AudioCorrelation::info(int childIndex) const
{
Q_ASSERT(childIndex >= 0);
Q_ASSERT(childIndex < m_correlations.size());
return m_correlations.at(childIndex);
if (out_max != NULL) {
*out_max = max;
}
}
......@@ -15,7 +15,6 @@
#include "audioEnvelope.h"
#include <QList>
class AudioCorrelationInfo;
/**
This class does the correlation between two tracks
......@@ -35,12 +34,19 @@ public:
This object will take ownership of the passed envelope.
\return The child's index
*/
int addChild(AudioEnvelope *envelope);
int addChild(AudioEnvelope *envelope, bool useFFT = false);
const AudioCorrelationInfo *info(int childIndex) const;
int getShift(int childIndex) const;
/**
Correlates the two vectors envMain and envSub.
\c correlation must be a pre-allocated vector of size sizeMain+sizeSub+1.
*/
static void correlate(const int64_t *envMain, int sizeMain,
const int64_t *envSub, int sizeSub,
int64_t *correlation,
int64_t *out_max = NULL);
private:
AudioEnvelope *m_mainTrackEnvelope;
......
......@@ -36,7 +36,6 @@ void AudioCorrelationInfo::setMax(int64_t max)
int64_t AudioCorrelationInfo::max() const
{
Q_ASSERT(m_max > 0);
if (m_max <= 0) {
int width = size();
int64_t max = 0;
......
......@@ -28,9 +28,16 @@ public:
int64_t* correlationVector();
int64_t const* correlationVector() const;
/**
Returns the maximum value in the correlation vector.
If it has not been set before with setMax(), it will be calculated.
*/
int64_t max() const;
void setMax(int64_t max); ///< Can be set to avoid calculating the max again in this function
/**
Returns the index of the largest value in the correlation vector
*/
int maxIndex() const;
QImage toImage(int height = 400) const;
......
......@@ -21,7 +21,22 @@ extern "C"
void FFTCorrelation::correlate(const int64_t *left, const int leftSize,
const int64_t *right, const int rightSize,
float **out_correlated, int &out_size)
int64_t *out_correlated)
{
float correlatedFloat[leftSize+rightSize+1];
correlate(left, leftSize, right, rightSize, correlatedFloat);
// The correlation vector will have entries up to N (number of entries
// of the vector), so converting to integers will not lose that much
// of precision.
for (int i = 0; i < leftSize+rightSize+1; i++) {
out_correlated[i] = correlatedFloat[i];
}
}
void FFTCorrelation::correlate(const int64_t *left, const int leftSize,
const int64_t *right, const int rightSize,
float *out_correlated)
{
QTime t;
t.start();
......@@ -50,21 +65,21 @@ void FFTCorrelation::correlate(const int64_t *left, const int leftSize,
// One side needs to be reversed, since multiplication in the frequency domain (Fourier space)
// calculates the convolution: \sum l[x]r[N-x] and not the correlation: \sum l[x]r[x]
for (int i = 0; i < leftSize; i++) {
leftF[leftSize-1 - i] = double(left[i])/maxLeft;
leftF[i] = double(left[i])/maxLeft;
}
for (int i = 0; i < rightSize; i++) {
rightF[i] = double(right[i])/maxRight;
rightF[rightSize-1 - i] = double(right[i])/maxRight;
}
// Now we can convolve to get the correlation
convolute(leftF, leftSize, rightF, rightSize, out_correlated, out_size);
convolute(leftF, leftSize, rightF, rightSize, out_correlated);
std::cout << "Correlation (FFT based) computed in " << t.elapsed() << " ms." << std::endl;
}
void FFTCorrelation::convolute(const float *left, const int leftSize,
const float *right, const int rightSize,
float **out_convolved, int &out_size)
float *out_convolved)
{
QTime time;
time.start();
......@@ -95,7 +110,7 @@ void FFTCorrelation::convolute(const float *left, const int leftSize,
// Fill in the data into our new vectors with padding
float leftData[size];
float rightData[size];
*out_convolved = new float[size];
float convolved[size];
std::fill(leftData, leftData+size, 0);
std::fill(rightData, rightData+size, 0);
......@@ -113,9 +128,14 @@ void FFTCorrelation::convolute(const float *left, const int leftSize,
correlatedFFT[i].i = leftFFT[i].r*rightFFT[i].i + leftFFT[i].i*rightFFT[i].r;
}
// Inverse Fourier transformation to get the convolved data
kiss_fftri(ifftConfig, correlatedFFT, *out_convolved);
out_size = size;
// Inverse Fourier transformation to get the convolved data.
// Insert one element at the beginning to obtain the same result
// that we also get with the nested for loop correlation.
*out_convolved = 0;
int out_size = leftSize+rightSize+1;
kiss_fftri(ifftConfig, correlatedFFT, convolved);
std::copy(convolved, convolved+out_size-1, out_convolved+1);
// Finally some cleanup.
kiss_fftr_free(fftConfig);
......
......@@ -13,16 +13,37 @@ the Free Software Foundation, either version 3 of the License, or
#include <inttypes.h>
/**
This class provides methods to calculate convolution
and correlation of two vectors by means of FFT, which
is O(n log n) (convolution in the spatial domain would be
O(n²)).
*/
class FFTCorrelation
{
public:
/**
Computes the convolution between \c left and \c right.
\c out_convolved must be a pre-allocated vector of size
\c leftSize + \c rightSize + 1.
*/
static void convolute(const float *left, const int leftSize,
const float *right, const int rightSize,
float **out_convolved, int &out_size);
float *out_convolved);
/**
Computes the correlation between \c left and \c right.
\c out_correlated must be a pre-allocated vector of size
\c leftSize + \c rightSize + 1.
*/
static void correlate(const int64_t *left, const int leftSize,
const int64_t *right, const int rightSize,
float *out_correlated);
static void correlate(const int64_t *left, const int leftSize,
const int64_t *right, const int rightSize,
float **out_correlated, int &out_size);
int64_t *out_correlated);
};
#endif // FFTCORRELATION_H
......@@ -13,7 +13,7 @@
#include <QString>
#include "ffttools.h"
#include "fftTools.h"
// Uncomment for debugging, like writing a GNU Octave .m file to /tmp
//#define DEBUG_FFTTOOLS
......
......@@ -17,21 +17,9 @@ add_executable(audioOffset
../src/lib/audio/audioEnvelope.cpp
../src/lib/audio/audioCorrelation.cpp
../src/lib/audio/audioCorrelationInfo.cpp
)
target_link_libraries(audioOffset
${QT_LIBRARIES}
${LIBMLT_LIBRARY}
${LIBMLTPLUS_LIBRARY}
)
add_executable(audioOffsetFFT
audioOffsetFFT.cpp
../src/lib/audio/audioInfo.cpp
../src/lib/audio/audioStreamInfo.cpp
../src/lib/audio/audioEnvelope.cpp
../src/lib/audio/fftCorrelation.cpp
)
target_link_libraries(audioOffsetFFT
target_link_libraries(audioOffset
${QT_LIBRARIES}
${LIBMLT_LIBRARY}
${LIBMLTPLUS_LIBRARY}
......
/***************************************************************************
* Copyright (C) 2012 by Simon Andreas Eugster (simon.eu@gmail.com) *
* This file is part of kdenlive. See www.kdenlive.org. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
***************************************************************************/
/*
Copyright (C) 2012 Simon A. Eugster (Granjow) <simon.eu@gmail.com>
This file is part of kdenlive. See www.kdenlive.org.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
*/
#include <QMap>
#include <QFile>
......@@ -34,6 +33,9 @@ void printUsage(const char *path)
<< "how much B needs to be shifted in order to be synchronized with A." << std::endl << std::endl
<< path << " <main audio file> <second audio file>" << std::endl
<< "\t-h, --help\n\t\tDisplay this help" << std::endl
<< "\t--fft\n\t\tUse Fourier Transform (FFT) to calculate the offset. This only takes" << std::endl
<< "\t\tO(n log n) time compared to O(n²) when using normal correlation and should be " << std::endl
<< "\t\tfaster for large data (several minutes)." << std::endl
<< "\t--profile=<profile>\n\t\tUse the given profile for calculation (run: melt -query profiles)" << std::endl
<< "\t--no-images\n\t\tDo not save envelope and correlation images" << std::endl
;
......@@ -47,6 +49,7 @@ int main(int argc, char *argv[])
std::string profile = "atsc_1080p_24";
bool saveImages = true;
bool useFFT = false;
// Load arguments
foreach (QString str, args) {
......@@ -64,6 +67,10 @@ int main(int argc, char *argv[])
} else if (str == "--no-images") {
saveImages = false;
args.removeOne(str);
} else if (str == "--fft") {
useFFT = true;
args.removeOne(str);
}
}
......@@ -95,6 +102,9 @@ int main(int argc, char *argv[])
<< "\n, result will indicate by how much (2) has to be moved." << std::endl
<< "Profile used: " << profile << std::endl
;
if (useFFT) {
std::cout << "Will use FFT based correlation." << std::endl;
}
// Initialize MLT
......@@ -134,10 +144,10 @@ int main(int argc, char *argv[])
// Calculate the correlation and hereby the audio shift
AudioCorrelation corr(envelopeMain);
int index = corr.addChild(envelopeSub);
int index = corr.addChild(envelopeSub, useFFT);
int shift = corr.getShift(index);
std::cout << fileSub << " should be shifted by " << shift << " frames" << std::endl
std::cout << " Should be shifted by " << shift << " frames: " << fileSub << std::endl
<< "\trelative to " << fileMain << std::endl
<< "\tin a " << prodMain.get_fps() << " fps profile (" << profile << ")." << std::endl
;
......
/*
Copyright (C) 2012 Simon A. Eugster (Granjow) <simon.eu@gmail.com>
This file is part of kdenlive. See www.kdenlive.org.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
*/
#include "../src/lib/audio/audioEnvelope.h"
#include "../src/lib/audio/fftCorrelation.h"
#include <QCoreApplication>
#include <QStringList>
#include <QTime>
#include <QImage>
#include <QDebug>
#include <iostream>
#include <cmath>
/**
  Prints the command line help of this tool to standard output.
  \param path Name the executable was started with; it is shown in front
              of the synopsis line.
 */
void printUsage(const char *path)
{
    std::ostream &out = std::cout;

    // What the tool does
    out << "This executable takes two audio/video files A and B and determines " << std::endl;
    out << "how much B needs to be shifted in order to be synchronized with A." << std::endl;
    out << "Other than audioOffset this executable will use Fast Fourier Tranform " << std::endl;
    out << "which should be faster especially for large files." << std::endl << std::endl;

    // Synopsis
    out << path << " <main audio file> <second audio file>" << std::endl;

    // Options
    out << "\t-h, --help\n\t\tDisplay this help" << std::endl;
    out << "\t--profile=<profile>\n\t\tUse the given profile for calculation (run: melt -query profiles)" << std::endl;
    out << "\t--no-images\n\t\tDo not save envelope and correlation images" << std::endl;
}
int main(int argc, char *argv[])
{
QCoreApplication app(argc, argv);
QStringList args = app.arguments();
args.removeAt(0);
std::string profile = "atsc_1080p_24";
bool saveImages = true;
// Load arguments
foreach (QString str, args) {
if (str.startsWith("--profile=")) {
QString s = str;
s.remove(0, QString("--profile=").length());
profile = s.toStdString();
args.removeOne(str);
} else if (str == "-h" || str == "--help") {
printUsage(argv[0]);
return 0;
} else if (str == "--no-images") {
saveImages = false;
args.removeOne(str);
}
}
if (args.length() < 2) {
printUsage(argv[0]);
return 1;
}
std::string fileMain(args.at(0).toStdString());
args.removeFirst();
std::string fileSub = args.at(0).toStdString();
args.removeFirst();
qDebug() << "Unused arguments: " << args;
if (argc > 2) {
fileMain = argv[1];
fileSub = argv[2];
} else {
std::cout << "Usage: " << argv[0] << " <main audio file> <second audio file>" << std::endl;
return 0;
}
std::cout << "Trying to align (2)\n\t" << fileSub << "\nto fit on (1)\n\t" << fileMain
<< "\n, result will indicate by how much (2) has to be moved." << std::endl
<< "Profile used: " << profile << std::endl
;
// Initialize MLT
Mlt::Factory::init(NULL);
// Load an arbitrary profile
Mlt::Profile prof(profile.c_str());
// Load the MLT producers
Mlt::Producer prodMain(prof, fileMain.c_str());
if (!prodMain.is_valid()) {
std::cout << fileMain << " is invalid." << std::endl;
return 2;
}
Mlt::Producer prodSub(prof, fileSub.c_str());
if (!prodSub.is_valid()) {
std::cout << fileSub << " is invalid." << std::endl;
return 2;
}
// Build the audio envelopes for the correlation
AudioEnvelope *envelopeMain = new AudioEnvelope(&prodMain);
envelopeMain->loadEnvelope();
envelopeMain->loadStdDev();
envelopeMain->dumpInfo();
AudioEnvelope *envelopeSub = new AudioEnvelope(&prodSub);
envelopeSub->loadEnvelope();
envelopeSub->loadStdDev();
envelopeSub->dumpInfo();
float *correlated;
int corrSize = 0;
FFTCorrelation::correlate(envelopeMain->envelope(), envelopeMain->envelopeSize(),
envelopeSub->envelope(), envelopeSub->envelopeSize(),
&correlated, corrSize);
int maxIndex = 0;
float max = 0;
for (int i = 0; i < corrSize; i++) {
if (correlated[i] > max) {
max = correlated[i];
maxIndex = i;
}
}
int shift = envelopeMain->envelopeSize() - maxIndex-1;
qDebug() << "Max correlation value is " << max << " at " << maxIndex;
qDebug() << "Will have to move by " << shift << " frames";
std::cout << fileSub << " should be shifted by " << shift << " frames" << std::endl
<< "\trelative to " << fileMain << std::endl
<< "\tin a " << prodMain.get_fps() << " fps profile (" << profile << ")." << std::endl
;
if (saveImages) {
QString filename = QString("correlation-fft-%1.png")
.arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
QImage img(corrSize/2, 400, QImage::Format_ARGB32);
img.fill(qRgb(255,255,255));
for (int x = 0; x < img.width(); x++) {
float val = fabs(correlated[x]/max);
for (int y = 0; y < img.height()*val; y++) {
img.setPixel(x, img.height()-1-y, qRgb(50,50,50));
}
}
img.save(filename);
qDebug() << "Saved image to " << filename;
}
delete correlated;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment