Commit 753c45e2 authored by Simon Eugster's avatar Simon Eugster

Audio alignment works.

This is at the moment still a brute-force approach (correlation
currently requires O(n²) time) but /works/ and is reasonably
fast for clips of reasonable length (not too many minutes).

Later the correlation can be replaced with an FFT version which
would only require O(n log n) time.

Example usage of the executable:
$ ./audioOffset mainVideo.avi someSound.wav

This will e.g. tell you that someSound.wav needs to be shifted
by 42 frames in order to be aligned. The MLT profile can be
specified with --profile=<mlt profile>.
parent 9e475c57
......@@ -7,7 +7,7 @@ include_directories(
)
include(${QT_USE_FILE})
add_executable(audioOffset audioOffset.cpp audioInfo.cpp audioStreamInfo.cpp audioEnvelope.cpp)
add_executable(audioOffset audioOffset.cpp audioInfo.cpp audioStreamInfo.cpp audioEnvelope.cpp audioCorrelation.cpp audioCorrelationInfo.cpp)
target_link_libraries(audioOffset
${QT_LIBRARIES}
${LIBMLT_LIBRARY}
......
#include "audioCorrelation.h"
#include <QTime>
#include <cmath>
#include <iostream>
/**
 * Builds a correlator around the envelope of the main track.
 *
 * The pointer is stored as passed and the envelope is normalized
 * immediately.
 *
 * @param mainTrackEnvelope Envelope of the track that stays fixed while
 *                          children are shifted along it.
 */
AudioCorrelation::AudioCorrelation(AudioEnvelope *mainTrackEnvelope)
    : m_mainTrackEnvelope(mainTrackEnvelope)
{
    // Same object as m_mainTrackEnvelope, which was just initialised above.
    mainTrackEnvelope->normalizeEnvelope();
}
/**
 * Releases the correlation results created by addChild().
 *
 * Every AudioCorrelationInfo in m_correlations was allocated with `new`
 * inside addChild() and is referenced from nowhere else in this class, so
 * it has to be freed here. The envelopes in m_children (and the main
 * envelope) are only pointers handed in by the caller and are left alone.
 */
AudioCorrelation::~AudioCorrelation()
{
    for (int i = 0; i < m_correlations.size(); i++) {
        delete m_correlations.at(i);
    }
    m_correlations.clear();
}
/**
 * Correlates the given envelope against the main track envelope and
 * stores the result.
 *
 * The child envelope is normalized first. For every possible shift of the
 * child along the main track the sum of the element-wise products of the
 * overlapping parts is calculated; its absolute value is written to the
 * correlation vector of a newly allocated AudioCorrelationInfo.
 *
 * @param envelope Envelope of the track that should be aligned to the main
 *                 track. The pointer is stored as passed.
 * @return Index of the newly added child, to be used with getShift() and
 *         info().
 */
int AudioCorrelation::addChild(AudioEnvelope *envelope)
{
    envelope->normalizeEnvelope();

    const int sizeMain = m_mainTrackEnvelope->envelopeSize();
    const int sizeSub = envelope->envelopeSize();

    AudioCorrelationInfo *info = new AudioCorrelationInfo(sizeMain, sizeSub);
    int64_t *correlation = info->correlationVector();

    const int64_t *envMain = m_mainTrackEnvelope->envelope();
    const int64_t *envSub = envelope->envelope();

    // Largest value written to the correlation vector so far.
    int64_t max = 0;

    /*
      Correlation:

      SHIFT \in [-sS..sM]

        <--sS----
      [  sub  ]----sM--->[  sub  ]
               [  main  ]

      ^ correlation vector index = SHIFT + sS

      main is fixed, sub is shifted along main.
    */

    QTime t;
    t.start();
    for (int shift = -sizeSub; shift <= sizeMain; shift++) {

        // Determine the overlapping range of the two envelopes for this shift.
        const int64_t *left;
        const int64_t *right;
        int size;
        if (shift <= 0) {
            left = envSub - shift;
            right = envMain;
            size = std::min(sizeSub + shift, sizeMain);
        } else {
            left = envSub;
            right = envMain + shift;
            size = std::min(sizeSub, sizeMain - shift);
        }

        int64_t sum = 0;
        for (int i = 0; i < size; i++) {
            sum += left[i] * right[i];
        }

        // The vector stores magnitudes, so the maximum has to be tracked on
        // the magnitude as well; otherwise a strongly negative sum ends up
        // larger than the value reported through setMax().
        const int64_t magnitude = (sum < 0) ? -sum : sum;
        correlation[sizeSub + shift] = magnitude;
        if (magnitude > max) {
            max = magnitude;
        }
    }
    info->setMax(max);
    std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." << std::endl;

    m_children.append(envelope);
    m_correlations.append(info);

    Q_ASSERT(m_correlations.size() == m_children.size());

    // The child was appended last. indexOf() would return an earlier
    // position if the same envelope had been added before.
    return m_children.size() - 1;
}
int AudioCorrelation::getShift(int childIndex) const
{
Q_ASSERT(childIndex >= 0);
Q_ASSERT(childIndex < m_correlations.size());
int indexOffset = m_correlations.at(childIndex)->maxIndex();
indexOffset -= m_children.at(childIndex)->envelopeSize();
return indexOffset;
}
/**
 * Gives read access to the correlation result of a child.
 *
 * @param childIndex Index as returned by addChild().
 * @return The AudioCorrelationInfo stored for that child.
 */
AudioCorrelationInfo const* AudioCorrelation::info(int childIndex) const
{
    Q_ASSERT(childIndex >= 0);
    Q_ASSERT(childIndex < m_correlations.size());

    const AudioCorrelationInfo *result = m_correlations[childIndex];
    return result;
}
#ifndef AUDIOCORRELATION_H
#define AUDIOCORRELATION_H
#include "audioCorrelationInfo.h"
#include "audioEnvelope.h"
#include <QList>
class AudioCorrelationInfo;
/**
 * Correlates the envelope of a main track with the envelopes of one or
 * more child tracks in order to find the shift between them.
 */
class AudioCorrelation
{
public:
    /// Stores the main track envelope and normalizes it.
    AudioCorrelation(AudioEnvelope *mainTrackEnvelope);
    ~AudioCorrelation();

    /// Normalizes \c envelope, correlates it with the main track and
    /// returns the index under which the result is stored.
    int addChild(AudioEnvelope *envelope);

    /// Peak position of the child's correlation minus the child's envelope size.
    int getShift(int childIndex) const;

    /// Correlation result for the child at \c childIndex.
    const AudioCorrelationInfo *info(int childIndex) const;

//    int childIndex(AudioEnvelope *envelope) const;

private:
    AudioEnvelope *m_mainTrackEnvelope;      ///< Envelope all children are compared against
    QList<AudioEnvelope*> m_children;        ///< Envelopes passed to addChild(), in insertion order
    QList<AudioCorrelationInfo*> m_correlations; ///< One result per entry in m_children
};
#endif // AUDIOCORRELATION_H
#include "audioCorrelationInfo.h"
#include <iostream>
/**
 * Allocates a correlation vector with one slot per possible shift, i.e.
 * mainSize + subSize + 1 entries.
 *
 * The slots are value-initialised to zero so that max(), maxIndex() and
 * toImage() never read indeterminate memory, even when they are called
 * before the vector has been filled completely.
 *
 * @param mainSize Envelope size of the main track.
 * @param subSize  Envelope size of the track that is shifted.
 */
AudioCorrelationInfo::AudioCorrelationInfo(int mainSize, int subSize) :
    m_mainSize(mainSize),
    m_subSize(subSize),
    m_max(-1)   // not positive: max() has to scan the vector until setMax() is called
{
    m_correlationVector = new int64_t[m_mainSize+m_subSize+1]();
}
/**
 * Frees the correlation vector.
 *
 * The vector is allocated with new[] in the constructor and therefore has
 * to be released with delete[]; a plain delete is undefined behaviour.
 */
AudioCorrelationInfo::~AudioCorrelationInfo()
{
    delete[] m_correlationVector;
}
/**
 * @return Number of entries in the correlation vector, which is
 *         main size + sub size + 1.
 */
int AudioCorrelationInfo::size() const
{
    const int entryCount = m_mainSize + m_subSize + 1;
    return entryCount;
}
void AudioCorrelationInfo::setMax(int64_t max)
{
m_max = max;
}
/**
 * Returns the largest value of the correlation vector.
 *
 * If a positive maximum has been provided through setMax() it is returned
 * directly. Otherwise the vector is scanned. The stored maximum starts out
 * as -1, so this scan is the regular path when setMax() has not been
 * called and must not be guarded by an assertion on m_max.
 *
 * @return The maximum; 0 if no entry is positive (asserted against in
 *         debug builds).
 */
int64_t AudioCorrelationInfo::max() const
{
    if (m_max <= 0) {
        // No usable cached value: determine the maximum from the vector.
        int width = size();
        int64_t max = 0;
        for (int i = 0; i < width; i++) {
            if (m_correlationVector[i] > max) {
                max = m_correlationVector[i];
            }
        }
        Q_ASSERT(max > 0);
        return max;
    }
    return m_max;
}
/**
 * Looks up the position of the correlation peak.
 *
 * @return Index of the first entry holding the largest positive value of
 *         the correlation vector; 0 if no entry is greater than zero.
 */
int AudioCorrelationInfo::maxIndex() const
{
    const int entryCount = size();

    int peakPosition = 0;
    int64_t peakValue = 0;

    int pos = 0;
    while (pos < entryCount) {
        const int64_t candidate = m_correlationVector[pos];
        // Strict comparison: on ties the earliest position wins.
        if (peakValue < candidate) {
            peakValue = candidate;
            peakPosition = pos;
        }
        ++pos;
    }

    return peakPosition;
}
int64_t* AudioCorrelationInfo::correlationVector()
{
return m_correlationVector;
}
/**
 * Draws the correlation vector as a bar chart.
 *
 * The image is size() pixels wide; every column holds one dark bar on a
 * white background whose height is the correlation value scaled so that
 * max() fills the whole image height.
 *
 * @param height Height of the resulting image in pixels.
 * @return The rendered image. If the maximum is not positive, there is
 *         nothing to scale against and the image stays white.
 */
QImage AudioCorrelationInfo::toImage(int height) const
{
    const int width = size();
    const int64_t maxVal = max();

    QImage img(width, height, QImage::Format_ARGB32);
    img.fill(qRgb(255,255,255));

    if (maxVal <= 0) {
        // Avoid dividing by zero below.
        return img;
    }

    const int imgHeight = img.height();
    for (int x = 0; x < width; x++) {
        // Clamp before converting to int: an entry larger than maxVal would
        // otherwise produce a bar that starts above the image (negative y).
        double scaled = m_correlationVector[x]/double(maxVal)*imgHeight;
        if (scaled > imgHeight) {
            scaled = imgHeight;
        } else if (scaled < 0) {
            scaled = 0;
        }
        const int val = int(scaled);

        for (int y = imgHeight-1; y > imgHeight - val - 1; y--) {
            img.setPixel(x, y, qRgb(50, 50, 50));
        }
    }

    return img;
}
#ifndef AUDIOCORRELATIONINFO_H
#define AUDIOCORRELATIONINFO_H
#include <QImage>
class AudioCorrelationInfo
{
public:
AudioCorrelationInfo(int mainSize, int subSize);
~AudioCorrelationInfo();
int size() const;
int64_t* correlationVector();
int64_t const* correlationVector() const;
int64_t max() const;
void setMax(int64_t max); ///< Can be set to avoid calculating the max again in this function
int maxIndex() const;
QImage toImage(int height = 400) const;
private:
int m_mainSize;
int m_subSize;
int64_t *m_correlationVector;
int64_t m_max;
};
#endif // AUDIOCORRELATIONINFO_H
......@@ -13,8 +13,11 @@
#include <QFile>
#include <QTime>
#include <QImage>
#include <QDebug>
#include <QFileInfo>
#include <QDateTime>
#include <QStringList>
#include <QCoreApplication>
#include <mlt++/Mlt.h>
#include <iostream>
#include <cstdlib>
......@@ -23,11 +26,62 @@
#include "audioInfo.h"
#include "audioStreamInfo.h"
#include "audioEnvelope.h"
#include "audioCorrelation.h"
/**
 * Writes the command line help to standard output.
 *
 * @param path Name of the executable as it should appear in the usage line.
 */
void printUsage(const char *path)
{
    // Synopsis
    std::cout << "Usage: " << path << " <main audio file> <second audio file>" << std::endl;

    // One line per supported option
    std::cout << "\t-h, --help\tDisplay this help" << std::endl;
    std::cout << "\t--profile=<profile>\tUse the given profile for calculation (run: melt -query profiles)" << std::endl;
    std::cout << "\t--no-images\tDo not save envelope and correlation images" << std::endl;
}
int main(int argc, char *argv[])
{
char *fileMain;
char *fileSub;
QCoreApplication app(argc, argv);
QStringList args = app.arguments();
args.removeAt(0);
std::string profile = "atsc_1080p_24";
bool saveImages = true;
// Load arguments
foreach (QString str, args) {
if (str.startsWith("--profile=")) {
QString s = str;
s.remove(0, QString("--profile=").length());
profile = s.toStdString();
args.removeOne(str);
} else if (str == "-h" || str == "--help") {
printUsage(argv[0]);
return 0;
} else if (str == "--no-images") {
saveImages = false;
args.removeOne(str);
}
}
if (args.length() < 2) {
printUsage(argv[0]);
return 1;
}
std::string fileMain(args.at(0).toStdString());
args.removeFirst();
std::string fileSub = args.at(0).toStdString();
args.removeFirst();
qDebug() << "Unused arguments: " << args;
if (argc > 2) {
fileMain = argv[1];
fileSub = argv[2];
......@@ -35,106 +89,79 @@ int main(int argc, char *argv[])
std::cout << "Usage: " << argv[0] << " <main audio file> <second audio file>" << std::endl;
return 0;
}
std::cout << "Trying to align (1)\n\t" << fileSub << "\nto fit on (2)\n\t" << fileMain
<< "\n, result will indicate by how much (1) has to be moved." << std::endl;
std::cout << "Trying to align (2)\n\t" << fileSub << "\nto fit on (1)\n\t" << fileMain
<< "\n, result will indicate by how much (2) has to be moved." << std::endl
<< "Profile used: " << profile << std::endl
;
// Initialize MLT
Mlt::Factory::init(NULL);
// Load an arbitrary profile
Mlt::Profile prof("hdv_1080_25p");
Mlt::Profile prof(profile.c_str());
// Load the MLT producers
Mlt::Producer prodMain(prof, fileMain);
Mlt::Producer prodMain(prof, fileMain.c_str());
if (!prodMain.is_valid()) {
std::cout << fileMain << " is invalid." << std::endl;
return 2;
}
Mlt::Producer prodSub(prof, fileSub);
Mlt::Producer prodSub(prof, fileSub.c_str());
if (!prodSub.is_valid()) {
std::cout << fileSub << " is invalid." << std::endl;
return 2;
}
// Build the audio envelopes for the correlation
AudioEnvelope envelopeMain(&prodMain);
envelopeMain.loadEnvelope();
envelopeMain.loadStdDev();
envelopeMain.dumpInfo();
envelopeMain.normalizeEnvelope();
envelopeMain.dumpInfo();
AudioEnvelope envelopeSub(&prodSub);
envelopeSub.loadEnvelope();
envelopeMain.normalizeEnvelope();
envelopeSub.loadStdDev();
envelopeSub.dumpInfo();
QString outImg = QString("envelope-%1.png")
.arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
envelopeMain.drawEnvelope().save(outImg);
std::cout << "Saved volume envelope as "
<< QFileInfo(outImg).absoluteFilePath().toStdString()
<< std::endl;
const int sizeX = envelopeMain.envelopeSize();
const int sizeY = envelopeSub.envelopeSize();
int64_t correlation[sizeX + sizeY + 1];
const int64_t *envX = envelopeMain.envelope();
const int64_t *envY = envelopeSub.envelope();
int64_t const* left;
int64_t const* right;
int size;
int64_t sum;
int64_t max = 0;
QTime t;
t.start();
for (int shift = -sizeX; shift <= sizeY; shift++) {
if (shift <= 0) {
left = envX-shift;
right = envY;
size = std::min(sizeX+shift, sizeY);
} else {
left = envX;
right = envY+shift;
size = std::min(sizeX, sizeY-shift);
}
sum = 0;
for (int i = 0; i < size; i++) {
sum += (*left) * (*right);
left++;
right++;
}
correlation[sizeX+shift] = std::abs(sum);
std::cout << sum << " ";
if (sum > max) {
max = sum;
}
}
std::cout << "Correlation calculated. Time taken: " << t.elapsed() << " ms." << std::endl;
int val;
QImage img(sizeX + sizeY + 1, 400, QImage::Format_ARGB32);
img.fill(qRgb(255,255,255));
for (int x = 0; x < sizeX+sizeY+1; x++) {
val = correlation[x]/double(max)*img.height();
for (int y = img.height()-1; y > img.height() - val - 1; y--) {
img.setPixel(x, y, qRgb(50, 50, 50));
}
}
outImg = QString("correlation-%1.png")
.arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
img.save(outImg);
std::cout << "Saved volume envelope as "
<< QFileInfo(outImg).absoluteFilePath().toStdString()
<< std::endl;
// Calculate the correlation and hereby the audio shift
AudioCorrelation corr(&envelopeMain);
int index = corr.addChild(&envelopeSub);
int shift = corr.getShift(index);
std::cout << fileSub << " should be shifted by " << shift << " frames" << std::endl
<< "\trelative to " << fileMain << std::endl
<< "\tin a " << prodMain.get_fps() << " fps profile (" << profile << ")." << std::endl
;
if (saveImages) {
QString outImg;
outImg = QString("envelope-main-%1.png")
.arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
envelopeMain.drawEnvelope().save(outImg);
std::cout << "Saved volume envelope as "
<< QFileInfo(outImg).absoluteFilePath().toStdString()
<< std::endl;
outImg = QString("envelope-sub-%1.png")
.arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
envelopeSub.drawEnvelope().save(outImg);
std::cout << "Saved volume envelope as "
<< QFileInfo(outImg).absoluteFilePath().toStdString()
<< std::endl;
outImg = QString("correlation-%1.png")
.arg(QDateTime::currentDateTime().toString("yyyy-MM-dd-hh:mm:ss"));
corr.info(index)->toImage().save(outImg);
std::cout << "Saved correlation image as "
<< QFileInfo(outImg).absoluteFilePath().toStdString()
<< std::endl;
}
return 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment