Commit dce76ace authored by Jean-Baptiste Mardelle
Browse files

Implement unfinished auto subtitles mode (selected track / clip only)

fixes #1459
parent 89f2e438
Pipeline #192163 passed with stage
in 9 minutes and 32 seconds
......@@ -25,9 +25,11 @@
#include <memory>
#include <utility>
SpeechDialog::SpeechDialog(std::shared_ptr<TimelineItemModel> timeline, QPoint zone, bool, bool, QWidget *parent)
SpeechDialog::SpeechDialog(std::shared_ptr<TimelineItemModel> timeline, QPoint zone, int tid, bool, bool, QWidget *parent)
: QDialog(parent)
, m_timeline(timeline)
, m_zone(zone)
, m_tid(-1)
{
setFont(QFontDatabase::systemFont(QFontDatabase::SmallestReadableFont));
......@@ -54,9 +56,70 @@ SpeechDialog::SpeechDialog(std::shared_ptr<TimelineItemModel> timeline, QPoint z
}
}
});
QButtonGroup *buttonGroup = new QButtonGroup(this);
buttonGroup->addButton(timeline_zone);
buttonGroup->addButton(timeline_track);
buttonGroup->addButton(timeline_clips);
connect(buttonGroup, QOverload<QAbstractButton *>::of(&QButtonGroup::buttonClicked), [=, selectedTrack = tid, sourceZone = zone](QAbstractButton *button) {
speech_info->animatedHide();
buttonBox->button(QDialogButtonBox::Apply)->setEnabled(true);
if (button == timeline_clips) {
std::unordered_set<int> selection = timeline->getCurrentSelection();
int cid = -1;
m_tid = -1;
int firstPos = -1;
for (const auto &s : selection) {
// Find first clip
if (!timeline->isClip(s)) {
continue;
}
int pos = timeline->getClipPosition(s);
if (firstPos == -1 || pos < firstPos) {
cid = s;
firstPos = pos;
m_tid = timeline->getClipTrackId(cid);
if (!timeline->isAudioTrack(m_tid)) {
m_tid = timeline->getMirrorAudioTrackId(m_tid);
}
}
}
if (m_tid == -1) {
speech_info->setMessageType(KMessageWidget::Information);
speech_info->setText(i18n("No audio track available for selected clip"));
speech_info->animatedShow();
buttonBox->button(QDialogButtonBox::Apply)->setEnabled(false);
return;
}
if (timeline->isClip(cid)) {
m_zone.setX(timeline->getClipPosition(cid));
m_zone.setY(m_zone.x() + timeline->getClipPlaytime(cid));
} else {
speech_info->setMessageType(KMessageWidget::Information);
speech_info->setText(i18n("Select a clip in timeline to perform analysis"));
speech_info->animatedShow();
buttonBox->button(QDialogButtonBox::Apply)->setEnabled(false);
}
} else {
if (button == timeline_track) {
m_tid = selectedTrack;
if (!timeline->isAudioTrack(m_tid)) {
m_tid = timeline->getMirrorAudioTrackId(m_tid);
}
if (m_tid == -1) {
speech_info->setMessageType(KMessageWidget::Information);
speech_info->setText(i18n("No audio track found"));
speech_info->animatedShow();
buttonBox->button(QDialogButtonBox::Apply)->setEnabled(false);
}
} else {
m_tid = -1;
}
m_zone = sourceZone;
}
});
connect(language_box, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), this,
[this]() { KdenliveSettings::setVosk_srt_model(language_box->currentText()); });
connect(buttonBox->button(QDialogButtonBox::Apply), &QPushButton::clicked, this, [this, zone]() { slotProcessSpeech(zone); });
connect(buttonBox->button(QDialogButtonBox::Apply), &QPushButton::clicked, this, [this]() { slotProcessSpeech(); });
m_stt->parseVoskDictionaries();
frame_progress->setVisible(false);
button_abort->setIcon(QIcon::fromTheme(QStringLiteral("process-stop")));
......@@ -72,7 +135,7 @@ SpeechDialog::~SpeechDialog()
QObject::disconnect(m_modelsConnection);
}
void SpeechDialog::slotProcessSpeech(QPoint zone)
void SpeechDialog::slotProcessSpeech()
{
m_stt->checkDependencies();
if (!m_stt->checkSetup() || !m_stt->missingDependencies().isEmpty()) {
......@@ -106,8 +169,34 @@ void SpeechDialog::slotProcessSpeech(QPoint zone)
}
m_tmpAudio->close();
m_timeline->sceneList(QDir::temp().absolutePath(), sceneList);
// TODO: do the rendering in another thread to not block the UI
Mlt::Producer producer(*m_timeline->tractor()->profile(), "xml", sceneList.toUtf8().constData());
qDebug() << "=== STARTING RENDER B";
Mlt::Service s(producer);
for (int i = 0; i < 10; i++) {
s = s.producer();
qDebug() << ":::: GOT SERVIE TYPE: " << i << " = " << s.type();
if (s.type() == mlt_service_multitrack_type) {
break;
}
}
Mlt::Multitrack tractor(s);
int trackPos = -1;
if (m_tid > -1) {
trackPos = m_timeline->getTrackMltIndex(m_tid);
}
int tid = 0;
for (int i = 0; i < tractor.count(); i++) {
std::shared_ptr<Mlt::Producer> tk(tractor.track(i));
if (tk->get_int("hide") == 1) {
// Video track, hide it
tk->set("hide", 3);
} else if (tid == 0 || (trackPos > -1 && trackPos != tid)) {
// We only want a specific audio track
tk->set("hide", 3);
}
tid++;
}
Mlt::Consumer xmlConsumer(*m_timeline->tractor()->profile(), "avformat", audio.toUtf8().constData());
if (!xmlConsumer.is_valid() || !producer.is_valid()) {
qDebug() << "=== STARTING CONSUMER ERROR";
......@@ -125,10 +214,11 @@ void SpeechDialog::slotProcessSpeech(QPoint zone)
qApp->processEvents();
xmlConsumer.set("terminate_on_pause", 1);
xmlConsumer.set("properties", "WAV");
producer.set_in_and_out(zone.x(), zone.y());
producer.set_in_and_out(m_zone.x(), m_zone.y());
xmlConsumer.connect(producer);
qDebug() << "=== STARTING RENDER C, IN:" << zone.x() << " - " << zone.y();
m_duration = zone.y() - zone.x();
qDebug() << "=== STARTING RENDER C, IN:" << m_zone.x() << " - " << m_zone.y();
m_duration = m_zone.y() - m_zone.x();
qApp->processEvents();
xmlConsumer.run();
qApp->processEvents();
......@@ -142,20 +232,19 @@ void SpeechDialog::slotProcessSpeech(QPoint zone)
m_speechJob = std::make_unique<QProcess>(this);
connect(m_speechJob.get(), &QProcess::readyReadStandardOutput, this, &SpeechDialog::slotProcessProgress);
connect(m_speechJob.get(), static_cast<void (QProcess::*)(int, QProcess::ExitStatus)>(&QProcess::finished), this,
[this, speech, zone](int, QProcess::ExitStatus status) { slotProcessSpeechStatus(status, speech, zone); });
[this, speech](int, QProcess::ExitStatus status) { slotProcessSpeechStatus(status, speech); });
m_speechJob->start(m_stt->pythonExec(), {m_stt->subtitleScript(), modelDirectory, language, audio, speech});
}
void SpeechDialog::slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile, const QPoint zone)
void SpeechDialog::slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile)
{
qDebug() << "/// TERMINATING SPEECH JOB\n\n+++++++++++++++++++++++++++";
if (status == QProcess::CrashExit) {
speech_info->setMessageType(KMessageWidget::Warning);
speech_info->setText(i18n("Speech recognition aborted."));
speech_info->animatedShow();
} else {
if (QFile::exists(srtFile)) {
m_timeline->getSubtitleModel()->importSubtitle(srtFile, zone.x(), true);
m_timeline->getSubtitleModel()->importSubtitle(srtFile, m_zone.x(), true);
speech_info->setMessageType(KMessageWidget::Positive);
speech_info->setText(i18n("Subtitles imported"));
} else {
......
......@@ -27,12 +27,15 @@ class SpeechDialog : public QDialog, public Ui::SpeechDialog_UI
Q_OBJECT
public:
explicit SpeechDialog(std::shared_ptr<TimelineItemModel> timeline, QPoint zone, bool activeTrackOnly = false, bool selectionOnly = false, QWidget *parent = nullptr);
explicit SpeechDialog(std::shared_ptr<TimelineItemModel> timeline, QPoint zone, int tid, bool activeTrackOnly = false, bool selectionOnly = false,
QWidget *parent = nullptr);
~SpeechDialog() override;
private:
std::unique_ptr<QProcess> m_speechJob;
const std::shared_ptr<TimelineItemModel> m_timeline;
QPoint m_zone;
int m_tid;
int m_duration;
std::unique_ptr<QTemporaryFile> m_tmpAudio;
std::unique_ptr<QTemporaryFile> m_tmpSrt;
......@@ -41,7 +44,7 @@ private:
SpeechToText *m_stt;
private slots:
void slotProcessSpeech(QPoint zone);
void slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile, const QPoint zone);
void slotProcessSpeech();
void slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile);
void slotProcessProgress();
};
......@@ -1025,6 +1025,7 @@ bool ProjectManager::updateTimeline(int pos, const QString &chunks, const QStrin
qDebug() << "// Project failed to load!!";
}
// Free memory used by original playlist
xmlProd->clear();
xmlProd.reset(nullptr);
const QString groupsData = m_project->getDocumentProperty(QStringLiteral("groups"));
// update track compositing
......
......@@ -4926,7 +4926,7 @@ void TimelineController::exportSubtitle()
// Diff fragment (GitLab rendering shows the removed line directly above its
// replacement). The change passes the active timeline track id (m_activeTrack)
// to SpeechDialog in place of the former first boolean flag, matching the new
// SpeechDialog(timeline, zone, tid, bool, bool, parent) constructor declared in
// the header hunk of this commit.
void TimelineController::subtitleSpeechRecognition()
{
// Old call (removed): no track id, dialog could only process the full zone.
SpeechDialog d(m_model, m_zone, false, false, qApp->activeWindow());
// New call (added): m_activeTrack enables the "selected track only" mode.
SpeechDialog d(m_model, m_zone, m_activeTrack, false, false, qApp->activeWindow());
d.exec();
}
......
Supports Markdown
Attach a file by drag &amp; drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment