Commit 7dd84cfa authored by Jean-Baptiste Mardelle
Browse files

Various fixes for speech recognition subtitles (show progress, don't terminate...

Various fixes for speech recognition subtitles (show progress, don't terminate before process finished)
parent d596e55a
Pipeline #50389 canceled with stage
in 5 minutes and 57 seconds
......@@ -33,8 +33,11 @@ WORDS_PER_LINE = 7
def transcribe():
results = []
subs = []
progress = 0
while True:
data = process.stdout.read(4000)
print("progress:" + str(progress), file = sys.stdout, flush=True)
progress += 1
if len(data) == 0:
break
if rec.AcceptWaveform(data):
......@@ -56,7 +59,7 @@ def transcribe():
return subs
subtitle = srt.compose(transcribe())
print (subtitle)
#print (subtitle)
with open(sys.argv[4], 'w',encoding='utf8') as f:
f.writelines(subtitle)
f.close()
......@@ -37,7 +37,7 @@ def transcribe():
break
if rec.AcceptWaveform(data):
sentence = rec.Result()
print (sentence)
print (sentence, file = sys.stdout, flush=True)
transcribe()
#with open(sys.argv[3], 'w') as f:
......
......@@ -38,6 +38,7 @@
SpeechDialog::SpeechDialog(const std::shared_ptr<TimelineItemModel> &timeline, QPoint zone, bool activeTrackOnly, bool selectionOnly, QWidget *parent)
: QDialog(parent)
, m_timeline(timeline)
{
setFont(QFontDatabase::systemFont(QFontDatabase::SmallestReadableFont));
......@@ -74,10 +75,17 @@ SpeechDialog::SpeechDialog(const std::shared_ptr<TimelineItemModel> &timeline, Q
connect(language_box, static_cast<void (QComboBox::*)(int)>(&QComboBox::activated), [this]() {
KdenliveSettings::setVosk_srt_model(language_box->currentText());
});
connect(buttonBox->button(QDialogButtonBox::Apply), &QPushButton::clicked, [this, timeline, zone]() {
slotProcessSpeech(timeline, zone);
connect(buttonBox->button(QDialogButtonBox::Apply), &QPushButton::clicked, [this, zone]() {
slotProcessSpeech(zone);
});
parseVoskDictionaries();
frame_progress->setVisible(false);
button_abort->setIcon(QIcon::fromTheme(QStringLiteral("process-stop")));
connect(button_abort, &QToolButton::clicked, [this]() {
if (m_speechJob && m_speechJob->state() == QProcess::Running) {
m_speechJob->kill();
}
});
}
SpeechDialog::~SpeechDialog()
......@@ -93,7 +101,7 @@ void SpeechDialog::updateAvailability()
vosk_config->setVisible(!enabled);
}
void SpeechDialog::slotProcessSpeech(const std::shared_ptr<TimelineItemModel> &timeline, QPoint zone)
void SpeechDialog::slotProcessSpeech(QPoint zone)
{
QString pyExec = QStandardPaths::findExecutable(QStringLiteral("python3"));
if (pyExec.isEmpty()) {
......@@ -110,25 +118,32 @@ void SpeechDialog::slotProcessSpeech(const std::shared_ptr<TimelineItemModel> &t
QString speech;
QString audio;
QTemporaryFile tmpPlaylist(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.mlt")));
QTemporaryFile tmpSpeech(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.srt")));
QTemporaryFile tmpAudio(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.wav")));
m_tmpSrt.reset(new QTemporaryFile(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.srt"))));
m_tmpAudio.reset(new QTemporaryFile(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.wav"))));
if (tmpPlaylist.open()) {
sceneList = tmpPlaylist.fileName();
}
tmpPlaylist.close();
if (tmpSpeech.open()) {
speech = tmpSpeech.fileName();
if (m_tmpSrt->open()) {
speech = m_tmpSrt->fileName();
}
tmpSpeech.close();
if (tmpAudio.open()) {
audio = tmpAudio.fileName();
m_tmpSrt->close();
if (m_tmpAudio->open()) {
audio = m_tmpAudio->fileName();
}
tmpAudio.close();
m_tmpAudio->close();
pCore->getMonitor(Kdenlive::ProjectMonitor)->sceneList(QDir::temp().absolutePath(), sceneList);
Mlt::Producer producer(*timeline->tractor()->profile(), "xml", sceneList.toUtf8().constData());
Mlt::Producer producer(*m_timeline->tractor()->profile(), "xml", sceneList.toUtf8().constData());
qDebug()<<"=== STARTING RENDER B";
Mlt::Consumer xmlConsumer(*timeline->tractor()->profile(), "avformat", audio.toUtf8().constData());
qApp->processEvents();
Mlt::Consumer xmlConsumer(*m_timeline->tractor()->profile(), "avformat", audio.toUtf8().constData());
QString speechScript = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("scripts/speech.py"));
if (speechScript.isEmpty()) {
speech_info->setMessageType(KMessageWidget::Warning);
speech_info->setText(i18n("The speech script was not found, check your install."));
speech_info->animatedShow();
buttonBox->button(QDialogButtonBox::Apply)->setEnabled(true);
return;
}
if (!xmlConsumer.is_valid() || !producer.is_valid()) {
qDebug()<<"=== STARTING CONSUMER ERROR";
if (!producer.is_valid()) {
......@@ -139,26 +154,22 @@ void SpeechDialog::slotProcessSpeech(const std::shared_ptr<TimelineItemModel> &t
qApp->processEvents();
return;
}
speech_progress->setValue(0);
frame_progress->setVisible(true);
buttonBox->button(QDialogButtonBox::Apply)->setEnabled(false);
qApp->processEvents();
xmlConsumer.set("terminate_on_pause", 1);
xmlConsumer.set("properties", "WAV");
producer.set_in_and_out(zone.x(), zone.y());
xmlConsumer.connect(producer);
qDebug()<<"=== STARTING RENDER C, IN:"<<zone.x()<<" - "<<zone.y();
m_duration = zone.y() - zone.x();
qApp->processEvents();
xmlConsumer.run();
qApp->processEvents();
qDebug()<<"=== STARTING RENDER D";
QString language = language_box->currentText();
QString speechScript = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("scripts/speech.py"));
if (speechScript.isEmpty()) {
speech_info->setMessageType(KMessageWidget::Warning);
speech_info->setText(i18n("The speech script was not found, check your install."));
speech_info->animatedShow();
return;
}
qDebug()<<"=== RUNNING SPEECH ANALYSIS: "<<speechScript;
QProcess speechJob;
speech_info->setMessageType(KMessageWidget::Information);
speech_info->setText(i18n("Starting speech recognition"));
qApp->processEvents();
......@@ -167,16 +178,44 @@ void SpeechDialog::slotProcessSpeech(const std::shared_ptr<TimelineItemModel> &t
modelDirectory = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("speechmodels"), QStandardPaths::LocateDirectory);
}
qDebug()<<"==== ANALYSIS SPEECH: "<<modelDirectory<<" - "<<language<<" - "<<audio<<" - "<<speech;
speechJob.start(pyExec, {speechScript, modelDirectory, language, audio, speech});
speechJob.waitForFinished();
if (QFile::exists(speech)) {
timeline->getSubtitleModel()->importSubtitle(speech, zone.x(), true);
speech_info->setMessageType(KMessageWidget::Positive);
speech_info->setText(i18n("Subtitles imported"));
} else {
m_speechJob.reset(new QProcess(this));
connect(m_speechJob.get(), &QProcess::readyReadStandardOutput, this, &SpeechDialog::slotProcessProgress);
connect(m_speechJob.get(), static_cast<void (QProcess::*)(int, QProcess::ExitStatus)>(&QProcess::finished), [this, speech, zone](int, QProcess::ExitStatus status) {
slotProcessSpeechStatus(status, speech, zone);
});
m_speechJob->start(pyExec, {speechScript, modelDirectory, language, audio, speech});
}
/**
 * @brief Report the outcome of the speech recognition job and import the resulting subtitles.
 *
 * Connected to the finished() signal of the speech process. Whatever the outcome,
 * the Apply button is re-enabled and the progress frame is hidden again.
 *
 * @param status  Exit status reported by the speech process.
 * @param srtFile Path of the subtitle file the script was asked to write.
 * @param zone    Zone that was analysed; zone.x() is handed to the subtitle importer.
 */
void SpeechDialog::slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile, const QPoint zone)
{
    qDebug()<<"/// TERMINATING SPEECH JOB\n\n+++++++++++++++++++++++++++";
    if (status == QProcess::CrashExit) {
        // The abort button kills the process, which is reported as a crash exit
        speech_info->setMessageType(KMessageWidget::Warning);
        speech_info->setText(i18n("Speech recognition aborted."));
        speech_info->animatedShow();
    } else {
        // The subtitle file is a temporary file created before the script is started,
        // so its mere existence does not prove that the script succeeded:
        // also require a zero exit code from the process.
        const bool scriptSucceeded = !m_speechJob || m_speechJob->exitCode() == 0;
        if (scriptSucceeded && QFile::exists(srtFile)) {
            m_timeline->getSubtitleModel()->importSubtitle(srtFile, zone.x(), true);
            speech_info->setMessageType(KMessageWidget::Positive);
            speech_info->setText(i18n("Subtitles imported"));
        } else {
            speech_info->setMessageType(KMessageWidget::Warning);
            speech_info->setText(i18n("Speech recognition failed"));
        }
    }
    buttonBox->button(QDialogButtonBox::Apply)->setEnabled(true);
    frame_progress->setVisible(false);
}
void SpeechDialog::slotProcessProgress()
{
QString saveData = QString::fromUtf8(m_speechJob->readAll());
qDebug()<<"==== GOT SPEECH DATA: "<<saveData;
if (saveData.startsWith(QStringLiteral("progress:"))) {
double prog = saveData.section(QLatin1Char(':'), 1).toInt() * 3.12;
qDebug()<<"=== GOT DATA:\n"<<saveData;
speech_progress->setValue(static_cast<int>(100 * prog / m_duration));
}
}
void SpeechDialog::parseVoskDictionaries()
......
......@@ -24,6 +24,9 @@
#include "timeline2/model/timelineitemmodel.hpp"
#include "definitions.h"
#include <QProcess>
#include <QTemporaryFile>
/**
* @class SpeechDialog
* @brief A dialog for creating subtitles from the timeline audio through speech recognition.
......@@ -39,13 +42,20 @@ public:
~SpeechDialog() override;
private:
std::unique_ptr<QProcess> m_speechJob;
const std::shared_ptr<TimelineItemModel> m_timeline;
int m_duration;
std::unique_ptr<QTemporaryFile> m_tmpAudio;
std::unique_ptr<QTemporaryFile> m_tmpSrt;
QMetaObject::Connection m_availableConnection;
QMetaObject::Connection m_modelsConnection;
void parseVoskDictionaries();
private slots:
void slotProcessSpeech(const std::shared_ptr<TimelineItemModel> &timeline, QPoint zone);
void slotProcessSpeech(QPoint zone);
void slotProcessSpeechStatus(QProcess::ExitStatus status, const QString &srtFile, const QPoint zone);
void updateAvailability();
void slotProcessProgress();
};
#endif
......@@ -196,7 +196,7 @@ void TextBasedEdit::startRecognition()
info_message->animatedShow();
return;
}
m_speechJob.reset(new QProcess);
m_speechJob.reset(new QProcess(this));
info_message->setMessageType(KMessageWidget::Information);
info_message->setText(i18n("Starting speech recognition"));
qApp->processEvents();
......@@ -236,14 +236,6 @@ void TextBasedEdit::startRecognition()
m_speechJob->start(pyExec, {speechScript, modelDirectory, language, m_sourceUrl});
speech_progress->setValue(0);
frame_progress->setVisible(true);
/*if (m_speechJob->QFile::exists(speech)) {
timeline->getSubtitleModel()->importSubtitle(speech, zone.x(), true);
speech_info->setMessageType(KMessageWidget::Positive);
speech_info->setText(i18n("Subtitles imported"));
} else {
speech_info->setMessageType(KMessageWidget::Warning);
speech_info->setText(i18n("Speech recognition failed"));
}*/
}
void TextBasedEdit::updateAvailability()
......@@ -269,7 +261,7 @@ void TextBasedEdit::slotProcessSpeechStatus(int, QProcess::ExitStatus status)
void TextBasedEdit::slotProcessSpeech()
{
QString saveData = QString::fromUtf8(m_speechJob->readAll());
QString saveData = QString::fromUtf8(m_speechJob->readAllStandardOutput());
//saveData.replace(QStringLiteral("\\\""), QStringLiteral("\""));
qDebug()<<"=== GOT DATA:\n"<<saveData;
int ix = 0;
......
......@@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>318</width>
<height>283</height>
<width>370</width>
<height>329</height>
</rect>
</property>
<property name="windowTitle">
......@@ -68,10 +68,55 @@
</property>
</spacer>
</item>
<item row="5" column="0" colspan="2">
<widget class="KMessageWidget" name="speech_info"/>
<item row="6" column="0" colspan="2">
<widget class="QFrame" name="frame_progress">
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Plain</enum>
</property>
<layout class="QHBoxLayout" name="horizontalLayout">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item>
<widget class="QToolButton" name="button_abort">
<property name="text">
<string>...</string>
</property>
<property name="autoRaise">
<bool>true</bool>
</property>
</widget>
</item>
<item>
<widget class="QProgressBar" name="speech_progress">
<property name="value">
<number>24</number>
</property>
</widget>
</item>
</layout>
</widget>
</item>
<item row="7" column="0">
<widget class="QToolButton" name="vosk_config">
<property name="text">
<string>...</string>
</property>
</widget>
</item>
<item row="6" column="1">
<item row="7" column="1">
<widget class="QDialogButtonBox" name="buttonBox">
<property name="orientation">
<enum>Qt::Horizontal</enum>
......@@ -81,12 +126,8 @@
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QToolButton" name="vosk_config">
<property name="text">
<string>...</string>
</property>
</widget>
<item row="5" column="0" colspan="2">
<widget class="KMessageWidget" name="speech_info"/>
</item>
</layout>
</widget>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment