Commit c76ae6b1 authored by Jean-Baptiste Mardelle
Browse files

Fix speech script install, add preview for text based edit widget

parent ba9ea12d
Pipeline #50099 passed with stage
in 10 minutes and 50 seconds
......@@ -3,6 +3,7 @@ add_subdirectory(generators)
add_subdirectory(transitions)
add_subdirectory(icons)
add_subdirectory(lumas)
add_subdirectory(scripts)
add_subdirectory(man)
add_subdirectory(titles)
add_subdirectory(profiles)
......
# Install the speech recognition helper scripts into the application data folder.
# Each script is listed exactly once.
INSTALL(FILES
    speech.py
    speechtotext.py
    DESTINATION ${DATA_INSTALL_DIR}/kdenlive/scripts)
#!/usr/bin/env python3
#pip3 install vosk
#pip3 install srt
from vosk import Model, KaldiRecognizer, SetLogLevel
import sys
import os
import wave
import subprocess
import srt
import json
import datetime
# Silence the recognizer's own logging so that stdout only carries results.
SetLogLevel(-1)

# Expected command line: <models folder> <model name> <media file>
if len(sys.argv) < 4:
    print("Usage: speechtotext.py <models folder> <model name> <media file>", file=sys.stderr)
    sys.exit(1)

# The model name is resolved relative to the models folder.
os.chdir(sys.argv[1])

if not os.path.exists(sys.argv[2]):
    print ("Please download the model from https://alphacephei.com/vosk/models and unpack as ",sys.argv[2]," in the current folder.")
    # sys.exit() rather than the site-provided exit(), which is meant for interactive use
    sys.exit(1)

sample_rate=16000
model = Model(sys.argv[2])
rec = KaldiRecognizer(model, sample_rate)

# Let ffmpeg decode the media file to mono signed 16 bit PCM on its stdout.
process = subprocess.Popen(['ffmpeg', '-loglevel', 'quiet', '-i',
                            sys.argv[3],
                            '-ar', str(sample_rate) , '-ac', '1', '-f', 's16le', '-'],
                            stdout=subprocess.PIPE)

WORDS_PER_LINE = 7
def transcribe():
    """Feed ffmpeg's PCM output to the recognizer and print each result.

    Every recognized utterance is printed to stdout as one JSON document and
    flushed immediately, so that a process reading our stdout through a pipe
    receives results as they are produced instead of in large buffered chunks.
    """
    while True:
        data = process.stdout.read(4000)
        if len(data) == 0:
            break
        if rec.AcceptWaveform(data):
            print(rec.Result(), flush=True)
    # The recognizer still holds the audio received after the last complete
    # utterance; without this call the end of the clip would be lost.
    print(rec.FinalResult(), flush=True)


transcribe()
#with open(sys.argv[3], 'w') as f:
# f.writelines(subtitle)
#f.close()
......@@ -30,6 +30,7 @@ the Free Software Foundation, either version 3 of the License, or
#include "timeline2/view/timelinecontroller.h"
#include "timeline2/view/timelinewidget.h"
#include "dialogs/subtitleedit.h"
#include "dialogs/textbasededit.h"
#include <mlt++/MltRepository.h>
#include <KMessageBox>
......@@ -197,6 +198,7 @@ void Core::initGUI(const QUrl &Url, const QString &clipsToLoad)
m_library = new LibraryWidget(m_projectManager, m_mainWindow);
m_subtitleWidget = new SubtitleEdit(m_mainWindow);
m_mixerWidget = new MixerManager(m_mainWindow);
m_textEditWidget = new TextBasedEdit(m_mainWindow);
connect(m_library, SIGNAL(addProjectClips(QList<QUrl>)), m_binWidget, SLOT(droppedUrls(QList<QUrl>)));
connect(this, &Core::updateLibraryPath, m_library, &LibraryWidget::slotUpdateLibraryPath);
connect(m_capture.get(), &MediaCapture::recordStateChanged, m_mixerWidget, &MixerManager::recordStateChanged);
......@@ -330,6 +332,11 @@ LibraryWidget *Core::library()
return m_library;
}
// Accessor for the text based editing widget. The widget is created in
// Core::initGUI(); the member is initialized to nullptr in the class declaration.
TextBasedEdit *Core::textEditWidget()
{
return m_textEditWidget;
}
SubtitleEdit *Core::subtitleWidget()
{
return m_subtitleWidget;
......
......@@ -41,6 +41,7 @@ class ProjectItemModel;
class ProjectManager;
class SubtitleEdit;
class SubtitleModel;
class TextBasedEdit;
namespace Mlt {
class Repository;
......@@ -120,6 +121,8 @@ public:
LibraryWidget *library();
/** @brief Returns a pointer to the subtitle edit. */
SubtitleEdit *subtitleWidget();
/** @brief Returns a pointer to the text based editing widget. */
TextBasedEdit *textEditWidget();
/** @brief Returns a pointer to the audio mixer. */
MixerManager *mixer();
......@@ -260,6 +263,7 @@ private:
Bin *m_binWidget{nullptr};
LibraryWidget *m_library{nullptr};
SubtitleEdit *m_subtitleWidget{nullptr};
TextBasedEdit *m_textEditWidget{nullptr};
MixerManager *m_mixerWidget{nullptr};
/** @brief Current project's profile path */
QString m_currentProfile;
......
......@@ -8,6 +8,7 @@ set(kdenlive_SRCS
dialogs/renderwidget.cpp
dialogs/speechdialog.cpp
dialogs/subtitleedit.cpp
dialogs/textbasededit.cpp
dialogs/titletemplatedialog.cpp
dialogs/wizard.cpp
dialogs/splash.cpp
......
......@@ -1201,7 +1201,7 @@ void RenderWidget::prepareRendering(bool delayedRendering, const QString &chapte
}
playlistPath = projectFolder.absoluteFilePath(renderName);
} else {
QTemporaryFile tmp(QDir::tempPath() + "/kdenlive-XXXXXX.mlt");
QTemporaryFile tmp(QDir::temp().absoluteFilePath(QStringLiteral("kdenlive-XXXXXX.mlt")));
if (!tmp.open()) {
// Something went wrong
return;
......@@ -2903,7 +2903,7 @@ bool RenderWidget::startWaitingRenderJobs()
#else
const QLatin1String ScriptFormat(".sh");
#endif
QTemporaryFile tmp(QDir::tempPath() + QStringLiteral("/kdenlive-XXXXXX") + ScriptFormat);
QTemporaryFile tmp(QDir::temp().absoluteFilePath(QStringLiteral("kdenlive-XXXXXX") + ScriptFormat));
if (!tmp.open()) {
// Something went wrong
return false;
......
/***************************************************************************
* Copyright (C) 2020 by Jean-Baptiste Mardelle *
* This file is part of Kdenlive. See www.kdenlive.org. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) version 3 or any later version accepted by the *
* membership of KDE e.V. (or its successor approved by the membership *
* of KDE e.V.), which shall act as a proxy defined in Section 14 of *
* version 3 of the license. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
***************************************************************************/
#include "textbasededit.h"
#include "monitor/monitor.h"
#include "bin/bin.h"
#include "bin/projectclip.h"
#include "bin/projectitemmodel.h"
#include "core.h"
#include "kdenlivesettings.h"
#include "timecodedisplay.h"
#include "klocalizedstring.h"
#include "QTextEdit"
#include <QEvent>
#include <QKeyEvent>
#include <QToolButton>
/**
 * @brief Build the widget: load the Designer form, create the "Abort" action
 * and populate the list of available speech models.
 * @param parent Parent widget, forwarded to QWidget.
 */
TextBasedEdit::TextBasedEdit(QWidget *parent)
    : QWidget(parent)
{
    setFont(QFontDatabase::systemFont(QFontDatabase::SmallestReadableFont));
    setupUi(this);

    // The abort action only has an effect while a recognition job is running
    m_abortAction = new QAction(i18n("Abort"), this);
    connect(m_abortAction, &QAction::triggered, [this]() {
        const bool jobRunning = m_speechJob && m_speechJob->state() == QProcess::Running;
        if (!jobRunning) {
            return;
        }
        m_speechJob->kill();
    });

    connect(button_start, &QPushButton::clicked, this, &TextBasedEdit::startRecognition);

    // Start with a hidden message; slotParseDictionaries() shows it again if no model is installed
    info_message->hide();
    slotParseDictionaries();
}
void TextBasedEdit::startRecognition()
{
info_message->hide();
QString pyExec = QStandardPaths::findExecutable(QStringLiteral("python3"));
if (pyExec.isEmpty()) {
info_message->setMessageType(KMessageWidget::Warning);
info_message->setText(i18n("Cannot find python3, please install it on your system."));
info_message->animatedShow();
return;
}
// Start python script
QString language = language_box->currentText();
if (language.isEmpty()) {
info_message->setMessageType(KMessageWidget::Warning);
info_message->setText(i18n("Please install a language model."));
info_message->animatedShow();
return;
}
QString speechScript = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("scripts/speechtotext.py"));
if (speechScript.isEmpty()) {
info_message->setMessageType(KMessageWidget::Warning);
info_message->setText(i18n("The speech script was not found, check your install."));
info_message->animatedShow();
return;
}
m_speechJob.reset(new QProcess);
info_message->setMessageType(KMessageWidget::Information);
info_message->setText(i18n("Starting speech recognition"));
qApp->processEvents();
QString modelDirectory = QStandardPaths::locate(QStandardPaths::AppDataLocation, QStringLiteral("speechmodels"), QStandardPaths::LocateDirectory);
qDebug()<<"==== ANALYSIS SPEECH: "<<modelDirectory<<" - "<<language;
if (m_sourceUrl.isEmpty()) {
const QString cid = pCore->getMonitor(Kdenlive::ClipMonitor)->activeClipId();
std::shared_ptr<AbstractProjectItem> clip = pCore->projectItemModel()->getItemByBinId(cid);
if (clip) {
std::shared_ptr<ProjectClip> clipItem = std::static_pointer_cast<ProjectClip>(clip);
if (clipItem) {
m_sourceUrl = clipItem->url();
}
}
}
if (m_sourceUrl.isEmpty()) {
info_message->setMessageType(KMessageWidget::Information);
info_message->setText(i18n("Select a clip for speech recognition."));
info_message->animatedShow();
return;
}
info_message->setMessageType(KMessageWidget::Information);
info_message->setText(i18n("Starting speech recognition."));
info_message->addAction(m_abortAction);
info_message->animatedShow();
qApp->processEvents();
//m_speechJob->setProcessChannelMode(QProcess::MergedChannels);
connect(m_speechJob.get(), &QProcess::readyReadStandardOutput, this, &TextBasedEdit::slotProcessSpeech);
connect(m_speechJob.get(), static_cast<void (QProcess::*)(int, QProcess::ExitStatus)>(&QProcess::finished), this, &TextBasedEdit::slotProcessSpeechStatus);
textEdit->clear();
qDebug()<<"=== STARTING RECO: "<<speechScript<<" / "<<modelDirectory<<" / "<<language<<" / "<<m_sourceUrl;
m_speechJob->start(pyExec, {speechScript, modelDirectory, language, m_sourceUrl});
/*if (m_speechJob->QFile::exists(speech)) {
timeline->getSubtitleModel()->importSubtitle(speech, zone.x(), true);
speech_info->setMessageType(KMessageWidget::Positive);
speech_info->setText(i18n("Subtitles imported"));
} else {
speech_info->setMessageType(KMessageWidget::Warning);
speech_info->setText(i18n("Speech recognition failed"));
}*/
}
/**
 * @brief Report the outcome of the speech recognition job to the user.
 * @param exitCode Exit code of the process; only meaningful for a normal exit.
 * @param status Whether the process exited normally or crashed / was killed.
 *
 * A killed process (for example through the "Abort" action) is reported as
 * aborted, a normal exit with a non-zero exit code as a failure, and a normal
 * exit with code 0 as success.
 */
void TextBasedEdit::slotProcessSpeechStatus(int exitCode, QProcess::ExitStatus status)
{
    // The job is over, there is nothing left to abort
    info_message->removeAction(m_abortAction);
    if (status == QProcess::CrashExit) {
        info_message->setMessageType(KMessageWidget::Warning);
        info_message->setText(i18n("Speech recognition aborted."));
    } else if (exitCode != 0) {
        // The script signals problems such as a missing model through its exit code
        info_message->setMessageType(KMessageWidget::Warning);
        info_message->setText(i18n("Speech recognition failed."));
    } else {
        info_message->setMessageType(KMessageWidget::Positive);
        info_message->setText(i18n("Speech recognition finished."));
    }
    info_message->animatedShow();
}
void TextBasedEdit::slotProcessSpeech()
{
QString saveData = QString::fromUtf8(m_speechJob->readAll());
//saveData.replace(QStringLiteral("\\\""), QStringLiteral("\""));
qDebug()<<"=== GOT DATA:\n"<<saveData;
//QJsonDocument loadDoc(QJsonDocument::fromJson(saveData.toUtf8().constData()));
QJsonParseError error;
auto loadDoc = QJsonDocument::fromJson(saveData.toUtf8(), &error);
qDebug()<<"===JSON ERROR: "<<error.errorString();
if (loadDoc.isArray()) {
qDebug()<<"==== ITEM IS ARRAY";
QJsonArray array = loadDoc.array();
for (int i = 0; i < array.size(); i++) {
QJsonValue val = array.at(i);
qDebug()<<"==== FOUND KEYS: "<<val.toObject().keys();
if (val.isObject() && val.toObject().keys().contains("text")) {
textEdit->append(val.toObject().value("text").toString());
}
}
} else if (loadDoc.isObject()) {
QJsonObject obj = loadDoc.object();
qDebug()<<"==== ITEM IS OBJECT";
if (!obj.isEmpty()) {
textEdit->append(obj["text"].toString());
}
} else if (loadDoc.isEmpty()) {
qDebug()<<"==== EMPTY OBJEC DOC";
}
}
void TextBasedEdit::slotParseDictionaries()
{
language_box->clear();
QString modelDirectory = QStandardPaths::writableLocation(QStandardPaths::AppDataLocation);
QDir dir(modelDirectory);
if (!dir.cd(QStringLiteral("speechmodels"))) {
qDebug()<<"=== /// CANNOT ACCESS SPEECH DICTIONARIES FOLDER";
info_message->setMessageType(KMessageWidget::Information);
info_message->setText(i18n("Download dictionaries from: <a href=\"https://alphacephei.com/vosk/models\">https://alphacephei.com/vosk/models</a>"));
info_message->animatedShow();
button_start->setEnabled(false);
return;
}
QStringList dicts = dir.entryList(QDir::Dirs | QDir::NoDotAndDotDot);
language_box->addItems(dicts);
if (!dicts.isEmpty()) {
info_message->animatedHide();
button_start->setEnabled(true);
} else {
info_message->setMessageType(KMessageWidget::Information);
info_message->setText(i18n("Download dictionaries from: <a href=\"https://alphacephei.com/vosk/models\">https://alphacephei.com/vosk/models</a>"));
info_message->animatedShow();
button_start->setEnabled(false);
}
}
/***************************************************************************
* Copyright (C) 2021 by Jean-Baptiste Mardelle *
* This file is part of Kdenlive. See www.kdenlive.org. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) version 3 or any later version accepted by the *
* membership of KDE e.V. (or its successor approved by the membership *
* of KDE e.V.), which shall act as a proxy defined in Section 14 of *
* version 3 of the license. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
***************************************************************************/
#ifndef TEXTBASEDEDIT_H
#define TEXTBASEDEDIT_H
#include "ui_textbasededit_ui.h"
#include "definitions.h"
#include <QProcess>
/**
 * @class TextBasedEdit
 * @brief Widget that runs speech recognition on a clip through an external
 *        script and displays the recognized text.
 * @author Jean-Baptiste Mardelle
 */
class TextBasedEdit : public QWidget, public Ui::TextBasedEdit_UI
{
    Q_OBJECT

public:
    explicit TextBasedEdit(QWidget *parent = nullptr);

private slots:
    /** @brief Start the speech recognition script on a clip. */
    void startRecognition();
    /** @brief Parse the JSON output of the speech job and append the recognized text to the text edit. */
    void slotProcessSpeech();
    /** @brief Fill the language combo box with the installed speech model folders. */
    void slotParseDictionaries();
    /** @brief Update the info message once the speech job has ended. */
    void slotProcessSpeechStatus(int, QProcess::ExitStatus status);

private:
    /** @brief The external speech recognition process, null until a recognition was requested. */
    std::unique_ptr<QProcess> m_speechJob;
    /** @brief NOTE(review): not referenced by the implementation in textbasededit.cpp - confirm whether it is still needed. */
    QString m_binId;
    /** @brief Url of the clip handed to the speech recognition script. */
    QString m_sourceUrl;
    /** @brief Action added to the info message while a job runs; kills the running job when triggered. */
    QAction *m_abortAction{nullptr};
};
#endif
......@@ -966,7 +966,7 @@ void Wizard::testHwEncoders()
{
QProcess hwEncoders;
// Testing vaapi support
QTemporaryFile tmp(QDir::tempPath() + "/XXXXXX.mp4");
QTemporaryFile tmp(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.mp4")));
if (!tmp.open()) {
// Something went wrong
return;
......@@ -1009,7 +1009,7 @@ void Wizard::testHwEncoders()
KdenliveSettings::setVaapiEnabled(vaapiSupported);
// NVIDIA testing
QTemporaryFile tmp2(QDir::tempPath() + "/XXXXXX.mp4");
QTemporaryFile tmp2(QDir::temp().absoluteFilePath(QStringLiteral("XXXXXX.mp4")));
if (!tmp2.open()) {
// Something went wrong
return;
......
......@@ -72,7 +72,7 @@
#include "profiles/profilerepository.hpp"
#include "widgets/progressbutton.h"
#include <config-kdenlive.h>
#include "dialogs/textbasededit.h"
#include "project/dialogs/temporarydata.h"
#ifdef USE_JOGSHUTTLE
......@@ -263,6 +263,7 @@ void MainWindow::init()
QDockWidget *libraryDock = addDock(i18n("Library"), QStringLiteral("library"), pCore->library());
QDockWidget *subtitlesDock = addDock(i18n("Subtitles"), QStringLiteral("Subtitles"), pCore->subtitleWidget());
QDockWidget *textEditingDock = addDock(i18n("Text Edit"), QStringLiteral("textedit"), pCore->textEditWidget());
m_clipMonitor = new Monitor(Kdenlive::ClipMonitor, pCore->monitorManager(), this);
pCore->bin()->setMonitor(m_clipMonitor);
......@@ -351,6 +352,7 @@ void MainWindow::init()
screenGrabDock->close();
libraryDock->close();
subtitlesDock->close();
textEditingDock->close();
spectrumDock->close();
clipDockWidget->close();
......
<?xml version="1.0" encoding="UTF-8"?>
<!--
Designer form for the TextBasedEdit_UI widget. Grid layout, top to bottom:
row 0: textEdit (QTextEdit)
row 1: frame_position (frameless QFrame holding an empty grid layout)
row 2: info_message (KMessageWidget)
row 3: language_box (QComboBox), a horizontal spacer and button_start ("Start recognition")
-->
<ui version="4.0">
<class>TextBasedEdit_UI</class>
<widget class="QWidget" name="TextBasedEdit_UI">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>392</width>
<height>366</height>
</rect>
</property>
<layout class="QGridLayout" name="gridLayout_2">
<item row="3" column="0">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QComboBox" name="language_box"/>
</item>
</layout>
</item>
<item row="0" column="0" colspan="4">
<widget class="QTextEdit" name="textEdit"/>
</item>
<item row="1" column="0" colspan="4">
<widget class="QFrame" name="frame_position">
<property name="frameShape">
<enum>QFrame::NoFrame</enum>
</property>
<property name="frameShadow">
<enum>QFrame::Raised</enum>
</property>
<layout class="QGridLayout" name="gridLayout">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
</layout>
</widget>
</item>
<item row="3" column="2">
<widget class="QPushButton" name="button_start">
<property name="text">
<string>Start recognition</string>
</property>
</widget>
</item>
<item row="3" column="1">
<spacer name="horizontalSpacer">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>250</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item row="2" column="0" colspan="3">
<widget class="KMessageWidget" name="info_message"/>
</item>
</layout>
</widget>
<customwidgets>
<customwidget>
<class>KMessageWidget</class>
<extends>QFrame</extends>
<header>kmessagewidget.h</header>
</customwidget>
</customwidgets>
<resources/>
<connections/>
</ui>
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment