Commit b138e207 authored by Jean-Baptiste Mardelle
Browse files

introduce analysis of clip zone or subclip

parent 7dd84cfa
Pipeline #50390 passed with stage
in 10 minutes and 25 seconds
......@@ -22,15 +22,21 @@ if not os.path.exists(sys.argv[2]):
sample_rate=16000
model = Model(sys.argv[2])
rec = KaldiRecognizer(model, sample_rate)
# zone rendering
# The caller may append two optional arguments after the source file:
# sys.argv[4] is the zone start and sys.argv[5] is the zone duration,
# both expressed in seconds. Either may be missing or 0, in which case
# the corresponding ffmpeg option is simply not passed.
zone_start = float(sys.argv[4]) if len(sys.argv) > 4 else 0.0
zone_duration = float(sys.argv[5]) if len(sys.argv) > 5 else 0.0

ffmpeg_cmd = ['ffmpeg', '-loglevel', 'quiet', '-i', sys.argv[3]]
if zone_start > 0:
    # -ss selects the position to start decoding from
    ffmpeg_cmd += ['-ss', sys.argv[4]]
if zone_duration > 0:
    # -t limits how much audio is decoded from that position
    ffmpeg_cmd += ['-t', sys.argv[5]]
# Raw 16-bit little-endian mono PCM at the recognizer's sample rate, on stdout
ffmpeg_cmd += ['-ar', str(sample_rate), '-ac', '1', '-f', 's16le', '-']

process = subprocess.Popen(ffmpeg_cmd, stdout=subprocess.PIPE)
WORDS_PER_LINE = 7
def transcribe():
results = []
subs = []
while True:
data = process.stdout.read(4000)
if len(data) == 0:
......
......@@ -23,6 +23,7 @@
#include "monitor/monitor.h"
#include "bin/bin.h"
#include "bin/projectclip.h"
#include "bin/projectsubclip.h"
#include "bin/projectitemmodel.h"
#include "core.h"
#include "mainwindow.h"
......@@ -104,6 +105,11 @@ TextBasedEdit::TextBasedEdit(QWidget *parent)
});
info_message->hide();
speech_zone->setChecked(KdenliveSettings::speech_zone());
connect(speech_zone, &QCheckBox::stateChanged, [this](int state) {
KdenliveSettings::setSpeech_zone(state == Qt::Checked);
});
// Search stuff
search_frame->setVisible(false);
button_search->setIcon(QIcon::fromTheme(QStringLiteral("edit-find")));
......@@ -210,12 +216,33 @@ void TextBasedEdit::startRecognition()
QString clipName;
const QString cid = pCore->getMonitor(Kdenlive::ClipMonitor)->activeClipId();
std::shared_ptr<AbstractProjectItem> clip = pCore->projectItemModel()->getItemByBinId(cid);
if (clip) {
m_offset = 0;
double endPos = 0;
if (clip->itemType() == AbstractProjectItem::ClipItem) {
std::shared_ptr<ProjectClip> clipItem = std::static_pointer_cast<ProjectClip>(clip);
if (clipItem) {
m_sourceUrl = clipItem->url();
clipName = clipItem->clipName();
m_clipDuration = clipItem->duration().seconds();
if (speech_zone->isChecked()) {
// Analyse clip zone only
QPoint zone = clipItem->zone();
m_offset = GenTime(zone.x(), pCore->getCurrentFps()).seconds();
m_clipDuration = GenTime(zone.y() - zone.x(), pCore->getCurrentFps()).seconds();
endPos = m_clipDuration;
} else {
m_clipDuration = clipItem->duration().seconds();
}
}
} else if (clip->itemType() == AbstractProjectItem::SubClipItem) {
std::shared_ptr<ProjectSubClip> clipItem = std::static_pointer_cast<ProjectSubClip>(clip);
if (clipItem) {
auto master = clipItem->getMasterClip();
m_sourceUrl = master->url();
clipName = master->clipName();
QPoint zone = clipItem->zone();
m_offset = GenTime(zone.x(), pCore->getCurrentFps()).seconds();
m_clipDuration = GenTime(zone.y() - zone.x(), pCore->getCurrentFps()).seconds();
endPos = m_clipDuration;
}
}
if (m_sourceUrl.isEmpty()) {
......@@ -232,8 +259,8 @@ void TextBasedEdit::startRecognition()
connect(m_speechJob.get(), &QProcess::readyReadStandardOutput, this, &TextBasedEdit::slotProcessSpeech);
connect(m_speechJob.get(), static_cast<void (QProcess::*)(int, QProcess::ExitStatus)>(&QProcess::finished), this, &TextBasedEdit::slotProcessSpeechStatus);
listWidget->clear();
qDebug()<<"=== STARTING RECO: "<<speechScript<<" / "<<modelDirectory<<" / "<<language<<" / "<<m_sourceUrl;
m_speechJob->start(pyExec, {speechScript, modelDirectory, language, m_sourceUrl});
qDebug()<<"=== STARTING RECO: "<<speechScript<<" / "<<modelDirectory<<" / "<<language<<" / "<<m_sourceUrl<<", START: "<<m_offset<<", DUR: "<<endPos;
m_speechJob->start(pyExec, {speechScript, modelDirectory, language, m_sourceUrl, QString::number(m_offset), QString::number(endPos)});
speech_progress->setValue(0);
frame_progress->setVisible(true);
}
......@@ -311,14 +338,14 @@ void TextBasedEdit::slotProcessSpeech()
QJsonArray obj2 = obj["result"].toArray();
QJsonValue val = obj2.first();
if (val.isObject() && val.toObject().keys().contains("start")) {
double ms = val.toObject().value("start").toDouble();
double ms = val.toObject().value("start").toDouble() + m_offset;
itemText.prepend(QString("%1: ").arg(pCore->timecode().getDisplayTimecode(GenTime(ms), false)));
item->setData(Qt::UserRole, ms);
}
val = obj2.last();
if (val.isObject() && val.toObject().keys().contains("end")) {
double ms = val.toObject().value("end").toDouble();
item->setData(Qt::UserRole + 1, ms);
item->setData(Qt::UserRole + 1, ms + m_offset);
if (m_clipDuration > 0.) {
speech_progress->setValue(static_cast<int>(100 * ms / m_clipDuration));
}
......
......@@ -55,6 +55,7 @@ private:
QString m_binId;
QString m_sourceUrl;
double m_clipDuration;
double m_offset;
};
#endif
......@@ -1152,5 +1152,9 @@
<label>Last selected model for automatic subtitling</label>
<default></default>
</entry>
<entry name="speech_zone" type="Bool">
<label>Only analyse the selected clip zone for speech recognition</label>
<default>true</default>
</entry>
</group>
</kcfg>
......@@ -6,8 +6,8 @@
<rect>
<x>0</x>
<y>0</y>
<width>514</width>
<height>484</height>
<width>572</width>
<height>631</height>
</rect>
</property>
<layout class="QGridLayout" name="gridLayout_2">
......@@ -94,6 +94,38 @@
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item row="0" column="5">
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>244</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="3">
<widget class="QToolButton" name="button_search">
<property name="text">
<string>...</string>
</property>
<property name="checkable">
<bool>true</bool>
</property>
</widget>
</item>
<item row="0" column="1">
<widget class="QToolButton" name="button_up">
<property name="text">
......@@ -101,8 +133,8 @@
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QToolButton" name="button_delete">
<item row="0" column="6">
<widget class="QToolButton" name="vosk_config">
<property name="text">
<string>...</string>
</property>
......@@ -115,33 +147,17 @@
</property>
</widget>
</item>
<item row="0" column="4">
<spacer name="horizontalSpacer_2">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>244</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item row="0" column="5">
<widget class="QToolButton" name="vosk_config">
<item row="0" column="0">
<widget class="QToolButton" name="button_delete">
<property name="text">
<string>...</string>
</property>
</widget>
</item>
<item row="0" column="3">
<widget class="QToolButton" name="button_search">
<item row="0" column="4">
<widget class="QCheckBox" name="speech_zone">
<property name="text">
<string>...</string>
</property>
<property name="checkable">
<bool>true</bool>
<string>Selected zone only</string>
</property>
</widget>
</item>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment