Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Multimedia
Kdenlive
Commits
900516da
Commit
900516da
authored
Feb 10, 2021
by
Jean-Baptiste Mardelle
Browse files
various improvements for speech text analysis (display silences, fix zone analysis)
parent
b138e207
Pipeline
#50399
passed with stage
in 10 minutes and 39 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
data/scripts/speechtotext.py
View file @
900516da
...
...
@@ -26,7 +26,7 @@ rec = KaldiRecognizer(model, sample_rate)
# zone rendering
if
len
(
sys
.
argv
)
>
4
and
(
float
(
sys
.
argv
[
4
])
>
0
or
float
(
sys
.
argv
[
5
])
>
0
):
process
=
subprocess
.
Popen
([
'ffmpeg'
,
'-loglevel'
,
'quiet'
,
'-i'
,
sys
.
argv
[
3
],
'-
t
'
,
sys
.
argv
[
4
],
'-
ss
'
,
sys
.
argv
[
5
],
sys
.
argv
[
3
],
'-
ss
'
,
sys
.
argv
[
4
],
'-
t
'
,
sys
.
argv
[
5
],
'-ar'
,
str
(
sample_rate
)
,
'-ac'
,
'1'
,
'-f'
,
's16le'
,
'-'
],
stdout
=
subprocess
.
PIPE
)
else
:
...
...
@@ -40,10 +40,10 @@ def transcribe():
while
True
:
data
=
process
.
stdout
.
read
(
4000
)
if
len
(
data
)
==
0
:
print
(
rec
.
FinalResult
(),
file
=
sys
.
stdout
,
flush
=
True
)
break
if
rec
.
AcceptWaveform
(
data
):
sentence
=
rec
.
Result
()
print
(
sentence
,
file
=
sys
.
stdout
,
flush
=
True
)
print
(
rec
.
Result
(),
file
=
sys
.
stdout
,
flush
=
True
)
transcribe
()
#with open(sys.argv[3], 'w') as f:
...
...
src/dialogs/textbasededit.cpp
View file @
900516da
...
...
@@ -217,6 +217,7 @@ void TextBasedEdit::startRecognition()
const
QString
cid
=
pCore
->
getMonitor
(
Kdenlive
::
ClipMonitor
)
->
activeClipId
();
std
::
shared_ptr
<
AbstractProjectItem
>
clip
=
pCore
->
projectItemModel
()
->
getItemByBinId
(
cid
);
m_offset
=
0
;
m_lastPosition
=
0
;
double
endPos
=
0
;
if
(
clip
->
itemType
()
==
AbstractProjectItem
::
ClipItem
)
{
std
::
shared_ptr
<
ProjectClip
>
clipItem
=
std
::
static_pointer_cast
<
ProjectClip
>
(
clip
);
...
...
@@ -226,6 +227,7 @@ void TextBasedEdit::startRecognition()
if
(
speech_zone
->
isChecked
())
{
// Analyse clip zone only
QPoint
zone
=
clipItem
->
zone
();
m_lastPosition
=
zone
.
x
();
m_offset
=
GenTime
(
zone
.
x
(),
pCore
->
getCurrentFps
()).
seconds
();
m_clipDuration
=
GenTime
(
zone
.
y
()
-
zone
.
x
(),
pCore
->
getCurrentFps
()).
seconds
();
endPos
=
m_clipDuration
;
...
...
@@ -240,6 +242,7 @@ void TextBasedEdit::startRecognition()
m_sourceUrl
=
master
->
url
();
clipName
=
master
->
clipName
();
QPoint
zone
=
clipItem
->
zone
();
m_lastPosition
=
zone
.
x
();
m_offset
=
GenTime
(
zone
.
x
(),
pCore
->
getCurrentFps
()).
seconds
();
m_clipDuration
=
GenTime
(
zone
.
y
()
-
zone
.
x
(),
pCore
->
getCurrentFps
()).
seconds
();
endPos
=
m_clipDuration
;
...
...
@@ -330,7 +333,7 @@ void TextBasedEdit::slotProcessSpeech()
qDebug
()
<<
"==== ITEM IS OBJECT"
;
if
(
!
obj
.
isEmpty
())
{
QString
itemText
=
obj
[
"text"
].
toString
();
QListWidgetItem
*
item
=
new
QListWidgetItem
(
listWidget
)
;
QListWidgetItem
*
item
=
new
QListWidgetItem
;
if
(
obj
[
"result"
].
isObject
())
{
qDebug
()
<<
"==== RESULT IS OBJECT"
;
}
else
if
(
obj
[
"result"
].
isArray
())
{
...
...
@@ -339,6 +342,17 @@ void TextBasedEdit::slotProcessSpeech()
QJsonValue
val
=
obj2
.
first
();
if
(
val
.
isObject
()
&&
val
.
toObject
().
keys
().
contains
(
"start"
))
{
double
ms
=
val
.
toObject
().
value
(
"start"
).
toDouble
()
+
m_offset
;
GenTime
startPos
(
ms
);
if
(
startPos
.
frames
(
pCore
->
getCurrentFps
())
>
m_lastPosition
+
1
)
{
// Insert space item
QListWidgetItem
*
spacer
=
new
QListWidgetItem
(
listWidget
);
GenTime
silenceStart
(
m_lastPosition
,
pCore
->
getCurrentFps
());
spacer
->
setData
(
Qt
::
UserRole
,
silenceStart
.
seconds
());
spacer
->
setData
(
Qt
::
UserRole
+
1
,
GenTime
(
startPos
.
frames
(
pCore
->
getCurrentFps
())
-
1
,
pCore
->
getCurrentFps
()).
seconds
());
spacer
->
setText
(
i18n
(
"%1: no speech"
,
pCore
->
timecode
().
getDisplayTimecode
(
silenceStart
,
false
)));
spacer
->
setData
(
Qt
::
UserRole
+
2
,
1
);
spacer
->
setBackground
(
Qt
::
blue
);
}
itemText
.
prepend
(
QString
(
"%1: "
).
arg
(
pCore
->
timecode
().
getDisplayTimecode
(
GenTime
(
ms
),
false
)));
item
->
setData
(
Qt
::
UserRole
,
ms
);
}
...
...
@@ -346,12 +360,14 @@ void TextBasedEdit::slotProcessSpeech()
if
(
val
.
isObject
()
&&
val
.
toObject
().
keys
().
contains
(
"end"
))
{
double
ms
=
val
.
toObject
().
value
(
"end"
).
toDouble
();
item
->
setData
(
Qt
::
UserRole
+
1
,
ms
+
m_offset
);
m_lastPosition
=
GenTime
(
ms
+
m_offset
).
frames
(
pCore
->
getCurrentFps
());
if
(
m_clipDuration
>
0.
)
{
speech_progress
->
setValue
(
static_cast
<
int
>
(
100
*
ms
/
m_clipDuration
));
}
}
}
item
->
setText
(
itemText
);
listWidget
->
addItem
(
item
);
}
}
else
if
(
loadDoc
.
isEmpty
())
{
qDebug
()
<<
"==== EMPTY OBJEC DOC"
;
...
...
src/dialogs/textbasededit.h
View file @
900516da
...
...
@@ -56,6 +56,7 @@ private:
QString
m_sourceUrl
;
double
m_clipDuration
;
double
m_offset
;
int
m_lastPosition
;
};
#endif
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment