From 76204769f21a5721929e5b32f105732a5c59d3bd Mon Sep 17 00:00:00 2001 From: limjoe Date: Mon, 17 Jun 2024 11:28:54 +0800 Subject: [PATCH 1/4] Transcript: Support webvtt style setting --- platform/transcript.go | 20 +++++++++++++++++--- ui/src/pages/ScenarioTranscript.js | 19 ++++++++++++++++++- ui/src/resources/locale.json | 12 ++++++++++++ 3 files changed, 47 insertions(+), 4 deletions(-) diff --git a/platform/transcript.go b/platform/transcript.go index aba75f8d..a630983b 100644 --- a/platform/transcript.go +++ b/platform/transcript.go @@ -8,6 +8,7 @@ import ( "encoding/json" "fmt" "io" + "log" "net/http" "os" "os/exec" @@ -760,13 +761,21 @@ func (v *TranscriptWorker) Handle(ctx context.Context, handler *http.ServeMux) e var vttBody strings.Builder vttBody.WriteString(fmt.Sprintf("WEBVTT\n\n")) + if v.task.config.WebVttCueStyle != "" { + vttBody.WriteString(fmt.Sprintf("%s", v.task.config.WebVttCueStyle)) + vttBody.WriteString(fmt.Sprintf("\n\n")) + } for _, as := range segment.AsrText.Segments { s := segment.StreamStarttime + time.Duration(as.Start*float64(time.Second)) e := segment.StreamStarttime + time.Duration(as.End*float64(time.Second)) vttBody.WriteString(fmt.Sprintf("%02d:%02d:%02d.%03d --> ", int(s.Hours()), int(s.Minutes())%60, int(s.Seconds())%60, int(s.Milliseconds())%1000)) - vttBody.WriteString(fmt.Sprintf("%02d:%02d:%02d.%03d\n", + vttBody.WriteString(fmt.Sprintf("%02d:%02d:%02d.%03d", int(e.Hours()), int(e.Minutes())%60, int(e.Seconds())%60, int(e.Milliseconds())%1000)) + if v.task.config.WebVttCueSetting != "" { + vttBody.WriteString(fmt.Sprintf(" %s", v.task.config.WebVttCueSetting)) + } + vttBody.WriteString("\n") vttBody.WriteString(fmt.Sprintf("%v\n\n", as.Text)) } @@ -809,6 +818,7 @@ func (v *TranscriptWorker) Handle(ctx context.Context, handler *http.ServeMux) e var tsFiles []*TsFile segments := v.task.overlaySegments() + log.Printf("segments: %s", segments) for _, segment := range segments { tsFiles = append(tsFiles, segment.OverlayFile) 
} @@ -1219,6 +1229,10 @@ type TranscriptConfig struct { EnableOverlay bool `json:"overlayEnabled"` // Whether enable WebVTT subtitle. EnableWebVTT bool `json:"webvttEnabled"` + // WebVTT CUE style + WebVttCueStyle string `json:"webVttCueStyle"` + // WebVTT CUE Setting + WebVttCueSetting string `json:"webVttCueSetting"` } func NewTranscriptConfig() *TranscriptConfig { @@ -1228,9 +1242,9 @@ func NewTranscriptConfig() *TranscriptConfig { } func (v TranscriptConfig) String() string { - return fmt.Sprintf("all=%v, key=%vB, organization=%v, base=%v, lang=%v, overlay=%v, forceStyle=%v, videoCodecParams=%v, webvtt=%v", + return fmt.Sprintf("all=%v, key=%vB, organization=%v, base=%v, lang=%v, overlay=%v, forceStyle=%v, videoCodecParams=%v, webvtt=%v, webVttCueStyle=%v, webVttCueSetting=%v", v.All, len(v.SecretKey), v.Organization, v.BaseURL, v.Language, v.EnableOverlay, v.ForceStyle, - v.VideoCodecParams, v.EnableWebVTT) + v.VideoCodecParams, v.EnableWebVTT, v.WebVttCueStyle, v.WebVttCueSetting) } func (v *TranscriptConfig) Load(ctx context.Context) error { diff --git a/ui/src/pages/ScenarioTranscript.js b/ui/src/pages/ScenarioTranscript.js index eef83717..197c4745 100644 --- a/ui/src/pages/ScenarioTranscript.js +++ b/ui/src/pages/ScenarioTranscript.js @@ -56,6 +56,8 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default const [videoCodecParams, setVideoCodecParams] = React.useState(defaultConf.videoCodecParams || '-c:v libx264 -profile:v main -preset:v medium -tune zerolatency -bf 0'); const [overlayEnabled, setOverlayEnabled] = React.useState(defaultConf.overlayEnabled); const [webvttEnabled, setWebvttEnabled] = React.useState(defaultConf.webvttEnabled); + const [webVttCueStyle, setWebvttCueStyle] = React.useState(defaultConf.webVttCueStyle); + const [webVttCueSetting, setWebvttCueSetting] = React.useState(defaultConf.webVttCueSetting); const [liveQueue, setLiveQueue] = React.useState(); const [asrQueue, setAsrQueue] = React.useState(); @@ 
-100,6 +102,7 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default uuid, all: !!enabled, secretKey, organization, baseURL, lang: targetLanguage, overlayEnabled: !!overlayEnabled, forceStyle, videoCodecParams, webvttEnabled: !!webvttEnabled, + webVttCueStyle, webVttCueSetting }, { headers: Token.loadBearerHeader(), }).then(res => { @@ -107,7 +110,7 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default console.log(`Transcript: Apply config ok, uuid=${uuid}.`); success && success(); }).catch(handleError); - }, [t, handleError, secretKey, baseURL, targetLanguage, overlayEnabled, forceStyle, videoCodecParams, webvttEnabled, uuid, organization]); + }, [t, handleError, secretKey, baseURL, targetLanguage, overlayEnabled, forceStyle, videoCodecParams, webvttEnabled, uuid, organization, webVttCueStyle, webVttCueSetting]); const resetTask = React.useCallback(() => { setOperating(true); @@ -365,6 +368,20 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default setWebvttEnabled(!webvttEnabled)} /> + + {t('transcript.vttcuesettings')} + * {t('transcript.vttcuesettings1')}.   + {t('helper.see')} WebVTT Cues Settings. + + setWebvttCueSetting(e.target.value)} /> + + + {t('transcript.vttstyle')} + * {t('transcript.vttstyle1')}.   + {t('helper.see')} WebVTT Cues Style. + + setWebvttCueStyle(e.target.value)} /> + }

diff --git a/ui/src/resources/locale.json b/ui/src/resources/locale.json index 97ec2da8..7477c90a 100644 --- a/ui/src/resources/locale.json +++ b/ui/src/resources/locale.json @@ -386,6 +386,12 @@ "pvtt": "预览WebVTT字幕", "vtt2": "是否开启WebVTT字幕", "vtt": "WebVTT字幕", + "vttstyle":"WebVTT字幕样式", + "vttstyle1":"配置WebVTT字幕样式", + "vttcuesettings": "WebVTT CUE 设置", + "vttcuesettings1": "配置 WebVTT CUE", + "vtthref1": "https://developer.mozilla.org/zh-CN/docs/Web/API/WebVTT_API#%E5%9C%A8_webvtt_%E6%96%87%E4%BB%B6%E8%87%AA%E8%BA%AB%E4%B8%AD%E5%AE%9A%E4%B9%89%E6%A0%B7%E5%BC%8F", + "vtthref2": "https://developer.mozilla.org/zh-CN/docs/Web/API/WebVTT_API#cue_%E8%AE%BE%E7%BD%AE", "ole":"是否开启视频叠加字幕", "trans2": "https://ossrs.net/lts/zh-cn/faq-oryx#how-to-setup-the-video-codec-parameters-for-ai-transcript", "trans1": "自定义视频转码参数", @@ -884,6 +890,12 @@ "pvtt": "Preview live stream with WebVTT", "vtt2": "Enable WebVTT subtitle", "vtt": "WebVTT Subtitle", + "vttstyle":"WebVTT Style", + "vttstyle1":"Style of WebVTT Cue", + "vttcuesettings": "WebVTT CUE SETTINGS", + "vttcuesettings1": "Setting WebVTT CUE", + "vtthref1": "https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#within_the_webvtt_file_itself", + "vtthref2": "https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#webvtt_cues", "ole": "Enable video overlay subtitle", "trans2": "https://ossrs.io/lts/en-us/faq-oryx#how-to-setup-the-video-codec-parameters-for-ai-transcript", "trans1": "Customize video transcode parameters", From ed070fadc95874c96dfc5778f3b291ceca6534b5 Mon Sep 17 00:00:00 2001 From: limjoe Date: Mon, 17 Jun 2024 16:20:49 +0800 Subject: [PATCH 2/4] Transcript: Add webvtt style default setting --- ui/src/pages/ScenarioTranscript.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/src/pages/ScenarioTranscript.js b/ui/src/pages/ScenarioTranscript.js index 197c4745..c67b060e 100644 --- a/ui/src/pages/ScenarioTranscript.js +++ b/ui/src/pages/ScenarioTranscript.js @@ -56,8 +56,8 @@ function 
ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default const [videoCodecParams, setVideoCodecParams] = React.useState(defaultConf.videoCodecParams || '-c:v libx264 -profile:v main -preset:v medium -tune zerolatency -bf 0'); const [overlayEnabled, setOverlayEnabled] = React.useState(defaultConf.overlayEnabled); const [webvttEnabled, setWebvttEnabled] = React.useState(defaultConf.webvttEnabled); - const [webVttCueStyle, setWebvttCueStyle] = React.useState(defaultConf.webVttCueStyle); - const [webVttCueSetting, setWebvttCueSetting] = React.useState(defaultConf.webVttCueSetting); + const [webVttCueSetting, setWebvttCueSetting] = React.useState(defaultConf.webVttCueSetting || 'line:63% position:22% align:start'); + const [webVttCueStyle, setWebvttCueStyle] = React.useState(defaultConf.webVttCueStyle || 'STYLE\n::cue { background-color: blue; color: red; font-size: 40px; }'); const [liveQueue, setLiveQueue] = React.useState(); const [asrQueue, setAsrQueue] = React.useState(); From bd3592c65f88c75f4ce2e797376d971b4a76764e Mon Sep 17 00:00:00 2001 From: limjoe Date: Fri, 21 Jun 2024 16:08:01 +0800 Subject: [PATCH 3/4] Transcript: WebVTT Cue setting code format --- platform/transcript.go | 24 ++++++++++++------------ ui/src/pages/ScenarioTranscript.js | 12 ++++++------ ui/src/resources/locale.json | 16 ++++++++-------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/platform/transcript.go b/platform/transcript.go index a630983b..45fee69b 100644 --- a/platform/transcript.go +++ b/platform/transcript.go @@ -8,7 +8,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "os" "os/exec" @@ -761,8 +760,10 @@ func (v *TranscriptWorker) Handle(ctx context.Context, handler *http.ServeMux) e var vttBody strings.Builder vttBody.WriteString(fmt.Sprintf("WEBVTT\n\n")) - if v.task.config.WebVttCueStyle != "" { - vttBody.WriteString(fmt.Sprintf("%s", v.task.config.WebVttCueStyle)) + // Insert the CSS rules into the WebVTT file to set the cue style + // 
But the color setting only takes effect in Safari browser + if v.task.config.WebVTTCueStyle != "" { + vttBody.WriteString(fmt.Sprintf("%s", v.task.config.WebVTTCueStyle)) vttBody.WriteString(fmt.Sprintf("\n\n")) } for _, as := range segment.AsrText.Segments { @@ -772,8 +773,8 @@ func (v *TranscriptWorker) Handle(ctx context.Context, handler *http.ServeMux) e int(s.Hours()), int(s.Minutes())%60, int(s.Seconds())%60, int(s.Milliseconds())%1000)) vttBody.WriteString(fmt.Sprintf("%02d:%02d:%02d.%03d", int(e.Hours()), int(e.Minutes())%60, int(e.Seconds())%60, int(e.Milliseconds())%1000)) - if v.task.config.WebVttCueSetting != "" { - vttBody.WriteString(fmt.Sprintf(" %s", v.task.config.WebVttCueSetting)) + if v.task.config.WebVTTCueSetting != "" { + vttBody.WriteString(fmt.Sprintf(" %s", v.task.config.WebVTTCueSetting)) } vttBody.WriteString("\n") vttBody.WriteString(fmt.Sprintf("%v\n\n", as.Text)) @@ -818,7 +819,6 @@ func (v *TranscriptWorker) Handle(ctx context.Context, handler *http.ServeMux) e var tsFiles []*TsFile segments := v.task.overlaySegments() - log.Printf("segments: %s", segments) for _, segment := range segments { tsFiles = append(tsFiles, segment.OverlayFile) } @@ -1229,10 +1229,10 @@ type TranscriptConfig struct { EnableOverlay bool `json:"overlayEnabled"` // Whether enable WebVTT subtitle. 
EnableWebVTT bool `json:"webvttEnabled"` - // WebVTT CUE style - WebVttCueStyle string `json:"webVttCueStyle"` - // WebVTT CUE Setting - WebVttCueSetting string `json:"webVttCueSetting"` + // The WebVTT STYLE block (CSS rules for cues), written right after the WEBVTT header if not empty. + WebVTTCueStyle string `json:"webvttCueStyle"` + // The WebVTT cue settings, appended to the timing line of each cue if not empty. + WebVTTCueSetting string `json:"webvttCueSetting"` } func NewTranscriptConfig() *TranscriptConfig { @@ -1242,9 +1242,9 @@ func NewTranscriptConfig() *TranscriptConfig { } func (v TranscriptConfig) String() string { - return fmt.Sprintf("all=%v, key=%vB, organization=%v, base=%v, lang=%v, overlay=%v, forceStyle=%v, videoCodecParams=%v, webvtt=%v, webVttCueStyle=%v, webVttCueSetting=%v", + return fmt.Sprintf("all=%v, key=%vB, organization=%v, base=%v, lang=%v, overlay=%v, forceStyle=%v, videoCodecParams=%v, webvtt=%v, webvttCueStyle=%v, webvttCueSetting=%v", v.All, len(v.SecretKey), v.Organization, v.BaseURL, v.Language, v.EnableOverlay, v.ForceStyle, - v.VideoCodecParams, v.EnableWebVTT, v.WebVttCueStyle, v.WebVttCueSetting) + v.VideoCodecParams, v.EnableWebVTT, v.WebVTTCueStyle, v.WebVTTCueSetting) } func (v *TranscriptConfig) Load(ctx context.Context) error { diff --git a/ui/src/pages/ScenarioTranscript.js b/ui/src/pages/ScenarioTranscript.js index c67b060e..266ea9ad 100644 --- a/ui/src/pages/ScenarioTranscript.js +++ b/ui/src/pages/ScenarioTranscript.js @@ -56,8 +56,8 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default const [videoCodecParams, setVideoCodecParams] = React.useState(defaultConf.videoCodecParams || '-c:v libx264 -profile:v main -preset:v medium -tune zerolatency -bf 0'); const [overlayEnabled, setOverlayEnabled] = React.useState(defaultConf.overlayEnabled); const [webvttEnabled, setWebvttEnabled] = React.useState(defaultConf.webvttEnabled); - const [webVttCueSetting, setWebvttCueSetting] = React.useState(defaultConf.webVttCueSetting || 'line:63% position:22% align:start'); - const [webVttCueStyle, setWebvttCueStyle] = 
React.useState(defaultConf.webVttCueStyle || 'STYLE\n::cue { background-color: blue; color: red; font-size: 40px; }'); + const [webvttCueSetting, setWebvttCueSetting] = React.useState(defaultConf.webvttCueSetting || 'line:63% position:22% align:start'); + const [webvttCueStyle, setWebvttCueStyle] = React.useState(defaultConf.webvttCueStyle || 'STYLE\n::cue { background-color: blue; color: red; font-size: 40px; }'); const [liveQueue, setLiveQueue] = React.useState(); const [asrQueue, setAsrQueue] = React.useState(); @@ -102,7 +102,7 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default uuid, all: !!enabled, secretKey, organization, baseURL, lang: targetLanguage, overlayEnabled: !!overlayEnabled, forceStyle, videoCodecParams, webvttEnabled: !!webvttEnabled, - webVttCueStyle, webVttCueSetting + webvttCueStyle, webvttCueSetting }, { headers: Token.loadBearerHeader(), }).then(res => { @@ -110,7 +110,7 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default console.log(`Transcript: Apply config ok, uuid=${uuid}.`); success && success(); }).catch(handleError); - }, [t, handleError, secretKey, baseURL, targetLanguage, overlayEnabled, forceStyle, videoCodecParams, webvttEnabled, uuid, organization, webVttCueStyle, webVttCueSetting]); + }, [t, handleError, secretKey, baseURL, targetLanguage, overlayEnabled, forceStyle, videoCodecParams, webvttEnabled, uuid, organization, webvttCueStyle, webvttCueSetting]); const resetTask = React.useCallback(() => { setOperating(true); @@ -373,14 +373,14 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default * {t('transcript.vttcuesettings1')}.   {t('helper.see')} WebVTT Cues Settings. - setWebvttCueSetting(e.target.value)} /> + setWebvttCueSetting(e.target.value)} /> {t('transcript.vttstyle')} * {t('transcript.vttstyle1')}.   {t('helper.see')} WebVTT Cues Style. 
- setWebvttCueStyle(e.target.value)} /> + setWebvttCueStyle(e.target.value)} /> } diff --git a/ui/src/resources/locale.json b/ui/src/resources/locale.json index 7477c90a..ec71d530 100644 --- a/ui/src/resources/locale.json +++ b/ui/src/resources/locale.json @@ -386,10 +386,10 @@ "pvtt": "预览WebVTT字幕", "vtt2": "是否开启WebVTT字幕", "vtt": "WebVTT字幕", - "vttstyle":"WebVTT字幕样式", - "vttstyle1":"配置WebVTT字幕样式", - "vttcuesettings": "WebVTT CUE 设置", - "vttcuesettings1": "配置 WebVTT CUE", + "vttstyle":"WebVTT Cue 字幕样式", + "vttstyle1":"配置 WebVTT Cue 字幕样式", + "vttcuesettings": "WebVTT Cue 设置", + "vttcuesettings1": "配置 WebVTT Cue", "vtthref1": "https://developer.mozilla.org/zh-CN/docs/Web/API/WebVTT_API#%E5%9C%A8_webvtt_%E6%96%87%E4%BB%B6%E8%87%AA%E8%BA%AB%E4%B8%AD%E5%AE%9A%E4%B9%89%E6%A0%B7%E5%BC%8F", "vtthref2": "https://developer.mozilla.org/zh-CN/docs/Web/API/WebVTT_API#cue_%E8%AE%BE%E7%BD%AE", "ole":"是否开启视频叠加字幕", @@ -890,10 +890,10 @@ "pvtt": "Preview live stream with WebVTT", "vtt2": "Enable WebVTT subtitle", "vtt": "WebVTT Subtitle", - "vttstyle":"WebVTT Style", - "vttstyle1":"Style of WebVTT Cue", - "vttcuesettings": "WebVTT CUE SETTINGS", - "vttcuesettings1": "Setting WebVTT CUE", + "vttstyle":"WebVTT Cue Style", + "vttstyle1":"Style of WebVTT Cues", + "vttcuesettings": "WebVTT Cue Settings", + "vttcuesettings1": "WebVTT Cue Settings", "vtthref1": "https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#within_the_webvtt_file_itself", "vtthref2": "https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#webvtt_cues", "ole": "Enable video overlay subtitle", From 257d623488bbce052d6dfcd61d8033262944366c Mon Sep 17 00:00:00 2001 From: limjoe Date: Mon, 24 Jun 2024 22:54:42 +0800 Subject: [PATCH 4/4] Transcript: update webvtt style default setting --- ui/src/pages/ScenarioTranscript.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ui/src/pages/ScenarioTranscript.js b/ui/src/pages/ScenarioTranscript.js index 266ea9ad..281572c5 100644 --- 
a/ui/src/pages/ScenarioTranscript.js +++ b/ui/src/pages/ScenarioTranscript.js @@ -56,8 +56,8 @@ function ScenarioTranscriptImpl({activeKey, defaultEnabled, defaultConf, default const [videoCodecParams, setVideoCodecParams] = React.useState(defaultConf.videoCodecParams || '-c:v libx264 -profile:v main -preset:v medium -tune zerolatency -bf 0'); const [overlayEnabled, setOverlayEnabled] = React.useState(defaultConf.overlayEnabled); const [webvttEnabled, setWebvttEnabled] = React.useState(defaultConf.webvttEnabled); - const [webvttCueSetting, setWebvttCueSetting] = React.useState(defaultConf.webvttCueSetting || 'line:63% position:22% align:start'); - const [webvttCueStyle, setWebvttCueStyle] = React.useState(defaultConf.webvttCueStyle || 'STYLE\n::cue { background-color: blue; color: red; font-size: 40px; }'); + const [webvttCueSetting, setWebvttCueSetting] = React.useState(defaultConf.webvttCueSetting || 'line:80% align:start'); + const [webvttCueStyle, setWebvttCueStyle] = React.useState(defaultConf.webvttCueStyle || 'STYLE\n::cue { background-color: blue; color: red; font-size: 18px; }'); const [liveQueue, setLiveQueue] = React.useState(); const [asrQueue, setAsrQueue] = React.useState();