To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit df7880a2 authored by Christof Gerber
Browse files

Add minWindowLength option

parent 205c6c06
Pipeline #35978 passed with stages
in 6 minutes and 46 seconds
......@@ -4,11 +4,15 @@ go 1.12
require (
github.com/asticode/go-astisub v0.0.0-20190514140258-c0ed7925c393
github.com/pierrec/lz4 v2.0.5+incompatible // indirect
github.com/aws/aws-sdk-go v1.19.45 // indirect
github.com/sirupsen/logrus v1.4.2
github.com/stretchr/testify v1.3.0
gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607113039-396e9d537c9b
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340
google.golang.org/api v0.5.0
go.mongodb.org/mongo-driver v1.0.3
golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444 // indirect
google.golang.org/api v0.6.0
google.golang.org/appengine v1.6.1 // indirect
)
This diff is collapsed.
......@@ -11,6 +11,7 @@ import (
"gitlab.ethz.ch/chgerber/MessageComposition/src/pkg/util"
"gitlab.ethz.ch/chgerber/annotation"
"gitlab.ethz.ch/chgerber/monitor"
"go.mongodb.org/mongo-driver/mongo"
"google.golang.org/api/googleapi/transport"
"google.golang.org/api/option"
"google.golang.org/api/youtube/v3"
......@@ -21,7 +22,6 @@ import (
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
)
......@@ -32,7 +32,7 @@ type Subtitle struct {
// Annotations turns the subtitles into annotations with the src field set as given
// Method of the annotation.Caption IF
func (s *Subtitle) Annotations(src string) []*annotation.Annotation {
func (s *Subtitle) Annotations(src string, args ...int) []*annotation.Annotation {
return annotation.SubToAnnotation(s.Subtitles, src)
}
......@@ -382,7 +382,7 @@ func GetPlaylistVideos(playListID string, apiKey string) (members []string, err
// LoadYouTubeAnnotationsVideo downloads the subtitles of the specified YouTube video and creates annotations in the specified mongo db collection
// user supplied YouTube subtitles are preferred over auto-generated ones
func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollection string) error {
func LoadYouTubeAnnotationsVideo(videoID string, language string, collection *mongo.Collection) error {
var subs annotation.Caption
err := errors.New("")
......@@ -405,12 +405,6 @@ func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollectio
log.WithFields(log.Fields{"videoID": videoID, "language": language, "quantity": len(annotations)}).Trace("Subtitle items found")
mongoPort, err := strconv.Atoi(os.Getenv("MONGO_PORT"))
collection, err := annotation.ConnectMongoDBCollection(os.Getenv("MONGO_HOST"), mongoPort, os.Getenv("MONGO_DB_NAME"), mongoCollection)
if err != nil {
return err
}
err = annotation.UploadToDB(annotations, collection)
if err != nil {
return err
......@@ -421,7 +415,7 @@ func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollectio
// LoadYouTubeAnnotationsPlaylist downloads the subtitles of the specified YouTube playlist and loads them to the specified mongo db collection
// user supplied YouTube subtitles are preferred over auto-generated ones
func LoadYouTubeAnnotationsPlaylist(playlistID string, language string, mongoCollection string) error {
func LoadYouTubeAnnotationsPlaylist(playlistID string, language string, collection *mongo.Collection) error {
defer monitor.Elapsed()()
members, err := GetPlaylistVideos(playlistID, os.Getenv("YOUTUBE_DATA_API_KEY"))
......@@ -431,7 +425,7 @@ func LoadYouTubeAnnotationsPlaylist(playlistID string, language string, mongoCol
log.WithFields(log.Fields{"playlistID": playlistID, "language": language, "quantity": len(members)}).Trace("Videos found in playlist")
for _, videoID := range members {
err := LoadYouTubeAnnotationsVideo(videoID, language, mongoCollection)
err := LoadYouTubeAnnotationsVideo(videoID, language, collection)
if err != nil {
log.Warning(err)
continue
......@@ -483,14 +477,30 @@ func parseSRV3(file []byte) (*TimedText, error) {
// Annotations turns the TimedText into annotations with the src field set as given
// Method of the annotation.Caption IF
func (s *TimedText) Annotations(src string) []*annotation.Annotation {
// Optional positional args; any value that is omitted keeps its default:
// 1. minWindowLength (default 1)
// 2. maxWindowLength (default 5)
func (s *TimedText) Annotations(src string, args ...int) []*annotation.Annotation {
// Default values
minWindowLength := 1
maxWindowLength := 5
for i, val := range args {
if i == 0 {
minWindowLength = val
}
if i == 1 {
maxWindowLength = val
}
}
return fromTimedText(s, 3, src)
return fromTimedText(s, minWindowLength, maxWindowLength, src)
}
// fromTimedText applies the window slide approach to create annotations from TimedText
// window slide only applied within one paragraph
func fromTimedText(subtitle *TimedText, windowLength int, src string) []*annotation.Annotation {
func fromTimedText(subtitle *TimedText, minWindowLength int, maxWindowLength int, src string) []*annotation.Annotation {
var windowAnnotations []*annotation.Annotation
......@@ -516,7 +526,7 @@ func fromTimedText(subtitle *TimedText, windowLength int, src string) []*annotat
// Handle automatic captions with segments (word granularity timing)
for startSegmentIdx := range paragraph.Segments {
for wLength := 1; wLength <= windowLength; wLength++ {
for wLength := minWindowLength; wLength <= maxWindowLength; wLength++ {
// Detect if sliding window end exceeds segment end
if startSegmentIdx+wLength > len(paragraph.Segments) {
......
......@@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)
......@@ -173,7 +174,7 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) {
timedText, err := parseSRV3(byteValue)
assert.Equal(t, nil, err)
a := timedText.Annotations("youtube://VIlLpnJJl_4")
a := timedText.Annotations("youtube://VIlLpnJJl_4", 1, 3)
aExpect := annotation.Annotation{
Src: "youtube://VIlLpnJJl_4",
......@@ -242,18 +243,29 @@ func TestNumberOfProducedAnnotations(t *testing.T) {
timedText, err := parseSRV3([]byte(snipped))
assert.Equal(t, nil, err)
for windowLength := 3; windowLength <= 8; windowLength++ {
var numPermutations int
for minWindowLength := 3; minWindowLength <= 8; minWindowLength++ {
for maxWindowLength := minWindowLength; maxWindowLength <= 8; maxWindowLength++ {
var numPermutations int
// Calculate sum of sliding windows ( sum_i=1-to-window-length(NumSegments-i+1)
for i := 1; i <= windowLength; i++ {
numPermutations += len(timedText.Body.Paragraphs[0].Segments) - i + 1
}
// Calculate the expected number of sliding windows:
// sum over i = minWindowLength..maxWindowLength of (NumSegments - i + 1)
for i := minWindowLength; i <= maxWindowLength; i++ {
numPermutations += len(timedText.Body.Paragraphs[0].Segments) - i + 1
}
// Test for the right number of annotations
annotations := fromTimedText(timedText, minWindowLength, maxWindowLength, "")
assert.Equal(t, numPermutations, len(annotations))
// Test that the word count of every annotation lies within [minWindowLength, maxWindowLength]
for _, a := range annotations {
a := fromTimedText(timedText, windowLength, "")
assert.Equal(t, numPermutations, len(a))
assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) >= minWindowLength)
assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) <= maxWindowLength)
}
}
}
}
func TestSRV3parseManualCaption(t *testing.T) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment