Commit df7880a2 authored by Christof Gerber
Browse files

Add minWindowLength option

parent 205c6c06
Pipeline #35978 passed with stages
in 6 minutes and 46 seconds
...@@ -4,11 +4,15 @@ go 1.12 ...@@ -4,11 +4,15 @@ go 1.12
require ( require (
github.com/asticode/go-astisub v0.0.0-20190514140258-c0ed7925c393 github.com/asticode/go-astisub v0.0.0-20190514140258-c0ed7925c393
github.com/pierrec/lz4 v2.0.5+incompatible // indirect github.com/aws/aws-sdk-go v1.19.45 // indirect
github.com/sirupsen/logrus v1.4.2 github.com/sirupsen/logrus v1.4.2
github.com/stretchr/testify v1.3.0 github.com/stretchr/testify v1.3.0
gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5 gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607113039-396e9d537c9b gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340 gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340
google.golang.org/api v0.5.0 go.mongodb.org/mongo-driver v1.0.3
golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444 // indirect
google.golang.org/api v0.6.0
google.golang.org/appengine v1.6.1 // indirect
) )
This diff is collapsed.
...@@ -11,6 +11,7 @@ import ( ...@@ -11,6 +11,7 @@ import (
"gitlab.ethz.ch/chgerber/MessageComposition/src/pkg/util" "gitlab.ethz.ch/chgerber/MessageComposition/src/pkg/util"
"gitlab.ethz.ch/chgerber/annotation" "gitlab.ethz.ch/chgerber/annotation"
"gitlab.ethz.ch/chgerber/monitor" "gitlab.ethz.ch/chgerber/monitor"
"go.mongodb.org/mongo-driver/mongo"
"google.golang.org/api/googleapi/transport" "google.golang.org/api/googleapi/transport"
"google.golang.org/api/option" "google.golang.org/api/option"
"google.golang.org/api/youtube/v3" "google.golang.org/api/youtube/v3"
...@@ -21,7 +22,6 @@ import ( ...@@ -21,7 +22,6 @@ import (
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"regexp" "regexp"
"strconv"
"strings" "strings"
) )
...@@ -32,7 +32,7 @@ type Subtitle struct { ...@@ -32,7 +32,7 @@ type Subtitle struct {
// Annotations turns the subtitles into annotations with the src field set as given // Annotations turns the subtitles into annotations with the src field set as given
// Method of the annotation.Caption IF // Method of the annotation.Caption IF
func (s *Subtitle) Annotations(src string) []*annotation.Annotation { func (s *Subtitle) Annotations(src string, args ...int) []*annotation.Annotation {
return annotation.SubToAnnotation(s.Subtitles, src) return annotation.SubToAnnotation(s.Subtitles, src)
} }
...@@ -382,7 +382,7 @@ func GetPlaylistVideos(playListID string, apiKey string) (members []string, err ...@@ -382,7 +382,7 @@ func GetPlaylistVideos(playListID string, apiKey string) (members []string, err
// LoadYouTubeAnnotationsVideo downloads the subtitles of the specified YouTube video and creates annotations in the specified mongo db collection // LoadYouTubeAnnotationsVideo downloads the subtitles of the specified YouTube video and creates annotations in the specified mongo db collection
// user supplied YouTube subtitles are preferred over auto-generated ones // user supplied YouTube subtitles are preferred over auto-generated ones
func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollection string) error { func LoadYouTubeAnnotationsVideo(videoID string, language string, collection *mongo.Collection) error {
var subs annotation.Caption var subs annotation.Caption
err := errors.New("") err := errors.New("")
...@@ -405,12 +405,6 @@ func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollectio ...@@ -405,12 +405,6 @@ func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollectio
log.WithFields(log.Fields{"videoID": videoID, "language": language, "quantity": len(annotations)}).Trace("Subtitle items found") log.WithFields(log.Fields{"videoID": videoID, "language": language, "quantity": len(annotations)}).Trace("Subtitle items found")
mongoPort, err := strconv.Atoi(os.Getenv("MONGO_PORT"))
collection, err := annotation.ConnectMongoDBCollection(os.Getenv("MONGO_HOST"), mongoPort, os.Getenv("MONGO_DB_NAME"), mongoCollection)
if err != nil {
return err
}
err = annotation.UploadToDB(annotations, collection) err = annotation.UploadToDB(annotations, collection)
if err != nil { if err != nil {
return err return err
...@@ -421,7 +415,7 @@ func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollectio ...@@ -421,7 +415,7 @@ func LoadYouTubeAnnotationsVideo(videoID string, language string, mongoCollectio
// LoadYouTubeAnnotationsPlaylist downloads the subtitles of the specified YouTube playlist and loads them to the specified mongo db collection // LoadYouTubeAnnotationsPlaylist downloads the subtitles of the specified YouTube playlist and loads them to the specified mongo db collection
// user supplied YouTube subtitles are preferred over auto-generated ones // user supplied YouTube subtitles are preferred over auto-generated ones
func LoadYouTubeAnnotationsPlaylist(playlistID string, language string, mongoCollection string) error { func LoadYouTubeAnnotationsPlaylist(playlistID string, language string, collection *mongo.Collection) error {
defer monitor.Elapsed()() defer monitor.Elapsed()()
members, err := GetPlaylistVideos(playlistID, os.Getenv("YOUTUBE_DATA_API_KEY")) members, err := GetPlaylistVideos(playlistID, os.Getenv("YOUTUBE_DATA_API_KEY"))
...@@ -431,7 +425,7 @@ func LoadYouTubeAnnotationsPlaylist(playlistID string, language string, mongoCol ...@@ -431,7 +425,7 @@ func LoadYouTubeAnnotationsPlaylist(playlistID string, language string, mongoCol
log.WithFields(log.Fields{"playlistID": playlistID, "language": language, "quantity": len(members)}).Trace("Videos found in playlist") log.WithFields(log.Fields{"playlistID": playlistID, "language": language, "quantity": len(members)}).Trace("Videos found in playlist")
for _, videoID := range members { for _, videoID := range members {
err := LoadYouTubeAnnotationsVideo(videoID, language, mongoCollection) err := LoadYouTubeAnnotationsVideo(videoID, language, collection)
if err != nil { if err != nil {
log.Warning(err) log.Warning(err)
continue continue
...@@ -483,14 +477,30 @@ func parseSRV3(file []byte) (*TimedText, error) { ...@@ -483,14 +477,30 @@ func parseSRV3(file []byte) (*TimedText, error) {
// Annotations turns the TimedText into annotations with the src field set as given // Annotations turns the TimedText into annotations with the src field set as given
// Method of the annotation.Caption IF // Method of the annotation.Caption IF
func (s *TimedText) Annotations(src string) []*annotation.Annotation { // Args default values
// 1. minWindowLength := 1
// 2. maxWindowLength := 5
func (s *TimedText) Annotations(src string, args ...int) []*annotation.Annotation {
// Default values
minWindowLength := 1
maxWindowLength := 5
for i, val := range args {
if i == 0 {
minWindowLength = val
}
if i == 1 {
maxWindowLength = val
}
}
return fromTimedText(s, 3, src) return fromTimedText(s, minWindowLength, maxWindowLength, src)
} }
// fromTimedText applies the window slide approach to create annotations from TimedText // fromTimedText applies the window slide approach to create annotations from TimedText
// window slide only applied within one paragraph // window slide only applied within one paragraph
func fromTimedText(subtitle *TimedText, windowLength int, src string) []*annotation.Annotation { func fromTimedText(subtitle *TimedText, minWindowLength int, maxWindowLength int, src string) []*annotation.Annotation {
var windowAnnotations []*annotation.Annotation var windowAnnotations []*annotation.Annotation
...@@ -516,7 +526,7 @@ func fromTimedText(subtitle *TimedText, windowLength int, src string) []*annotat ...@@ -516,7 +526,7 @@ func fromTimedText(subtitle *TimedText, windowLength int, src string) []*annotat
// Handle automatic captions with segments (word granularity timing) // Handle automatic captions with segments (word granularity timing)
for startSegmentIdx := range paragraph.Segments { for startSegmentIdx := range paragraph.Segments {
for wLength := 1; wLength <= windowLength; wLength++ { for wLength := minWindowLength; wLength <= maxWindowLength; wLength++ {
// Detect if sliding window end exceeds segment end // Detect if sliding window end exceeds segment end
if startSegmentIdx+wLength > len(paragraph.Segments) { if startSegmentIdx+wLength > len(paragraph.Segments) {
......
...@@ -9,6 +9,7 @@ import ( ...@@ -9,6 +9,7 @@ import (
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings"
"testing" "testing"
) )
...@@ -173,7 +174,7 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) { ...@@ -173,7 +174,7 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) {
timedText, err := parseSRV3(byteValue) timedText, err := parseSRV3(byteValue)
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
a := timedText.Annotations("youtube://VIlLpnJJl_4") a := timedText.Annotations("youtube://VIlLpnJJl_4", 1, 3)
aExpect := annotation.Annotation{ aExpect := annotation.Annotation{
Src: "youtube://VIlLpnJJl_4", Src: "youtube://VIlLpnJJl_4",
...@@ -242,18 +243,29 @@ func TestNumberOfProducedAnnotations(t *testing.T) { ...@@ -242,18 +243,29 @@ func TestNumberOfProducedAnnotations(t *testing.T) {
timedText, err := parseSRV3([]byte(snipped)) timedText, err := parseSRV3([]byte(snipped))
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
for windowLength := 3; windowLength <= 8; windowLength++ { for minWindowLength := 3; minWindowLength <= 8; minWindowLength++ {
var numPermutations int for maxWindowLength := minWindowLength; maxWindowLength <= 8; maxWindowLength++ {
var numPermutations int
// Calculate sum of sliding windows ( sum_i=1-to-window-length(NumSegments-i+1) // Calculate sum of sliding windows ( sum_i=1-to-window-length(NumSegments-i+1)
for i := 1; i <= windowLength; i++ { for i := minWindowLength; i <= maxWindowLength; i++ {
numPermutations += len(timedText.Body.Paragraphs[0].Segments) - i + 1 numPermutations += len(timedText.Body.Paragraphs[0].Segments) - i + 1
} }
// Test for the right number of annotations
annotations := fromTimedText(timedText, minWindowLength, maxWindowLength, "")
assert.Equal(t, numPermutations, len(annotations))
// Test that all annotations are in the range of the given window length range
for _, a := range annotations {
a := fromTimedText(timedText, windowLength, "") assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) >= minWindowLength)
assert.Equal(t, numPermutations, len(a)) assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) <= maxWindowLength)
}
}
} }
} }
func TestSRV3parseManualCaption(t *testing.T) { func TestSRV3parseManualCaption(t *testing.T) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment