Commit 18bfb3a6 authored by Christof Gerber's avatar Christof Gerber

Merge branch 'feature/annotation-segments'

parents 8a279874 1d153a66
Pipeline #39827 passed with stages
in 7 minutes and 57 seconds
......@@ -25,6 +25,10 @@ youtube-dl https://youtu.be/1yRFJcCg6xo
Download video info
```bash
youtube-dl --skip-download --write-info-json https://youtu.be/1yRFJcCg6xo
# pretty print in command line
youtube-dl --skip-download -j https://youtu.be/1yRFJcCg6xo | jq
```
Download video & thumbnail
......
......@@ -4,17 +4,12 @@ go 1.12
require (
github.com/asticode/go-astisub v0.0.0-20190514140258-c0ed7925c393
github.com/aws/aws-sdk-go v1.19.45 // indirect
github.com/sirupsen/logrus v1.4.2
github.com/stretchr/testify v1.3.0
gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e
gitlab.ethz.ch/chgerber/annotation/v2 v2.0.0
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340
go.mongodb.org/mongo-driver v1.0.3
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8 // indirect
golang.org/x/net v0.0.0-20190611141213-3f473d35a33a // indirect
golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a // indirect
golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5 // indirect
golang.org/x/tools v0.0.0-20190806215303-88ddfcebc769 // indirect
google.golang.org/api v0.6.0
google.golang.org/appengine v1.6.1 // indirect
)
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0 h1:eOI3/cP2VTU6uZLDYAoic+eyzzB9YyGmJ7eIjl8rOPg=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0 h1:ROfEUZz+Gh5pa62DJWXSaonyu3StP6EA6lPEXPI6mCo=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.39.0 h1:UgQP9na6OTfp4dsAiz/eFpFA1C6tPdH5wiRdi19tuMw=
cloud.google.com/go v0.39.0/go.mod h1:rVLT6fkc8chs9sfPtFc1SBH6em7n+ZoXaG+87tDISts=
......@@ -29,8 +30,8 @@ github.com/aws/aws-sdk-go v1.19.27/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpi
github.com/aws/aws-sdk-go v1.19.43/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.19.44 h1:5MoLvCkdpSGZkMSZSBXqq7WLodttWYu4SxLn/jr2y2g=
github.com/aws/aws-sdk-go v1.19.44/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.19.45 h1:jAxmC8qqa7mW531FDgM8Ahbqlb3zmiHgTpJU6fY3vJ0=
github.com/aws/aws-sdk-go v1.19.45/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.20.0 h1:t74VM7opfCwwbe+wg6eys4a2wLqky6Znitr7BsqYPUg=
github.com/aws/aws-sdk-go v1.20.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
......@@ -154,8 +155,8 @@ gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5 h1
gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5/go.mod h1:LO7T0+5IEo+C73/lyVT+mZzBobPfBshNk4u6gsTnDYE=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190605204157-8c12804d664b h1:bpfv2ZyuSmJ8EIIoSz1SkiZZ5wvOP6tqqToI0iPKhE4=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190605204157-8c12804d664b/go.mod h1:uO/B1tPARzCcEcMc0+tMIs8S25SEbDaEDR2WzK6uUgU=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e h1:p/dSyq+rPpgbp+kwkypqd3C+wN8wqc8M+6Gr6t0SHdI=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e/go.mod h1:nV+TRAQXU64l8aTlhNBBZ5h+enXSIuwNZYhzolmvxw8=
gitlab.ethz.ch/chgerber/annotation/v2 v2.0.0 h1:04E+T0VHCCAHCsrGj973vocyUCx6I35Maodr70Z+tlk=
gitlab.ethz.ch/chgerber/annotation/v2 v2.0.0/go.mod h1:LdyyrAsy8kSnM84LMqdtVDZB+mpTeA7ugE52jMer0Ww=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190412120244-dcc5be809fae/go.mod h1:HqYam5C8lDlOiYTcNgbvi11jILmk1puFjAvVoehT36o=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340 h1:8r29nRz4ldWxchnPY6DcJnivPDo5eXmvhNC2kdW8vpg=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340/go.mod h1:pJs/THBAEx9qNq3xQdrGq+wcnQRxpNSpAj4dQH1cRMk=
......@@ -187,8 +188,8 @@ golang.org/x/crypto v0.0.0-20190513172903-22d7a77e9e5f/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20190530122614-20be4c3c3ed5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5 h1:58fnuSXlxZmFdJyvtTFVmVhcMLU6v5fEb/ok4wyqtNU=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8 h1:1wopBVtVdWnn03fZelqdXTqk7U7zPQCb+T4rbU9ZEoU=
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 h1:HuIa8hRrWRSrqYzx1qI49NNxhdi2PrY7gxVSq1JjLDc=
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
......@@ -221,12 +222,9 @@ golang.org/x/net v0.0.0-20190522155817-f3200d17e092 h1:4QSRKanuywn15aTZvI/mIDEgP
golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65 h1:+rhAzEzT3f4JtomfC371qB+0Ola2caSKcY69NUBZrRQ=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN2FrWp4IuCxOSyS0V00=
golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190611141213-3f473d35a33a h1:+KkCgOMgnKSgenxTBoiwkMqTiouMIy/3o8RLdmSbGoY=
golang.org/x/net v0.0.0-20190611141213-3f473d35a33a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80 h1:Ao/3l156eZf2AW5wK8a7/smtodRU+gha3+BeqJ69lRk=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421 h1:Wo7BWFiOk0QRFMLYMqJGFMd9CgUAcGx7V+qEg/h5IBI=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
......@@ -262,13 +260,8 @@ golang.org/x/sys v0.0.0-20190528183647-3626398d7749 h1:oG2HS+e2B9VqK95y67B5MgJIJ
golang.org/x/sys v0.0.0-20190528183647-3626398d7749/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed h1:uPxWBzB3+mlnjy9W58qY1j/cjyFjutgw/Vhan2zLy/A=
golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444 h1:/d2cWp6PSamH4jDPFLyO150psQdqvtoNX8Zjg3AQ31g=
golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190610081024-1e42afee0f76 h1:QSmW7Q3mFdAGjtAd0byXmFJ55inUydyZ4WQmiuItAIQ=
golang.org/x/sys v0.0.0-20190610081024-1e42afee0f76/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a h1:sPlwkA5W19gtxRApEyGyqWg4ngTrMzOJ43fOsWrgYEE=
golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
......@@ -290,11 +283,9 @@ golang.org/x/tools v0.0.0-20190525145741-7be61e1b0e51/go.mod h1:RgjU9mgBXZiqYHBn
golang.org/x/tools v0.0.0-20190529010454-aa71c3f32488/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190530171427-2b03ca6e44eb/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190603231351-8aaa1484dc10/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190610190622-bca362e842d4 h1:mlrypYegmpBUm3gXAR2XkF+d8w2b6xGzTmuTERlpUyU=
golang.org/x/tools v0.0.0-20190610190622-bca362e842d4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5 h1:WfRBLVK37R+k1gUOKuZX8JtangyEXmuopHz5tazlZRo=
golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190806215303-88ddfcebc769 h1:D/+0wZ7qKh5vQqpbxJGPnaMv1tuCCKmn6heUpPt3FOk=
golang.org/x/tools v0.0.0-20190806215303-88ddfcebc769/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk=
google.golang.org/api v0.3.2 h1:iTp+3yyl/KOtxa/d1/JUE0GGSoR6FuW5udver22iwpw=
google.golang.org/api v0.3.2/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk=
......@@ -309,8 +300,6 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.0 h1:Tfd7cKwKbFRsI8RMAD3oqqw7JPFRrvFlOsfbgVkjOOw=
google.golang.org/appengine v1.6.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1 h1:QzqyMA1tlu6CgqCDUtU9V+ZKhLFT2dkJuANu5QaxI3I=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19 h1:Lj2SnHtxkRGJDqnGaSjo+CCdIieEnwVazbOXILwQemk=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
......@@ -324,6 +313,7 @@ google.golang.org/genproto v0.0.0-20190605220351-eb0b1bdb6ae6/go.mod h1:z3L6/3dT
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
google.golang.org/grpc v1.19.0 h1:cfg4PD8YEdSFnm7qLV4++93WcmhH2nIUhMjhdCvl3j8=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1 h1:Hz2g2wirWK7H0qIIhGIqRGTuMwTE8HEKFnDZZ7lm9NU=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1 h1:j6XxA85m/6txkUCHvzlV5f+HBNl/1r5cZ2A/3IEFOO8=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
......
......@@ -9,7 +9,7 @@ import (
"github.com/asticode/go-astisub"
log "github.com/sirupsen/logrus"
"gitlab.ethz.ch/chgerber/MessageComposition/src/pkg/util"
"gitlab.ethz.ch/chgerber/annotation"
"gitlab.ethz.ch/chgerber/annotation/v2"
"gitlab.ethz.ch/chgerber/monitor"
"go.mongodb.org/mongo-driver/mongo"
"google.golang.org/api/googleapi/transport"
......@@ -32,7 +32,7 @@ type Subtitle struct {
// Annotations turns the subtitles into annotations with the src field set as given
// Method of the annotation.Caption interface
func (s *Subtitle) Annotations(src string, args ...int) []*annotation.Annotation {
func (s *Subtitle) Annotations(src string) []*annotation.Annotation {
return annotation.SubToAnnotation(s.Subtitles, src)
}
......@@ -477,6 +477,15 @@ type Segment struct {
Value string `xml:",chardata"`
}
// SegmentsText collects the value of every segment of the paragraph, in order,
// strips leading and trailing whitespace from each one and hands the resulting
// words to util.WordsToString, whose result is returned as the paragraph text.
func (p *Paragraph) SegmentsText() string {
	var words []string
	for i := range p.Segments {
		// Trim each segment on its own so stray whitespace around a word
		// does not leak into the joined text.
		trimmed := strings.TrimSpace(p.Segments[i].Value)
		words = append(words, trimmed)
	}
	return util.WordsToString(words)
}
// parseSRV3 decodes a youtube-dl subtitle of type .srv3 into a TimedText
func parseSRV3(file []byte) (*TimedText, error) {
var timedText TimedText
......@@ -493,35 +502,26 @@ func parseSRV3(file []byte) (*TimedText, error) {
// Args default values
// 1. minWindowLength := 1
// 2. maxWindowLength := 5
func (s *TimedText) Annotations(src string, args ...int) []*annotation.Annotation {
// Default values
minWindowLength := 1
maxWindowLength := 5
func (s *TimedText) Annotations(src string) []*annotation.Annotation {
for i, val := range args {
if i == 0 {
minWindowLength = val
}
if i == 1 {
maxWindowLength = val
}
}
return fromTimedText(s, minWindowLength, maxWindowLength, src)
return fromTimedText(s, src)
}
// fromTimedText applies the window slide approach to create annotations from TimedText
// window slide only applied within the paragraph
// TODO: apply sliding window also across paragraphs. Because paragraph boundary != sentence boundaries
func fromTimedText(subtitle *TimedText, minWindowLength int, maxWindowLength int, src string) []*annotation.Annotation {
// fromTimedText creates one Annotation for each Paragraph
// TODO find smart way which segments to combine into one annotation (e.g. when t_pause > x || len(segments) > 10)
func fromTimedText(subtitle *TimedText, src string) []*annotation.Annotation {
var windowAnnotations []*annotation.Annotation
var annos []*annotation.Annotation
count := 0
for paragraphNum, paragraph := range subtitle.Body.Paragraphs {
// Handle manual captions which have no segments (no word granularity timing)
if len(paragraph.Segments) == 0 && strings.Join(strings.Fields(paragraph.Value), " ") != "" {
// if non-empty paragraph
if len(paragraph.Segments) > 0 || strings.Join(strings.Fields(paragraph.Value), " ") != "" {
count++
// Create annotation for the paragraph
a := annotation.Annotation{Src: src, Subtitle: annotation.Subtitle{Count: count}}
// handle when current paragraph subtitle stays displayed even when next one appears
var endTime int
......@@ -536,76 +536,48 @@ func fromTimedText(subtitle *TimedText, minWindowLength int, maxWindowLength int
endTime = paragraph.Time + paragraph.Duration
}
// Create annotation for the paragraph
a := &annotation.Annotation{
Src: src,
Subtitle: annotation.Subtitle{
Count: paragraphNum,
Text: paragraph.Value,
Start: paragraph.Time,
End: endTime,
},
}
windowAnnotations = append(windowAnnotations, a)
}
// Handle automatic captions with segments (word granularity timing)
for startSegmentIdx := range paragraph.Segments {
for wLength := minWindowLength; wLength <= maxWindowLength; wLength++ {
a.Subtitle.Start = paragraph.Time
a.Subtitle.End = endTime
// Detect if sliding window end exceeds segment end
if startSegmentIdx+wLength > len(paragraph.Segments) {
break
}
// Collect the values (text) of the current window
var windowSegmentsValue []string
for i := startSegmentIdx; i < startSegmentIdx+wLength; i++ {
windowSegmentsValue = append(windowSegmentsValue, paragraph.Segments[i].Value)
}
// Set segments of annotation
// Handle automatic captions with segments (word granularity timing)
var segments []annotation.TimedText
for segmentIdx, segment := range paragraph.Segments {
// Compute the start time of the current window
startSegment := paragraph.Segments[startSegmentIdx]
startTime := paragraph.Time + startSegment.Time
startTime := paragraph.Time + segment.Time
var endTime int
// if current segment not last one
if startSegmentIdx+wLength < len(paragraph.Segments) {
nextSegment := paragraph.Segments[startSegmentIdx+wLength]
endTime = paragraph.Time + nextSegment.Time
if segmentIdx < len(paragraph.Segments)-1 {
endTime = paragraph.Time + paragraph.Segments[segmentIdx+1].Time
} else {
if paragraphNum < len(subtitle.Body.Paragraphs)-1 {
// handle when current segment/paragraph subtitle stays displayed even when next one appears
if paragraph.Time+paragraph.Duration > subtitle.Body.Paragraphs[paragraphNum+1].Time {
endTime = subtitle.Body.Paragraphs[paragraphNum+1].Time
} else {
endTime = paragraph.Time + paragraph.Duration
}
} else {
endTime = paragraph.Time + paragraph.Duration
}
endTime = a.Subtitle.End
}
// Create annotation of the current window
a := &annotation.Annotation{
Src: src,
Subtitle: annotation.Subtitle{
Count: paragraphNum,
Text: util.WordsToString(windowSegmentsValue),
Start: startTime,
End: endTime,
},
s := annotation.TimedText{
Text: strings.TrimSpace(segment.Value),
Start: startTime,
End: endTime,
}
windowAnnotations = append(windowAnnotations, a)
segments = append(segments, s)
}
// use the paragraph value as text when no segments are present, otherwise the joined segment text
if len(paragraph.Segments) == 0 {
a.Subtitle.Text = paragraph.Value
} else {
a.Subtitle.Text = paragraph.SegmentsText()
}
a.Subtitle.Segments = segments
annos = append(annos, &a)
}
}
return windowAnnotations
return annos
}
func parseSub(file string, format SubtitleFormat) (annotation.Caption, error) {
......
......@@ -4,12 +4,11 @@ import (
"fmt"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"gitlab.ethz.ch/chgerber/annotation"
"gitlab.ethz.ch/chgerber/annotation/v2"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
)
......@@ -174,15 +173,25 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) {
timedText, err := parseSRV3(byteValue)
assert.Equal(t, nil, err)
a := timedText.Annotations("youtube://VIlLpnJJl_4", 1, 3)
a := timedText.Annotations("youtube://VIlLpnJJl_4")
aExpect := annotation.Annotation{
Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{
Count: 0,
Text: "you're",
Count: 1,
Text: "you're back in my day we use differently",
Start: 170,
End: 1429,
End: 4539,
Segments: []annotation.TimedText{
{Start: 170, End: 1429, Text: "you're"},
{Start: 1429, End: 2429, Text: "back"},
{Start: 2429, End: 2460, Text: "in"},
{Start: 2460, End: 2909, Text: "my"},
{Start: 2909, End: 2970, Text: "day"},
{Start: 2970, End: 3510, Text: "we"},
{Start: 3510, End: 3720, Text: "use"},
{Start: 3720, End: 4539, Text: "differently"},
},
},
}
assert.Equal(t, aExpect, *a[0])
......@@ -190,39 +199,24 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) {
aExpect = annotation.Annotation{
Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{
Count: 0,
Text: "we",
Start: 2970,
End: 3510,
Count: 16,
Text: "hundreds of thousands and millions of",
Start: 59760,
End: 60889,
Segments: []annotation.TimedText{
{Start: 59760, End: 60090, Text: "hundreds"},
{Start: 60090, End: 60149, Text: "of"},
{Start: 60149, End: 60510, Text: "thousands"},
{Start: 60510, End: 60629, Text: "and"},
{Start: 60629, End: 60690, Text: "millions"},
{Start: 60690, End: 60889, Text: "of"},
},
},
}
assert.Equal(t, aExpect, *a[15])
aExpect = annotation.Annotation{
Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{
Count: 2,
Text: "12 hours a",
Start: 6120,
End: 6930,
},
}
assert.Equal(t, aExpect, *a[32])
// test endTime of last segment of paragraph
aExpect = annotation.Annotation{
Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{
Count: 2,
Text: "just to",
Start: 7140,
End: 7970,
},
}
assert.Equal(t, aExpect, *a[43])
}
func TestNumberOfProducedAnnotations(t *testing.T) {
func TestTimedTextToAnnotations(t *testing.T) {
snipped := `
<?xml version="1.0" encoding="utf-8" ?>
......@@ -247,6 +241,23 @@ func TestNumberOfProducedAnnotations(t *testing.T) {
<s t="3340" ac="252">use</s>
<s p="2" t="3550" ac="0">differently</s>
</p>
<p t="4549" d="5681" w="1">
<s p="2" ac="144">stream</s>
<s p="2" t="1000" ac="94">snape</s>
<s p="1" t="1151" ac="200">for</s>
<s t="1571" ac="226">12</s>
<s t="1871" ac="252">hours</s>
<s p="2" t="2111" ac="161">a</s>
<s t="2381" ac="252">day</s>
<s t="2591" ac="252">just</s>
<s p="2" t="2830" ac="187">to</s>
</p>
<p t="12794" d="3556">Philosophers, dramatists, theologians</p>
<p t="16374" d="2206">have grappled with this question for centuries:</p>
<p t="18604" d="1532">what makes people go wrong?</p>
<p t="20160" d="2976">Interestingly, I asked this question when I was a little kid.</p>
<p t="75530" d="1450"> (Laughter) </p>
<p t="77250" d="4140"> Or maybe you were thinking &quot;Where do you get her confidence?&quot; </p>
</body>
</timedtext>
`
......@@ -255,29 +266,37 @@ func TestNumberOfProducedAnnotations(t *testing.T) {
timedText, err := parseSRV3([]byte(snipped))
assert.Equal(t, nil, err)
for minWindowLength := 3; minWindowLength <= 8; minWindowLength++ {
for maxWindowLength := minWindowLength; maxWindowLength <= 8; maxWindowLength++ {
var numPermutations int
// Test for the right number of annotations
annotations := fromTimedText(timedText, "")
assert.Equal(t, 8, len(annotations))
assert.Equal(t, 8, len(annotations[0].Subtitle.Segments))
assert.Equal(t, 9, len(annotations[1].Subtitle.Segments))
assert.Equal(t, 0, len(annotations[2].Subtitle.Segments))
assert.Equal(t, 0, len(annotations[3].Subtitle.Segments))
// Calculate sum of sliding windows ( sum_i=1-to-window-length(NumSegments-i+1)
for i := minWindowLength; i <= maxWindowLength; i++ {
numPermutations += len(timedText.Body.Paragraphs[0].Segments) - i + 1
}
// Test if segment text matches
for i, a := range annotations {
// Test for the right number of annotations
annotations := fromTimedText(timedText, minWindowLength, maxWindowLength, "")
assert.Equal(t, numPermutations, len(annotations))
// Test that all annotations are in the range of the given window length range
for _, a := range annotations {
assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) >= minWindowLength)
assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) <= maxWindowLength)
}
// TODO test also text of paragraphs with value but zero segments
for segmentIdx, segment := range a.Subtitle.Segments {
assert.Equal(t, timedText.Body.Paragraphs[i].Segments[segmentIdx].Value, segment.Text)
}
if len(a.Subtitle.Segments) == 0 {
assert.Equal(t, timedText.Body.Paragraphs[i].Value, a.Subtitle.Text)
}
}
aExpect := annotation.Annotation{
Src: "",
Subtitle: annotation.Subtitle{
Count: 8,
Text: " Or maybe you were thinking \"Where do you get her confidence?\" ",
Start: 77250,
End: 81390,
},
}
assert.Equal(t, aExpect, *annotations[7])
}
func TestSRV3parseManualCaption(t *testing.T) {
......@@ -301,50 +320,3 @@ func TestSRV3parseManualCaption(t *testing.T) {
assert.Equal(t, 1532, timedText.Body.Paragraphs[2].Duration)
assert.Equal(t, "what makes people go wrong?", timedText.Body.Paragraphs[2].Value)
}
func TestTimedTextToAnnotationManualCaption(t *testing.T) {
xmlFile, err := os.Open("testData/subManual-OsFEV35tWsg.en.srv3")
assert.Equal(t, nil, err)
// defer the closing of our xmlFile so that we can parse it later on
defer xmlFile.Close()
// read our opened xmlFile as a byte array.
byteValue, err := ioutil.ReadAll(xmlFile)
assert.Equal(t, nil, err)
timedText, err := parseSRV3(byteValue)
assert.Equal(t, nil, err)
a := timedText.Annotations("youtube://OsFEV35tWsg")
aExpect := annotation.Annotation{
Src: "youtube://OsFEV35tWsg",
Subtitle: annotation.Subtitle{
Count: 0,
Text: "Philosophers, dramatists, theologians",
Start: 12794,
End: 16350,
},
}
assert.Equal(t, aExpect, *a[0])
aExpect = annotation.Annotation{
Src: "youtube://OsFEV35tWsg",
Subtitle: annotation.Subtitle{
Count: 15,
Text: "with them on the good side,\nthe others on the bad side --",
Start: 56652,
End: 59325,
},
}
assert.Equal(t, aExpect, *a[15])
aExpect = annotation.Annotation{
Src: "youtube://OsFEV35tWsg",
Subtitle: annotation.Subtitle{
Count: 32,
Text: "And apparently, he disobeyed God,",
Start: 104661,
End: 107866,
},
}
assert.Equal(t, aExpect, *a[32])
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment