To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit be6a0d71 authored by Christof Gerber's avatar Christof Gerber
Browse files

SRV3 to Annotations with segments

parent ba362a69
Pipeline #36620 passed with stages
in 6 minutes and 55 seconds
...@@ -4,17 +4,12 @@ go 1.12 ...@@ -4,17 +4,12 @@ go 1.12
require ( require (
github.com/asticode/go-astisub v0.0.0-20190514140258-c0ed7925c393 github.com/asticode/go-astisub v0.0.0-20190514140258-c0ed7925c393
github.com/aws/aws-sdk-go v1.19.45 // indirect
github.com/sirupsen/logrus v1.4.2 github.com/sirupsen/logrus v1.4.2
github.com/stretchr/testify v1.3.0 github.com/stretchr/testify v1.3.0
gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5 gitlab.ethz.ch/chgerber/MessageComposition v0.0.0-20190606100759-1b591f45e7e5
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e gitlab.ethz.ch/chgerber/annotation v0.0.0-20190615221508-5f3c40b49097
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340 gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340
go.mongodb.org/mongo-driver v1.0.3 go.mongodb.org/mongo-driver v1.0.3
golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8 // indirect
golang.org/x/net v0.0.0-20190611141213-3f473d35a33a // indirect
golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a // indirect
golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5 // indirect
google.golang.org/api v0.6.0 google.golang.org/api v0.6.0
google.golang.org/appengine v1.6.1 // indirect google.golang.org/appengine v1.6.1 // indirect
) )
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0 h1:eOI3/cP2VTU6uZLDYAoic+eyzzB9YyGmJ7eIjl8rOPg= cloud.google.com/go v0.34.0 h1:eOI3/cP2VTU6uZLDYAoic+eyzzB9YyGmJ7eIjl8rOPg=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0 h1:ROfEUZz+Gh5pa62DJWXSaonyu3StP6EA6lPEXPI6mCo=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.39.0 h1:UgQP9na6OTfp4dsAiz/eFpFA1C6tPdH5wiRdi19tuMw= cloud.google.com/go v0.39.0 h1:UgQP9na6OTfp4dsAiz/eFpFA1C6tPdH5wiRdi19tuMw=
cloud.google.com/go v0.39.0/go.mod h1:rVLT6fkc8chs9sfPtFc1SBH6em7n+ZoXaG+87tDISts= cloud.google.com/go v0.39.0/go.mod h1:rVLT6fkc8chs9sfPtFc1SBH6em7n+ZoXaG+87tDISts=
...@@ -31,6 +32,8 @@ github.com/aws/aws-sdk-go v1.19.44 h1:5MoLvCkdpSGZkMSZSBXqq7WLodttWYu4SxLn/jr2y2 ...@@ -31,6 +32,8 @@ github.com/aws/aws-sdk-go v1.19.44 h1:5MoLvCkdpSGZkMSZSBXqq7WLodttWYu4SxLn/jr2y2
github.com/aws/aws-sdk-go v1.19.44/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.19.44/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.19.45 h1:jAxmC8qqa7mW531FDgM8Ahbqlb3zmiHgTpJU6fY3vJ0= github.com/aws/aws-sdk-go v1.19.45 h1:jAxmC8qqa7mW531FDgM8Ahbqlb3zmiHgTpJU6fY3vJ0=
github.com/aws/aws-sdk-go v1.19.45/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.19.45/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/aws/aws-sdk-go v1.20.0 h1:t74VM7opfCwwbe+wg6eys4a2wLqky6Znitr7BsqYPUg=
github.com/aws/aws-sdk-go v1.20.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
...@@ -156,6 +159,8 @@ gitlab.ethz.ch/chgerber/annotation v0.0.0-20190605204157-8c12804d664b h1:bpfv2Zy ...@@ -156,6 +159,8 @@ gitlab.ethz.ch/chgerber/annotation v0.0.0-20190605204157-8c12804d664b h1:bpfv2Zy
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190605204157-8c12804d664b/go.mod h1:uO/B1tPARzCcEcMc0+tMIs8S25SEbDaEDR2WzK6uUgU= gitlab.ethz.ch/chgerber/annotation v0.0.0-20190605204157-8c12804d664b/go.mod h1:uO/B1tPARzCcEcMc0+tMIs8S25SEbDaEDR2WzK6uUgU=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e h1:p/dSyq+rPpgbp+kwkypqd3C+wN8wqc8M+6Gr6t0SHdI= gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e h1:p/dSyq+rPpgbp+kwkypqd3C+wN8wqc8M+6Gr6t0SHdI=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e/go.mod h1:nV+TRAQXU64l8aTlhNBBZ5h+enXSIuwNZYhzolmvxw8= gitlab.ethz.ch/chgerber/annotation v0.0.0-20190607145645-0049de6d439e/go.mod h1:nV+TRAQXU64l8aTlhNBBZ5h+enXSIuwNZYhzolmvxw8=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190615221508-5f3c40b49097 h1:Nqpn1eEnT6zuMeFxh/xfl5FLK3A19VIuYH2i3jQ7TkI=
gitlab.ethz.ch/chgerber/annotation v0.0.0-20190615221508-5f3c40b49097/go.mod h1:EgiYFN6qrKkKD7LOYKBKpTZ4vB8yJZvped94MHxR5g8=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190412120244-dcc5be809fae/go.mod h1:HqYam5C8lDlOiYTcNgbvi11jILmk1puFjAvVoehT36o= gitlab.ethz.ch/chgerber/monitor v0.0.0-20190412120244-dcc5be809fae/go.mod h1:HqYam5C8lDlOiYTcNgbvi11jILmk1puFjAvVoehT36o=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340 h1:8r29nRz4ldWxchnPY6DcJnivPDo5eXmvhNC2kdW8vpg= gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340 h1:8r29nRz4ldWxchnPY6DcJnivPDo5eXmvhNC2kdW8vpg=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340/go.mod h1:pJs/THBAEx9qNq3xQdrGq+wcnQRxpNSpAj4dQH1cRMk= gitlab.ethz.ch/chgerber/monitor v0.0.0-20190527191251-2bb9dd731340/go.mod h1:pJs/THBAEx9qNq3xQdrGq+wcnQRxpNSpAj4dQH1cRMk=
...@@ -227,6 +232,8 @@ golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN ...@@ -227,6 +232,8 @@ golang.org/x/net v0.0.0-20190607181551-461777fb6f67 h1:rJJxsykSlULwd2P2+pg/rtnwN
golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190607181551-461777fb6f67/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190611141213-3f473d35a33a h1:+KkCgOMgnKSgenxTBoiwkMqTiouMIy/3o8RLdmSbGoY= golang.org/x/net v0.0.0-20190611141213-3f473d35a33a h1:+KkCgOMgnKSgenxTBoiwkMqTiouMIy/3o8RLdmSbGoY=
golang.org/x/net v0.0.0-20190611141213-3f473d35a33a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190611141213-3f473d35a33a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980 h1:dfGZHvZk057jK2MCeWus/TowKpJ8y4AmooUzdBSR9GU=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421 h1:Wo7BWFiOk0QRFMLYMqJGFMd9CgUAcGx7V+qEg/h5IBI= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421 h1:Wo7BWFiOk0QRFMLYMqJGFMd9CgUAcGx7V+qEg/h5IBI=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
...@@ -269,6 +276,10 @@ golang.org/x/sys v0.0.0-20190610081024-1e42afee0f76 h1:QSmW7Q3mFdAGjtAd0byXmFJ55 ...@@ -269,6 +276,10 @@ golang.org/x/sys v0.0.0-20190610081024-1e42afee0f76 h1:QSmW7Q3mFdAGjtAd0byXmFJ55
golang.org/x/sys v0.0.0-20190610081024-1e42afee0f76/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190610081024-1e42afee0f76/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a h1:sPlwkA5W19gtxRApEyGyqWg4ngTrMzOJ43fOsWrgYEE= golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a h1:sPlwkA5W19gtxRApEyGyqWg4ngTrMzOJ43fOsWrgYEE=
golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190613101156-ab3f67ed278a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190614084037-d442b75600c5 h1:tQrtnaPeNyfkuD2UMixVD6lAa7WngkIFvtWcdzNeq80=
golang.org/x/sys v0.0.0-20190614084037-d442b75600c5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190614160838-b47fdc937951 h1:ZUgGZ7PSkne6oY+VgAvayrB16owfm9/DKAtgWubzgzU=
golang.org/x/sys v0.0.0-20190614160838-b47fdc937951/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
...@@ -295,6 +306,9 @@ golang.org/x/tools v0.0.0-20190610190622-bca362e842d4 h1:mlrypYegmpBUm3gXAR2XkF+ ...@@ -295,6 +306,9 @@ golang.org/x/tools v0.0.0-20190610190622-bca362e842d4 h1:mlrypYegmpBUm3gXAR2XkF+
golang.org/x/tools v0.0.0-20190610190622-bca362e842d4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190610190622-bca362e842d4/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5 h1:WfRBLVK37R+k1gUOKuZX8JtangyEXmuopHz5tazlZRo= golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5 h1:WfRBLVK37R+k1gUOKuZX8JtangyEXmuopHz5tazlZRo=
golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190612232758-d4e310b4a8a5/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190614152001-1edc8e83c897/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59 h1:QjA/9ArTfVTLfEhClDCG7SGrZkZixxWpwNCDiwJfh88=
golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= google.golang.org/api v0.3.1/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk=
google.golang.org/api v0.3.2 h1:iTp+3yyl/KOtxa/d1/JUE0GGSoR6FuW5udver22iwpw= google.golang.org/api v0.3.2 h1:iTp+3yyl/KOtxa/d1/JUE0GGSoR6FuW5udver22iwpw=
google.golang.org/api v0.3.2/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk= google.golang.org/api v0.3.2/go.mod h1:6wY9I6uQWHQ8EM57III9mq/AjF+i8G65rmVagqKMtkk=
...@@ -324,6 +338,7 @@ google.golang.org/genproto v0.0.0-20190605220351-eb0b1bdb6ae6/go.mod h1:z3L6/3dT ...@@ -324,6 +338,7 @@ google.golang.org/genproto v0.0.0-20190605220351-eb0b1bdb6ae6/go.mod h1:z3L6/3dT
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
google.golang.org/grpc v1.19.0 h1:cfg4PD8YEdSFnm7qLV4++93WcmhH2nIUhMjhdCvl3j8= google.golang.org/grpc v1.19.0 h1:cfg4PD8YEdSFnm7qLV4++93WcmhH2nIUhMjhdCvl3j8=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1 h1:Hz2g2wirWK7H0qIIhGIqRGTuMwTE8HEKFnDZZ7lm9NU=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1 h1:j6XxA85m/6txkUCHvzlV5f+HBNl/1r5cZ2A/3IEFOO8= google.golang.org/grpc v1.21.1 h1:j6XxA85m/6txkUCHvzlV5f+HBNl/1r5cZ2A/3IEFOO8=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
......
...@@ -32,7 +32,7 @@ type Subtitle struct { ...@@ -32,7 +32,7 @@ type Subtitle struct {
// Annotations turns the subtitles into annotations with the src field set as given // Annotations turns the subtitles into annotations with the src field set as given
// Method of the annotation.Caption IF // Method of the annotation.Caption IF
func (s *Subtitle) Annotations(src string, args ...int) []*annotation.Annotation { func (s *Subtitle) Annotations(src string) []*annotation.Annotation {
return annotation.SubToAnnotation(s.Subtitles, src) return annotation.SubToAnnotation(s.Subtitles, src)
} }
...@@ -408,7 +408,8 @@ func loadYouTubeAnnotationsVideo(videoID string, language string, collection *mo ...@@ -408,7 +408,8 @@ func loadYouTubeAnnotationsVideo(videoID string, language string, collection *mo
if err != nil { if err != nil {
return err return err
} }
} else {
return err
} }
} }
...@@ -475,6 +476,15 @@ type Segment struct { ...@@ -475,6 +476,15 @@ type Segment struct {
Value string `xml:",chardata"` Value string `xml:",chardata"`
} }
// SegmentsText collects the whitespace-trimmed value of every segment of the
// paragraph, in order, and hands the resulting word list to util.WordsToString.
// NOTE(review): util.WordsToString presumably joins the words with single
// spaces — confirm against the util package.
func (p *Paragraph) SegmentsText() string {
	var words []string
	for i := range p.Segments {
		trimmed := strings.TrimSpace(p.Segments[i].Value)
		words = append(words, trimmed)
	}
	return util.WordsToString(words)
}
// parseSRV3 decodes a youtubedl subtitle of type .srv3 into a TimedText // parseSRV3 decodes a youtubedl subtitle of type .srv3 into a TimedText
func parseSRV3(file []byte) (*TimedText, error) { func parseSRV3(file []byte) (*TimedText, error) {
var timedText TimedText var timedText TimedText
...@@ -491,35 +501,26 @@ func parseSRV3(file []byte) (*TimedText, error) { ...@@ -491,35 +501,26 @@ func parseSRV3(file []byte) (*TimedText, error) {
// Args default values // Args default values
// 1. minWindowLength := 1 // 1. minWindowLength := 1
// 2. maxWindowLength := 5 // 2. maxWindowLength := 5
func (s *TimedText) Annotations(src string, args ...int) []*annotation.Annotation { func (s *TimedText) Annotations(src string) []*annotation.Annotation {
// Default values
minWindowLength := 1
maxWindowLength := 5
for i, val := range args {
if i == 0 {
minWindowLength = val
}
if i == 1 {
maxWindowLength = val
}
}
return fromTimedText(s, minWindowLength, maxWindowLength, src) return fromTimedText(s, src)
} }
// fromTimedText applies the window slide approach to create annotations from TimedText // fromTimedText creates one Annotation for each Paragraph
// window slide only applied within the paragraph // TODO find smart way which segments to combine into one annotation (e.g. when t_pause > x || len(segments) > 10)
// TODO: apply sliding window also across paragraphs. Because paragraph boundary != sentence boundaries func fromTimedText(subtitle *TimedText, src string) []*annotation.Annotation {
func fromTimedText(subtitle *TimedText, minWindowLength int, maxWindowLength int, src string) []*annotation.Annotation {
var windowAnnotations []*annotation.Annotation var annos []*annotation.Annotation
count := 0
for paragraphNum, paragraph := range subtitle.Body.Paragraphs { for paragraphNum, paragraph := range subtitle.Body.Paragraphs {
// Handle manual captions which have no segments (no word granularity timing) // if non-empty paragraph
if len(paragraph.Segments) == 0 && strings.Join(strings.Fields(paragraph.Value), " ") != "" { if len(paragraph.Segments) > 0 || strings.Join(strings.Fields(paragraph.Value), " ") != "" {
count++
// Create annotation for the paragraph
a := annotation.Annotation{Src: src, Subtitle: annotation.Subtitle{Count: count}}
// handle when current paragraph subtitle stays displayed even when next one appears // handle when current paragraph subtitle stays displayed even when next one appears
var endTime int var endTime int
...@@ -534,76 +535,48 @@ func fromTimedText(subtitle *TimedText, minWindowLength int, maxWindowLength int ...@@ -534,76 +535,48 @@ func fromTimedText(subtitle *TimedText, minWindowLength int, maxWindowLength int
endTime = paragraph.Time + paragraph.Duration endTime = paragraph.Time + paragraph.Duration
} }
// Create annotation for the paragraph a.Subtitle.Start = paragraph.Time
a := &annotation.Annotation{ a.Subtitle.End = endTime
Src: src,
Subtitle: annotation.Subtitle{
Count: paragraphNum,
Text: paragraph.Value,
Start: paragraph.Time,
End: endTime,
},
}
windowAnnotations = append(windowAnnotations, a) // Set segments of annotation
} // Handle automatic captions with segments (word granularity timing)
var segments []annotation.TimedText
// Handle automatic captions with segments (word granularity timing) for segmentIdx, segment := range paragraph.Segments {
for startSegmentIdx := range paragraph.Segments {
for wLength := minWindowLength; wLength <= maxWindowLength; wLength++ {
// Detect if sliding window end exceeds segment end
if startSegmentIdx+wLength > len(paragraph.Segments) {
break
}
// Collect the values (text) of the current window
var windowSegmentsValue []string
for i := startSegmentIdx; i < startSegmentIdx+wLength; i++ {
windowSegmentsValue = append(windowSegmentsValue, paragraph.Segments[i].Value)
}
// Compute the start time of the current window // Compute the start time of the current window
startSegment := paragraph.Segments[startSegmentIdx] startTime := paragraph.Time + segment.Time
startTime := paragraph.Time + startSegment.Time
var endTime int var endTime int
// if current segment not last one // if current segment not last one
if startSegmentIdx+wLength < len(paragraph.Segments) { if segmentIdx < len(paragraph.Segments)-1 {
nextSegment := paragraph.Segments[startSegmentIdx+wLength] endTime = paragraph.Time + paragraph.Segments[segmentIdx+1].Time
endTime = paragraph.Time + nextSegment.Time
} else { } else {
if paragraphNum < len(subtitle.Body.Paragraphs)-1 { endTime = a.Subtitle.End
// handle when current segment/paragraph subtitle stays displayed even when next one appears
if paragraph.Time+paragraph.Duration > subtitle.Body.Paragraphs[paragraphNum+1].Time {
endTime = subtitle.Body.Paragraphs[paragraphNum+1].Time
} else {
endTime = paragraph.Time + paragraph.Duration
}
} else {
endTime = paragraph.Time + paragraph.Duration
}
} }
// Create annotation of the current window s := annotation.TimedText{
a := &annotation.Annotation{ Text: strings.TrimSpace(segment.Value),
Src: src, Start: startTime,
Subtitle: annotation.Subtitle{ End: endTime,
Count: paragraphNum,
Text: util.WordsToString(windowSegmentsValue),
Start: startTime,
End: endTime,
},
} }
windowAnnotations = append(windowAnnotations, a) segments = append(segments, s)
}
// set the annotation text: the paragraph value when no segments are present, otherwise the joined segment texts
if len(paragraph.Segments) == 0 {
a.Subtitle.Text = paragraph.Value
} else {
a.Subtitle.Text = paragraph.SegmentsText()
} }
a.Subtitle.Segments = segments
annos = append(annos, &a)
} }
} }
return windowAnnotations return annos
} }
func parseSub(file string, format SubtitleFormat) (annotation.Caption, error) { func parseSub(file string, format SubtitleFormat) (annotation.Caption, error) {
......
...@@ -9,7 +9,6 @@ import ( ...@@ -9,7 +9,6 @@ import (
"os" "os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"strings"
"testing" "testing"
) )
...@@ -174,15 +173,25 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) { ...@@ -174,15 +173,25 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) {
timedText, err := parseSRV3(byteValue) timedText, err := parseSRV3(byteValue)
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
a := timedText.Annotations("youtube://VIlLpnJJl_4", 1, 3) a := timedText.Annotations("youtube://VIlLpnJJl_4")
aExpect := annotation.Annotation{ aExpect := annotation.Annotation{
Src: "youtube://VIlLpnJJl_4", Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{ Subtitle: annotation.Subtitle{
Count: 0, Count: 1,
Text: "you're", Text: "you're back in my day we use differently",
Start: 170, Start: 170,
End: 1429, End: 4539,
Segments: []annotation.TimedText{
{Start: 170, End: 1429, Text: "you're"},
{Start: 1429, End: 2429, Text: "back"},
{Start: 2429, End: 2460, Text: "in"},
{Start: 2460, End: 2909, Text: "my"},
{Start: 2909, End: 2970, Text: "day"},
{Start: 2970, End: 3510, Text: "we"},
{Start: 3510, End: 3720, Text: "use"},
{Start: 3720, End: 4539, Text: "differently"},
},
}, },
} }
assert.Equal(t, aExpect, *a[0]) assert.Equal(t, aExpect, *a[0])
...@@ -190,39 +199,24 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) { ...@@ -190,39 +199,24 @@ func TestTimedTextToAnnotationAutomaticCaption(t *testing.T) {
aExpect = annotation.Annotation{ aExpect = annotation.Annotation{
Src: "youtube://VIlLpnJJl_4", Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{ Subtitle: annotation.Subtitle{
Count: 0, Count: 16,
Text: "we", Text: "hundreds of thousands and millions of",
Start: 2970, Start: 59760,
End: 3510, End: 60889,
Segments: []annotation.TimedText{
{Start: 59760, End: 60090, Text: "hundreds"},
{Start: 60090, End: 60149, Text: "of"},
{Start: 60149, End: 60510, Text: "thousands"},
{Start: 60510, End: 60629, Text: "and"},
{Start: 60629, End: 60690, Text: "millions"},
{Start: 60690, End: 60889, Text: "of"},
},
}, },
} }
assert.Equal(t, aExpect, *a[15]) assert.Equal(t, aExpect, *a[15])
aExpect = annotation.Annotation{
Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{
Count: 2,
Text: "12 hours a",
Start: 6120,
End: 6930,
},
}
assert.Equal(t, aExpect, *a[32])
// test endTime of last segment of paragraph
aExpect = annotation.Annotation{
Src: "youtube://VIlLpnJJl_4",
Subtitle: annotation.Subtitle{
Count: 2,
Text: "just to",
Start: 7140,
End: 7970,
},
}
assert.Equal(t, aExpect, *a[43])
} }
func TestNumberOfProducedAnnotations(t *testing.T) { func TestTimedTextToAnnotations(t *testing.T) {
snipped := ` snipped := `
<?xml version="1.0" encoding="utf-8" ?> <?xml version="1.0" encoding="utf-8" ?>
...@@ -247,6 +241,23 @@ func TestNumberOfProducedAnnotations(t *testing.T) { ...@@ -247,6 +241,23 @@ func TestNumberOfProducedAnnotations(t *testing.T) {
<s t="3340" ac="252">use</s> <s t="3340" ac="252">use</s>
<s p="2" t="3550" ac="0">differently</s> <s p="2" t="3550" ac="0">differently</s>
</p> </p>
<p t="4549" d="5681" w="1">
<s p="2" ac="144">stream</s>
<s p="2" t="1000" ac="94">snape</s>
<s p="1" t="1151" ac="200">for</s>
<s t="1571" ac="226">12</s>
<s t="1871" ac="252">hours</s>
<s p="2" t="2111" ac="161">a</s>
<s t="2381" ac="252">day</s>
<s t="2591" ac="252">just</s>
<s p="2" t="2830" ac="187">to</s>
</p>
<p t="12794" d="3556">Philosophers, dramatists, theologians</p>
<p t="16374" d="2206">have grappled with this question for centuries:</p>
<p t="18604" d="1532">what makes people go wrong?</p>
<p t="20160" d="2976">Interestingly, I asked this question when I was a little kid.</p>
<p t="75530" d="1450"> (Laughter) </p>
<p t="77250" d="4140"> Or maybe you were thinking &quot;Where do you get her confidence?&quot; </p>
</body> </body>
</timedtext> </timedtext>
` `
...@@ -255,29 +266,37 @@ func TestNumberOfProducedAnnotations(t *testing.T) { ...@@ -255,29 +266,37 @@ func TestNumberOfProducedAnnotations(t *testing.T) {
timedText, err := parseSRV3([]byte(snipped)) timedText, err := parseSRV3([]byte(snipped))
assert.Equal(t, nil, err) assert.Equal(t, nil, err)
for minWindowLength := 3; minWindowLength <= 8; minWindowLength++ { // Test for the right number of annotations
for maxWindowLength := minWindowLength; maxWindowLength <= 8; maxWindowLength++ { annotations := fromTimedText(timedText, "")
var numPermutations int assert.Equal(t, 8, len(annotations))
assert.Equal(t, 8, len(annotations[0].Subtitle.Segments))
assert.Equal(t, 9, len(annotations[1].Subtitle.Segments))
assert.Equal(t, 0, len(annotations[2].Subtitle.Segments))
assert.Equal(t, 0, len(annotations[3].Subtitle.Segments))
// Calculate sum of sliding windows ( sum_i=1-to-window-length(NumSegments-i+1) // Test if segment text matches
for i := minWindowLength; i <= maxWindowLength; i++ { for i, a := range annotations {
numPermutations += len(timedText.Body.Paragraphs[0].Segments) - i + 1
}
// Test for the right number of annotations // TODO test also text of paragraphs with value but zero segments
annotations := fromTimedText(timedText, minWindowLength, maxWindowLength, "") for segmentIdx, segment := range a.Subtitle.Segments {
assert.Equal(t, numPermutations, len(annotations)) assert.Equal(t, timedText.Body.Paragraphs[i].Segments[segmentIdx].Value, segment.Text)
}
// Test that all annotations are in the range of the given window length range
for _, a := range annotations {
assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) >= minWindowLength)
assert.Equal(t, true, len(strings.Fields(a.Subtitle.Text)) <= maxWindowLength)
}
if len(a.Subtitle.Segments) == 0 {
assert.Equal(t, timedText.Body.Paragraphs[i].Value, a.Subtitle.Text)
} }
} }
aExpect := annotation.Annotation{
Src: "",
Subtitle: annotation.Subtitle{
Count: 8,
Text: " Or maybe you were thinking \"Where do you get her confidence?\" ",
Start: 77250,
End: 81390,
},
}
assert.Equal(t, aExpect, *annotations[7])
} }
func TestSRV3parseManualCaption(t *testing.T) { func TestSRV3parseManualCaption(t *testing.T) {
...@@ -301,50 +320,3 @@ func TestSRV3parseManualCaption(t *testing.T) { ...@@ -301,50 +320,3 @@ func TestSRV3parseManualCaption(t *testing.T) {
assert.Equal(t, 1532, timedText.Body.Paragraphs[2].Duration) assert.Equal(t, 1532, timedText.Body.Paragraphs[2].Duration)
assert.Equal(t, "what makes people go wrong?", timedText.Body.Paragraphs[2].Value) assert.Equal(t, "what makes people go wrong?", timedText.Body.Paragraphs[2].Value)
} }
// TestTimedTextToAnnotationManualCaption parses the manual-caption fixture
// testData/subManual-OsFEV35tWsg.en.srv3 and compares the annotations found at
// indices 0, 15 and 32 against hand-written expectations.
func TestTimedTextToAnnotationManualCaption(t *testing.T) {
	const src = "youtube://OsFEV35tWsg"

	xmlFile, err := os.Open("testData/subManual-OsFEV35tWsg.en.srv3")
	assert.Equal(t, nil, err)
	// Release the file handle once the test returns.
	defer xmlFile.Close()

	// Load the complete fixture into memory before decoding it.
	byteValue, err := ioutil.ReadAll(xmlFile)
	assert.Equal(t, nil, err)

	timedText, err := parseSRV3(byteValue)
	assert.Equal(t, nil, err)

	got := timedText.Annotations(src)

	// Each case pairs an index into the result with the subtitle expected there.
	cases := []struct {
		idx  int
		want annotation.Subtitle
	}{
		{
			idx: 0,
			want: annotation.Subtitle{
				Count: 0,
				Text:  "Philosophers, dramatists, theologians",
				Start: 12794,
				End:   16350,
			},
		},
		{
			idx: 15,
			want: annotation.Subtitle{
				Count: 15,
				Text:  "with them on the good side,\nthe others on the bad side --",
				Start: 56652,
				End:   59325,
			},
		},
		{
			idx: 32,
			want: annotation.Subtitle{
				Count: 32,
				Text:  "And apparently, he disobeyed God,",
				Start: 104661,
				End:   107866,
			},
		},
	}

	for _, c := range cases {
		expected := annotation.Annotation{Src: src, Subtitle: c.want}
		assert.Equal(t, expected, *got[c.idx])
	}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment