To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit fdbf50fb authored by Christof Gerber's avatar Christof Gerber
Browse files

Add GetSubtitle and GetVideoURL methods

parent a3f16821
module gitlab.ethz.ch/chgerber/youtube-dl
module gitlab.ethz.ch/chgerber/youtubedl
go 1.12
replace gitlab.ethz.ch/chgerber/annotation => ../annotation
require (
github.com/asticode/go-astiamqp v1.0.0 // indirect
github.com/asticode/go-astisub v0.0.0-20181231080834-e2ca1c7ce8f4
github.com/asticode/go-astits v0.0.0-20190105100228-f4a041fc41e5 // indirect
github.com/pkg/errors v0.8.1 // indirect
github.com/sirupsen/logrus v1.4.1
github.com/stretchr/testify v1.3.0
golang.org/x/text v0.3.0 // indirect
gitlab.ethz.ch/chgerber/annotation v1.0.0
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190405175925-bdc7d43004ec // indirect
)
......@@ -11,10 +11,18 @@ github.com/asticode/go-astits v0.0.0-20190105100228-f4a041fc41e5/go.mod h1:IO8ru
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/mackerelio/go-osstat v0.0.0-20190208070000-fcbbc2a9dcc2 h1:Q20K8NloaHNlJuzViBjV9yxFb0kQck/FE16s+ftsegg=
github.com/mackerelio/go-osstat v0.0.0-20190208070000-fcbbc2a9dcc2/go.mod h1:sRByAXz76nwXkhnEDpyxB17EbiP+qYBT+oA/CdhC5fQ=
github.com/mattn/go-colorable v0.0.9 h1:UVL0vNpWh04HeJXV0KLcaT7r06gOH2l4OW6ddYRUIY4=
github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
github.com/mattn/go-isatty v0.0.4 h1:bnP0vzxcAdeI1zdubAl5PjU6zsERjGZb7raWodagDYs=
......@@ -26,17 +34,36 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/sirupsen/logrus v1.2.0 h1:juTguoYk5qI21pwyTXY3B3Y5cOTH3ZUyZCg1v/mihuo=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.1 h1:GL2rEmy6nsikmW0r8opw9JIRScdMF5hA8cOYLH7In1k=
github.com/sirupsen/logrus v1.4.1/go.mod h1:ni0Sbl8bgC9z8RoU9G6nDWqqs/fq4eDPysMBDgk/93Q=
github.com/streadway/amqp v0.0.0-20181205114330-a314942b2fd9/go.mod h1:1WNBiOZtZQLpVAyu0iTduoJL9hEsMloAK5XWrtW0xdY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/tidwall/pretty v0.0.0-20190325153808-1166b9ac2b65 h1:rQ229MBgvW68s1/g6f1/63TgYwYxfF4E+bi/KC19P8g=
github.com/tidwall/pretty v0.0.0-20190325153808-1166b9ac2b65/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c h1:u40Z8hqBAAQyv+vATcGgV0YCnDjqSL7/q/JyPhhJSPk=
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I=
github.com/xdg/stringprep v1.0.0 h1:d9X0esnoa3dFsV0FG35rAT0RIhYFlPq7MiP+DW89La0=
github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y=
gitlab.ethz.ch/chgerber/MessageComposition v1.0.1 h1:76h9jVxhCcP4Snwft1YbJXSNy1/3oF4c3iMKI3ZUYLw=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190405175925-bdc7d43004ec h1:efwGm29k9Xya1HcccUwufHapZ6FDwLR0e9UVdUBAcTA=
gitlab.ethz.ch/chgerber/monitor v0.0.0-20190405175925-bdc7d43004ec/go.mod h1:HqYam5C8lDlOiYTcNgbvi11jILmk1puFjAvVoehT36o=
go.mongodb.org/mongo-driver v1.0.0 h1:KxPRDyfB2xXnDE2My8acoOWBQkfv3tz0SaWTRZjJR0c=
go.mongodb.org/mongo-driver v1.0.0/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9 h1:mKdxBk7AujPs8kU4m80U72y/zjbZ3UcXC7dClwKbUI0=
golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5 h1:bselrhR0Or1vomJZC8ZIjWtbDmn9OYFLX5Ik9alpJpE=
golang.org/x/crypto v0.0.0-20190404164418-38d8ce5564a5/go.mod h1:WFFai1msRO1wXaEeE5yQxYXgSfI8pQAWXbQop6sCtWE=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6 h1:bjcUS9ztw9kFmmIxJInhon/0Is3p+EHBKNgquIzo1OI=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181217223516-dcdaa6325bcb h1:zzdd4xkMwu/GRxhSUJaCPh4/jil9kAbsU7AUmXboO+A=
golang.org/x/sys v0.0.0-20181217223516-dcdaa6325bcb/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e h1:nFYrTHrdrAOpShe27kaFHjsqYSEQ0KWqdWLu3xuZJts=
golang.org/x/sys v0.0.0-20190403152447-81d4e9dc473e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
package youtube_dl
package youtubedl
import (
"encoding/json"
"log"
"fmt"
"github.com/asticode/go-astisub"
log "github.com/sirupsen/logrus"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
)
// SubtitleFormat identifies a subtitle format by its short name, for example "vtt".
type SubtitleFormat string

// String returns the format name as a plain string.
func (f SubtitleFormat) String() string {
	name := string(f)
	return name
}
// Subtitle format names known to this package.
const (
// SubVTT is the "vtt" format name.
SubVTT SubtitleFormat = "vtt"
// SubSRT is the "srt" format name.
SubSRT SubtitleFormat = "srt"
// SubSSA is the "ssa" format name.
SubSSA SubtitleFormat = "ssa"
// SubSTL is the "stl" format name.
SubSTL SubtitleFormat = "stl"
// SubTTML is the "ttml" format name.
SubTTML SubtitleFormat = "ttml"
)
// SupportedSubs is the set of subtitle formats GetSubtitle is willing to try.
// Formats that are absent from the map are logged and skipped there.
var SupportedSubs = map[SubtitleFormat]bool{
SubVTT: true,
SubSRT: true,
SubSSA: true,
SubSTL: true,
SubTTML: true,
}
// ErrYoutubeDL is a string-based error type; the string value is the error message.
type ErrYoutubeDL string

// Error implements the error interface by returning the message unchanged.
func (msg ErrYoutubeDL) Error() string {
	text := string(msg)
	return text
}
func runYoutubeDL(args []string) (output []byte, err error) {
cmd := exec.Command("youtube-dl", args...)
output, err = cmd.Output()
output, err = cmd.CombinedOutput()
if err != nil {
return nil, err
}
......@@ -38,24 +72,195 @@ func getVideoInfo(id string) (info map[string]interface{}, err error) {
return info, err
}
// SubList holds the subtitle tracks listed for a video as two maps.
// Each map is keyed by language and holds the subtitle formats offered for
// that language.
type SubList struct {
// AutomaticCaptions are the automatically generated YouTube captions.
AutomaticCaptions map[string][]string
// Subtitles are the proper (not automatically generated) subtitles.
Subtitles map[string][]string
}
func listSubs(id string) (subs string, err error) {
// NewSubList returns a SubList whose two maps are allocated and empty, so
// callers can insert entries without first checking for nil maps.
func NewSubList() *SubList {
	list := new(SubList)
	list.AutomaticCaptions = map[string][]string{}
	list.Subtitles = map[string][]string{}
	return list
}
// subListWhitespace matches runs of whitespace in a line of
// `youtube-dl --list-subs` output. It is compiled once here rather than once
// per parsed line.
var subListWhitespace = regexp.MustCompile(`\s+`)

// listSubs asks youtube-dl which subtitles and automatic captions exist for
// the video with the given id and returns them grouped by language.
//
// When youtube-dl fails, the returned SubList is nil and the error is passed
// through unchanged.
func listSubs(id string) (subs *SubList, err error) {
	args := []string{
		"--skip-download",
		"--list-subs",
		"https://youtu.be/" + id,
	}
	output, err := runYoutubeDL(args)
	if err != nil {
		return nil, err
	}
	return parseSubList(string(output)), nil
}

// parseSubList converts the textual output of `youtube-dl --list-subs` into a SubList.
func parseSubList(output string) *SubList {
	const (
		sectionNone = iota
		sectionAutomaticCaptions
		sectionSubtitles
	)

	subs := NewSubList()
	section := sectionNone
	for _, line := range strings.Split(output, "\n") {
		log.Print(line)

		lower := strings.ToLower(line)
		switch {
		case strings.Contains(lower, "youtube"):
			// Presumably youtube-dl status lines such as "[youtube] ...";
			// they are never treated as table rows.
			continue
		case strings.Contains(lower, "available automatic captions"):
			section = sectionAutomaticCaptions
			continue
		case strings.Contains(lower, "available subtitles"):
			section = sectionSubtitles
			continue
		case strings.Contains(lower, "language formats"):
			// Column header of a table, not a data row.
			continue
		case section == sectionNone:
			// Nothing before the first section heading belongs to a table.
			continue
		}

		// Treat commas as separators, collapse whitespace runs and trim the
		// ends so that splitting on a single space never yields empty fields
		// (for example from a trailing "\r" or trailing blanks).
		row := strings.ReplaceAll(line, ",", " ")
		row = strings.TrimSpace(subListWhitespace.ReplaceAllString(row, " "))
		fields := strings.Split(row, " ")
		if len(fields) < 2 {
			continue
		}

		// The first field is the language, the rest are the offered formats.
		switch section {
		case sectionAutomaticCaptions:
			subs.AutomaticCaptions[fields[0]] = fields[1:]
		case sectionSubtitles:
			subs.Subtitles[fields[0]] = fields[1:]
		}
	}
	return subs
}
// findSub looks up the subtitle formats offered for the video id in the given
// language. The result is nil when the language has no entry in the subtitle
// listing; an error is returned only when the listing itself fails.
func findSub(id string, language string) (formats []string, err error) {
	listing, listErr := listSubs(id)
	if listErr != nil {
		return nil, listErr
	}
	// Indexing a missing key yields a nil slice, which is exactly the
	// "not available" result.
	return listing.Subtitles[language], nil
}
// findAutomaticCaption looks up the automatic-caption formats offered for the
// video id in the given language. The result is nil when the language has no
// entry in the automatic-caption listing; an error is returned only when the
// listing itself fails.
func findAutomaticCaption(id string, language string) (formats []string, err error) {
	listing, listErr := listSubs(id)
	if listErr != nil {
		return nil, listErr
	}
	// Indexing a missing key yields a nil slice, which is exactly the
	// "not available" result.
	return listing.AutomaticCaptions[language], nil
}
// downloadSubtitle has youtube-dl fetch the subtitle of the given YouTube
// video in the given language and format, without downloading the video.
// The result is saved as $file.$language.$format. When auto is true the
// automatically generated caption is requested instead of the regular subtitle.
//
// An ErrYoutubeDL is returned when the youtube-dl output reports that the
// language or the format is unavailable; a failure to run youtube-dl is
// returned as is.
func downloadSubtitle(auto bool, id string, language string, format string, file string) error {
	writeFlag := "--write-sub"
	if auto {
		writeFlag = "--write-auto-sub"
	}

	output, err := runYoutubeDL([]string{
		"--skip-download",
		writeFlag,
		"--sub-lang", language,
		"--sub-format", format,
		"https://youtu.be/" + id,
		"-o", file,
	})
	if err != nil {
		return err
	}

	// The run itself succeeded, so problems are detected from the messages
	// in the output.
	report := strings.ToLower(string(output))
	switch {
	case strings.Contains(report, "subtitles not available"):
		return ErrYoutubeDL(fmt.Sprintf("subtitles language %s not available", language))
	case strings.Contains(report, "no subtitle format found"):
		//TODO delete the alternative subtitle file that it downloads automatically
		return ErrYoutubeDL(fmt.Sprintf("subtitle format %s not found", format))
	}
	return nil
}
// GetSubtitle downloads and parses the subtitle of the video id in the given
// language. When auto is true the automatically generated caption is used
// instead of the regular subtitle, both for looking up the available formats
// and for the download.
//
// The formats listed for the language are tried in order. Formats that are not
// in SupportedSubs are skipped, and a format whose download or parsing fails
// is logged and the next one is tried. An ErrYoutubeDL is returned when no
// format succeeds; a failure to list the formats is returned as is.
func GetSubtitle(auto bool, id string, language string) (sub *astisub.Subtitles, err error) {
	find := findSub
	if auto {
		find = findAutomaticCaption
	}
	formats, err := find(id, language)
	if err != nil {
		return nil, err
	}

	for _, format := range formats {
		if !SupportedSubs[SubtitleFormat(format)] {
			log.Infof("Subtitle %s is not supported", format)
			continue
		}
		parsed, fetchErr := fetchSubtitle(auto, id, language, format)
		if fetchErr != nil {
			log.Warn(fetchErr)
			continue
		}
		return parsed, nil
	}
	return nil, ErrYoutubeDL("No available subtitle is supported")
}

// fetchSubtitle downloads one subtitle track in the given format into the
// temporary directory, parses it and removes the file again.
func fetchSubtitle(auto bool, id, language, format string) (*astisub.Subtitles, error) {
	base := filepath.Join(os.TempDir(), "sub-"+id)
	// downloadSubtitle saves its result as $file.$language.$format.
	file := base + "." + language + "." + format
	// Best-effort cleanup: the file may not exist if the download failed, so
	// the error from Remove is deliberately ignored.
	defer os.Remove(file)

	if err := downloadSubtitle(auto, id, language, format, base); err != nil {
		return nil, err
	}
	return astisub.OpenFile(file)
}
// GetVideoURL retrieves the videoplayback URLs of the video stream and the
// audio stream of the YouTube video with the given id by running
// `youtube-dl -g`.
//
// It returns an ErrYoutubeDL when youtube-dl does not print exactly two
// links. A failure to run youtube-dl or to parse a link is returned as is.
func GetVideoURL(id string) (video *url.URL, audio *url.URL, err error) {
	cmd := exec.Command("youtube-dl", "-g", "https://www.youtube.com/watch?v="+id)

	// Read stdout only. Merging stderr into the result would let any warning
	// youtube-dl prints be counted as a link and fail the check below.
	out, err := cmd.Output()
	if err != nil {
		return nil, nil, err
	}

	// Fields splits on any whitespace, so "\r\n" line endings and surrounding
	// blank lines do not end up inside a link.
	links := strings.Fields(string(out))
	if len(links) != 2 {
		return nil, nil, ErrYoutubeDL(fmt.Sprintf("Expected two links but got %v", len(links)))
	}

	// The first link is taken as the video stream, the second as the audio stream.
	video, err = url.Parse(links[0])
	if err != nil {
		return nil, nil, err
	}
	audio, err = url.Parse(links[1])
	if err != nil {
		return nil, nil, err
	}
	return video, audio, nil
}
package youtube_dl
package youtubedl
import (
"fmt"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"os"
"path/filepath"
"testing"
)
// TestGetYouTubeDownloadLink checks that GetVideoURL returns a video and an
// audio URL whose path is /videoplayback. GetVideoURL runs the youtube-dl
// binary, so the test needs it to be installed.
func TestGetYouTubeDownloadLink(t *testing.T) {
	video, audio, err := GetVideoURL("OsFEV35tWsg")
	// Both URLs are nil when the call fails; stop before dereferencing them.
	if !assert.NoError(t, err) {
		return
	}
	assert.Equal(t, "/videoplayback", video.Path)
	assert.Equal(t, "/videoplayback", audio.Path)
}
// TestVideoIDToSub checks that getVideoInfo returns a "tags" list whose first
// entry is the string "TED".
func TestVideoIDToSub(t *testing.T) {
	info, err := getVideoInfo("OsFEV35tWsg")
	if !assert.NoError(t, err) {
		return
	}
	fmt.Print(info)

	tags, ok := info["tags"].([]interface{})
	if !ok {
		t.Fatalf("got data of type %T but wanted []interface{}", info["tags"])
	}
	// Guard the index below: an empty list would otherwise panic.
	if len(tags) == 0 {
		t.Fatal("got an empty tag list but wanted at least one tag")
	}
	tag, ok := tags[0].(string)
	if !ok {
		// Report the type of the actual element, not of the zero-valued tag.
		t.Fatalf("got data of type %T but wanted string", tags[0])
	}
	assert.Equal(t, "TED", tag)
}
// TestListSubs checks the English entries and the number of languages that
// listSubs reports for a known video.
func TestListSubs(t *testing.T) {
	subs, err := listSubs("OsFEV35tWsg")
	// subs is nil when listing fails; stop before dereferencing it.
	if !assert.NoError(t, err) {
		return
	}
	fmt.Print(subs)

	// Compare the first two formats as a slice so that a missing language or
	// a short format list fails the assertion instead of panicking.
	want := []string{"vtt", "ttml"}
	if auto := subs.AutomaticCaptions["en"]; assert.True(t, len(auto) >= 2, "want at least two automatic caption formats for en") {
		assert.Equal(t, want, auto[:2])
	}
	if manual := subs.Subtitles["en"]; assert.True(t, len(manual) >= 2, "want at least two subtitle formats for en") {
		assert.Equal(t, want, manual[:2])
	}
	assert.Equal(t, 35, len(subs.Subtitles))
	assert.Equal(t, 104, len(subs.AutomaticCaptions))
}
// TestFindSub checks that findSub reports exactly the formats "vtt" and
// "ttml", in that order, for the English subtitle of a known video.
func TestFindSub(t *testing.T) {
	formats, err := findSub("OsFEV35tWsg", "en")
	assert.NoError(t, err)
	// Comparing the whole slice covers both length and content without
	// indexing into a possibly short or nil result.
	assert.Equal(t, []string{"vtt", "ttml"}, formats)
}
// TestSubDownload downloads the English vtt subtitle of a known video,
// verifies that the expected file exists and removes it again.
func TestSubDownload(t *testing.T) {
	const (
		id       = "OsFEV35tWsg"
		language = "en"
		format   = "vtt"
	)
	target := filepath.Join("/tmp", "sub-"+id)

	downloadErr := downloadSubtitle(false, id, language, format, target)
	assert.Equal(t, nil, downloadErr)

	// downloadSubtitle saves its result as $file.$language.$format.
	downloaded := target + "." + language + "." + format
	_, statErr := os.Stat(downloaded)
	assert.Equal(t, nil, statErr)
	assert.Equal(t, false, os.IsNotExist(statErr))
	assert.Equal(t, nil, os.Remove(downloaded))
}
// TestSubDownloadLanguageNotSupported checks that requesting a language that
// is not available yields the "subtitles language ... not available" error.
func TestSubDownloadLanguageNotSupported(t *testing.T) {
	id := "OsFEV35tWsg"
	language := "zz"
	format := "vtt"
	filename := filepath.Join("/tmp", "sub-"+id)
	err := downloadSubtitle(false, id, language, format, filename)
	// EqualError fails cleanly on a nil error instead of panicking on err.Error().
	assert.EqualError(t, err, fmt.Sprintf("subtitles language %s not available", language))
}
// TestSubDownloadFormatNotSupported checks that requesting a format that is
// not offered yields the "subtitle format ... not found" error.
func TestSubDownloadFormatNotSupported(t *testing.T) {
	id := "OsFEV35tWsg"
	language := "en"
	format := "srt"
	filename := filepath.Join("/tmp", "sub-"+id)
	err := downloadSubtitle(false, id, language, format, filename)
	// EqualError fails cleanly on a nil error instead of panicking on err.Error().
	assert.EqualError(t, err, fmt.Sprintf("subtitle format %s not found", format))
}
// TestAutomaticCaptionDownload downloads the English vtt automatic caption of
// a known video, verifies that the expected file exists and removes it again.
func TestAutomaticCaptionDownload(t *testing.T) {
	const (
		id       = "OsFEV35tWsg"
		language = "en"
		format   = "vtt"
	)
	target := filepath.Join("/tmp", "automaticCaption-"+id)

	downloadErr := downloadSubtitle(true, id, language, format, target)
	assert.Equal(t, nil, downloadErr)

	// downloadSubtitle saves its result as $file.$language.$format.
	downloaded := target + "." + language + "." + format
	_, statErr := os.Stat(downloaded)
	assert.Equal(t, nil, statErr)
	assert.Equal(t, false, os.IsNotExist(statErr))
	assert.Equal(t, nil, os.Remove(downloaded))
}
// TestGetSubtitle checks that GetSubtitle returns the English subtitle of a
// known video and that its first item has the expected text.
func TestGetSubtitle(t *testing.T) {
	log.SetFormatter(&log.JSONFormatter{})
	log.SetLevel(log.InfoLevel)
	sub, err := GetSubtitle(false, "OsFEV35tWsg", "en")
	// sub is nil when GetSubtitle fails; stop before dereferencing it.
	if !assert.NoError(t, err) {
		return
	}
	// Guard the index below against a subtitle without items.
	if !assert.NotEmpty(t, sub.Items) {
		return
	}
	assert.Equal(t, "Philosophers, dramatists, theologians", sub.Items[0].String())
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment