Commit db267e3e authored by Christof Gerber's avatar Christof Gerber

Start implementing GetSubtitle with YouTube Data API instead of youtube-dl....

Start implementing GetSubtitle with YouTube Data API instead of youtube-dl. Unsolved Problem: CaptionDownload requires OAuth 2.
parent 849caaa9
......@@ -2,19 +2,14 @@ package youtubedl
import (
"context"
"encoding/json"
"fmt"
"github.com/asticode/go-astisub"
log "github.com/sirupsen/logrus"
"google.golang.org/api/googleapi/transport"
"google.golang.org/api/option"
"google.golang.org/api/youtube/v3"
"net/http"
"net/url"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
)
......@@ -52,158 +47,56 @@ func (e ErrYoutubeDL) Error() string {
return string(e)
}
// runYoutubeDL invokes the youtube-dl executable with the given arguments
// and returns everything it wrote to stdout and stderr combined.
// When the command cannot be run or exits with an error, the captured
// output is dropped and only the error is returned.
func runYoutubeDL(args []string) (output []byte, err error) {
	combined, runErr := exec.Command("youtube-dl", args...).CombinedOutput()
	if runErr != nil {
		return nil, runErr
	}
	return combined, nil
}
func getVideoInfo(id string) (info map[string]interface{}, err error) {
args := []string{
"--skip-download",
"-J",
"https://youtu.be/" + id,
}
output, err := runYoutubeDL(args)
if err != nil {
return info, err
func findSubtitle(auto bool, videoID string, language string, apiKey string) (subID string, err error) {
client := &http.Client{
Transport: &transport.APIKey{Key: apiKey},
}
err = json.Unmarshal(output, &info)
service, err := youtube.NewService(context.Background(), option.WithHTTPClient(client))
if err != nil {
return info, err
}
return info, err
}
// SubList groups the subtitle tracks listed for a video into two maps,
// each keyed by language and holding the subtitle formats offered for
// that language.
type SubList struct {
	// AutomaticCaptions holds the automatically generated YouTube captions.
	AutomaticCaptions map[string][]string
	// Subtitles holds the proper (non-generated) subtitles.
	Subtitles map[string][]string
}
// NewSubList returns a SubList whose two maps are allocated and empty,
// so entries can be added to them right away.
func NewSubList() *SubList {
	list := new(SubList)
	list.AutomaticCaptions = map[string][]string{}
	list.Subtitles = map[string][]string{}
	return list
}
func listSubs(id string) (subs *SubList, err error) {
subs = NewSubList()
args := []string{
"--skip-download",
"--list-subs",
"https://youtu.be/" + id,
return "", err
}
output, err := runYoutubeDL(args)
// Check for available captions
call := service.Captions.List("id,snippet", videoID)
response, err := call.Do()
if err != nil {
return nil, err
return "", err
}
lines := strings.Split(string(output), "\n")
section := ""
for _, line := range lines {
if strings.Contains(strings.ToLower(line), "youtube") {
continue
}
if strings.Contains(strings.ToLower(line), "available automatic captions") {
section = "automatic captions"
continue
}
if strings.Contains(strings.ToLower(line), "available subtitles") {
section = "available subtitles"
continue
}
if strings.Contains(strings.ToLower(line), "language formats") {
continue
}
if section != "" {
space := regexp.MustCompile(`\s+`)
line := space.ReplaceAllString(line, " ")
line = strings.ReplaceAll(line, ",", "")
fields := strings.Split(string(line), " ")
if len(fields) >= 2 {
if section == "automatic captions" {
subs.AutomaticCaptions[fields[0]] = fields[1:]
}
if section == "available subtitles" {
subs.Subtitles[fields[0]] = fields[1:]
}
for _, item := range response.Items {
if item.Snippet.Language == language {
// manual caption
if !auto && item.Snippet.TrackKind == "standard" {
return item.Id, nil
// automatic caption
} else if item.Snippet.TrackKind == "ASR" {
return item.Id, nil
}
}
}
return subs, nil
}
// findSub returns the formats in which a proper subtitle for the given
// language is listed for the video. The result is nil when no subtitle in
// that language is listed; an error from listSubs is passed through.
func findSub(id string, language string) (formats []string, err error) {
	available, listErr := listSubs(id)
	if listErr != nil {
		return nil, listErr
	}
	// Indexing a map with a missing key yields a nil slice, which is the
	// "not available" result.
	return available.Subtitles[language], nil
}
if auto {
return "", ErrYoutubeDLLangNotSupported("No automatic caption found")
} else {
return "", ErrYoutubeDLLangNotSupported("No standard subtitle found")
// findAutomaticCaption returns the formats in which an automatic caption
// for the given language is listed for the video. The result is nil when
// no automatic caption in that language is listed; an error from listSubs
// is passed through.
func findAutomaticCaption(id string, language string) (formats []string, err error) {
	available, listErr := listSubs(id)
	if listErr != nil {
		return nil, listErr
	}
	// Indexing a map with a missing key yields a nil slice, which is the
	// "not available" result.
	return available.AutomaticCaptions[language], nil
}
// downloadSubtitle downloads the subtitle of the given youtube video in the given language and format and saves it as file to $file.$language.$format.
func downloadSubtitle(auto bool, id string, language string, format string, file string) error {
subType := "--write-sub"
if auto {
subType = "--write-auto-sub"
}
args := []string{
"--skip-download",
subType,
"--sub-lang",
language,
"--sub-format",
format,
"https://youtu.be/" + id,
"-o",
file,
func downloadSubtitle(subID string, apiKey string) error {
client := &http.Client{
Transport: &transport.APIKey{Key: apiKey},
}
output, err := runYoutubeDL(args)
service, err := youtube.NewService(context.Background(), option.WithHTTPClient(client))
if err != nil {
return err
}
if strings.Contains(strings.ToLower(string(output)), "subtitles not available") {
return ErrYoutubeDL(fmt.Sprintf("subtitles language %s not available", language))
}
if strings.Contains(strings.ToLower(string(output)), "no subtitle format found") {
return ErrYoutubeDL(fmt.Sprintf("subtitle format %s not found", format))
//TODO delete the alternative subtitle file that it downloads automatically
// Download the caption track in SRT format
call := service.Captions.Download(subID).Tfmt("srt")
err = call.Do()
if err != nil {
return err
}
return nil
......@@ -212,54 +105,20 @@ func downloadSubtitle(auto bool, id string, language string, format string, file
// GetSubtitle downloads the subtitle of the video id in the passed language.
// Returns an error if the subtitle is not found.
// Downloads the automatic caption when auto is true.
func GetSubtitle(auto bool, id string, language string) (sub *astisub.Subtitles, err error) {
var formats []string
if auto == false {
formats, err = findSub(id, language)
if err != nil {
return nil, err
}
if formats == nil {
return nil, ErrYoutubeDLLangNotSupported(fmt.Sprintf("No subtitle with language %s for video %s", language, id))
}
} else {
formats, err = findAutomaticCaption(id, language)
if err != nil {
return nil, err
}
func GetSubtitle(auto bool, videoID string, language string, apiKey string) (sub *astisub.Subtitles, err error) {
if formats == nil {
return nil, ErrYoutubeDLLangNotSupported(fmt.Sprintf("No automatic caption with language %s for video %s", language, id))
}
subID, err := findSubtitle(auto, videoID, language, apiKey)
if err != nil {
return nil, err
}
for _, format := range formats {
if _, ok := SupportedSubs[SubtitleFormat(format)]; !ok {
log.Infof("Subtitle %s is not supported", format)
continue
}
filename := filepath.Join("/tmp", "sub-"+id)
file := filename + "." + language + "." + SubVTT.String()
err := downloadSubtitle(auto, id, language, SubVTT.String(), filename)
defer os.Remove(file)
if err != nil {
log.Warn(err)
continue
}
sub, err := astisub.OpenFile(file)
if err != nil {
log.Warn(err)
continue
}
return sub, nil
err = downloadSubtitle(subID, apiKey)
if err != nil {
return nil, err
}
return nil, ErrYoutubeDL("No available subtitle is supported")
return nil, nil
}
// GetVideoURL retrieves the videoplayback url of the video and audio stream
......@@ -271,9 +130,8 @@ func GetVideoURL(id string) (video *url.URL, audio *url.URL, err error) {
}
// run command
//log.Println(args)
cmd := exec.Command("youtube-dl", args...)
out, err := cmd.CombinedOutput()
out, err := cmd.Output()
if err != nil {
return nil, nil, err
}
......
package youtubedl
import (
"fmt"
log "github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"os"
"path/filepath"
"testing"
)
......@@ -25,96 +23,15 @@ func TestGetYouTubeDownloadLink(t *testing.T) {
assert.Equal(t, "/videoplayback", audio.Path)
}
// TestVideoIDToSub checks that getVideoInfo succeeds for a known video and
// that the first entry of the "tags" list in the returned info is the
// string "TED".
func TestVideoIDToSub(t *testing.T) {
	info, err := getVideoInfo("OsFEV35tWsg")
	assert.Equal(t, nil, err)

	tags, ok := info["tags"].([]interface{})
	if !ok {
		t.Errorf("got data of type %T but wanted []interface{}", info["tags"])
		return
	}
	// Guard the index below: an empty tag list must fail the test, not
	// panic it.
	if len(tags) == 0 {
		t.Errorf("got empty tags list but wanted at least one tag")
		return
	}
	tag, ok := tags[0].(string)
	if !ok {
		// Report the dynamic type of the element itself; the zero-valued
		// result of the failed assertion would always print "string".
		t.Errorf("got data of type %T but wanted string", tags[0])
		return
	}
	assert.Equal(t, "TED", tag)
}
// TestListSubs checks that listSubs reports at least one language for both
// automatic captions and proper subtitles of a known video, and that the
// first two formats listed for "en" are "vtt" and "ttml" in both maps.
func TestListSubs(t *testing.T) {
	subs, err := listSubs("OsFEV35tWsg")
	assert.Equal(t, nil, err)
	// listSubs returns a nil SubList on error; stop here instead of
	// dereferencing it.
	if subs == nil {
		t.Fatal("listSubs returned a nil SubList")
	}

	assert.Equal(t, true, len(subs.Subtitles) > 0)
	assert.Equal(t, true, len(subs.AutomaticCaptions) > 0)

	// Verify the lengths before indexing so a short list fails the test
	// with a message rather than an index-out-of-range panic.
	auto := subs.AutomaticCaptions["en"]
	if len(auto) < 2 {
		t.Fatalf("got %d automatic caption formats for en but wanted at least 2", len(auto))
	}
	assert.Equal(t, "vtt", auto[0])
	assert.Equal(t, "ttml", auto[1])

	manual := subs.Subtitles["en"]
	if len(manual) < 2 {
		t.Fatalf("got %d subtitle formats for en but wanted at least 2", len(manual))
	}
	assert.Equal(t, "vtt", manual[0])
	assert.Equal(t, "ttml", manual[1])
}
// TestFindSub checks that findSub finds English subtitles for a known video
// and that the first two formats returned are "vtt" and "ttml".
func TestFindSub(t *testing.T) {
	formats, err := findSub("OsFEV35tWsg", "en")
	assert.Equal(t, nil, err)
	assert.Equal(t, true, len(formats) > 0)
	// Two elements are indexed below; stop with a clear failure instead of
	// an index-out-of-range panic when fewer were returned.
	if len(formats) < 2 {
		t.Fatalf("got %d formats but wanted at least 2", len(formats))
	}
	assert.Equal(t, "vtt", formats[0])
	assert.Equal(t, "ttml", formats[1])
}
func TestSubDownload(t *testing.T) {
id := "OsFEV35tWsg"
language := "en"
format := "vtt"
filename := filepath.Join("/tmp", "sub-"+id)
err := downloadSubtitle(false, id, language, format, filename)
subID, err := findSubtitle(false, "OsFEV35tWsg", "en", os.Getenv("YOUTUBE_DATA_API_KEY"))
assert.Equal(t, nil, err)
fileWithSuffix := filename + "." + language + "." + format
_, err = os.Stat(fileWithSuffix)
assert.Equal(t, nil, err)
assert.Equal(t, false, os.IsNotExist(err))
assert.Equal(t, nil, os.Remove(fileWithSuffix))
}
func TestSubDownloadLanguageNotSupported(t *testing.T) {
id := "OsFEV35tWsg"
language := "zz"
format := "vtt"
filename := filepath.Join("/tmp", "sub-"+id)
err := downloadSubtitle(false, id, language, format, filename)
assert.Equal(t, fmt.Sprintf("subtitles language %s not available", language), err.Error())
assert.Equal(t, "9bgPOMkqAJ81EZY6OQu64ceZYlSA3W61NBUpegcd8-0=", subID)
}
// TestSubDownloadFormatNotSupported checks that requesting the "srt" format
// for a known video makes downloadSubtitle return the
// "subtitle format srt not found" error.
func TestSubDownloadFormatNotSupported(t *testing.T) {
	id := "OsFEV35tWsg"
	language := "en"
	format := "srt"
	filename := filepath.Join("/tmp", "sub-"+id)
	err := downloadSubtitle(false, id, language, format, filename)
	// An unexpected success must fail the test; calling err.Error() on a
	// nil error would panic instead.
	if err == nil {
		t.Fatalf("downloading format %s succeeded but wanted an error", format)
	}
	assert.Equal(t, fmt.Sprintf("subtitle format %s not found", format), err.Error())
}
// TestAutomaticCaptionDownload downloads the automatic English caption of a
// known video in vtt format, verifies that the file
// /tmp/automaticCaption-<id>.<language>.<format> exists, and removes it.
func TestAutomaticCaptionDownload(t *testing.T) {
	const (
		videoID = "OsFEV35tWsg"
		lang    = "en"
		ext     = "vtt"
	)
	base := filepath.Join("/tmp", "automaticCaption-"+videoID)
	downloadErr := downloadSubtitle(true, videoID, lang, ext, base)
	assert.Equal(t, nil, downloadErr)

	// The expected file name is the base path followed by the language
	// and format suffixes.
	target := base + "." + lang + "." + ext
	_, statErr := os.Stat(target)
	assert.Equal(t, nil, statErr)
	assert.Equal(t, false, os.IsNotExist(statErr))
	assert.Equal(t, nil, os.Remove(target))
}
func TestDownloadSub(t *testing.T) {
func TestGetSubtitle(t *testing.T) {
log.SetFormatter(&log.JSONFormatter{})
log.SetLevel(log.InfoLevel)
sub, err := GetSubtitle(false, "OsFEV35tWsg", "en")
err := downloadSubtitle("9bgPOMkqAJ81EZY6OQu64ceZYlSA3W61NBUpegcd8-0=", os.Getenv("YOUTUBE_DATA_API_KEY"))
assert.Equal(t, nil, err)
assert.Equal(t, "Philosophers, dramatists, theologians", sub.Items[0].String())
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment