yt-dlp uses mp4 format by default to merge sources with separate video and audio streams. mp4 format does not handle VP9 codec, so adding the ability to set the merge output format, we can set it to use for ex. mkv, which handles VP9 well. With the possibility of setting the sorting format we can tell yt-dlp which criteria to consider as the best format.
537 lines
18 KiB
Go
537 lines
18 KiB
Go
// Package goutubedl provides a wrapper for youtube-dl.
|
|
package goutubedl
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"path"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// Path to youtube-dl binary. Default look for "youtube-dl" in PATH.
|
|
var Path = "youtube-dl"
|
|
|
|
// Printer is something that can print
|
|
type Printer interface {
|
|
Print(v ...interface{})
|
|
}
|
|
|
|
type nopPrinter struct{}
|
|
|
|
func (nopPrinter) Print(v ...interface{}) {}
|
|
|
|
// YoutubedlError is a error from youtube-dl
|
|
type YoutubedlError string
|
|
|
|
func (e YoutubedlError) Error() string {
|
|
return string(e)
|
|
}
|
|
|
|
// ErrNotAPlaylist error when single entry when expected a playlist
|
|
var ErrNotAPlaylist = errors.New("single entry when expected a playlist")
|
|
|
|
// ErrNotASingleEntry error when playlist when expected a single entry
|
|
var ErrNotASingleEntry = errors.New("playlist when expected a single entry")
|
|
|
|
// Info youtube-dl info
|
|
type Info struct {
|
|
// Generated from youtube-dl README using:
|
|
// sed -e 's/ - `\(.*\)` (\(.*\)): \(.*\)/\1 \2 `json:"\1"` \/\/ \3/' | sed -e 's/numeric/float64/' | sed -e 's/boolean/bool/' | sed -e 's/_id/ID/' | sed -e 's/_count/Count/'| sed -e 's/_uploader/Uploader/' | sed -e 's/_key/Key/' | sed -e 's/_year/Year/' | sed -e 's/_title/Title/' | sed -e 's/_rating/Rating/' | sed -e 's/_number/Number/' | awk '{print toupper(substr($0, 0, 1)) substr($0, 2)}'
|
|
ID string `json:"id"` // Video identifier
|
|
Title string `json:"title"` // Video title
|
|
URL string `json:"url"` // Video URL
|
|
AltTitle string `json:"alt_title"` // A secondary title of the video
|
|
DisplayID string `json:"display_id"` // An alternative identifier for the video
|
|
Uploader string `json:"uploader"` // Full name of the video uploader
|
|
License string `json:"license"` // License name the video is licensed under
|
|
Creator string `json:"creator"` // The creator of the video
|
|
ReleaseDate string `json:"release_date"` // The date (YYYYMMDD) when the video was released
|
|
Timestamp float64 `json:"timestamp"` // UNIX timestamp of the moment the video became available
|
|
UploadDate string `json:"upload_date"` // Video upload date (YYYYMMDD)
|
|
UploaderID string `json:"uploader_id"` // Nickname or id of the video uploader
|
|
Channel string `json:"channel"` // Full name of the channel the video is uploaded on
|
|
ChannelID string `json:"channel_id"` // Id of the channel
|
|
Location string `json:"location"` // Physical location where the video was filmed
|
|
Duration float64 `json:"duration"` // Length of the video in seconds
|
|
ViewCount float64 `json:"view_count"` // How many users have watched the video on the platform
|
|
LikeCount float64 `json:"like_count"` // Number of positive ratings of the video
|
|
DislikeCount float64 `json:"dislike_count"` // Number of negative ratings of the video
|
|
RepostCount float64 `json:"repost_count"` // Number of reposts of the video
|
|
AverageRating float64 `json:"average_rating"` // Average rating give by users, the scale used depends on the webpage
|
|
CommentCount float64 `json:"comment_count"` // Number of comments on the video
|
|
AgeLimit float64 `json:"age_limit"` // Age restriction for the video (years)
|
|
IsLive bool `json:"is_live"` // Whether this video is a live stream or a fixed-length video
|
|
StartTime float64 `json:"start_time"` // Time in seconds where the reproduction should start, as specified in the URL
|
|
EndTime float64 `json:"end_time"` // Time in seconds where the reproduction should end, as specified in the URL
|
|
Extractor string `json:"extractor"` // Name of the extractor
|
|
ExtractorKey string `json:"extractor_key"` // Key name of the extractor
|
|
Epoch float64 `json:"epoch"` // Unix epoch when creating the file
|
|
Autonumber float64 `json:"autonumber"` // Five-digit number that will be increased with each download, starting at zero
|
|
Playlist string `json:"playlist"` // Name or id of the playlist that contains the video
|
|
PlaylistIndex float64 `json:"playlist_index"` // Index of the video in the playlist padded with leading zeros according to the total length of the playlist
|
|
PlaylistID string `json:"playlist_id"` // Playlist identifier
|
|
PlaylistTitle string `json:"playlist_title"` // Playlist title
|
|
PlaylistUploader string `json:"playlist_uploader"` // Full name of the playlist uploader
|
|
PlaylistUploaderID string `json:"playlist_uploader_id"` // Nickname or id of the playlist uploader
|
|
|
|
// Available for the video that belongs to some logical chapter or section:
|
|
Chapter string `json:"chapter"` // Name or title of the chapter the video belongs to
|
|
ChapterNumber float64 `json:"chapter_number"` // Number of the chapter the video belongs to
|
|
ChapterID string `json:"chapter_id"` // Id of the chapter the video belongs to
|
|
|
|
// Available for the video that is an episode of some series or programme:
|
|
Series string `json:"series"` // Title of the series or programme the video episode belongs to
|
|
Season string `json:"season"` // Title of the season the video episode belongs to
|
|
SeasonNumber float64 `json:"season_number"` // Number of the season the video episode belongs to
|
|
SeasonID string `json:"season_id"` // Id of the season the video episode belongs to
|
|
Episode string `json:"episode"` // Title of the video episode
|
|
EpisodeNumber float64 `json:"episode_number"` // Number of the video episode within a season
|
|
EpisodeID string `json:"episode_id"` // Id of the video episode
|
|
|
|
// Available for the media that is a track or a part of a music album:
|
|
Track string `json:"track"` // Title of the track
|
|
TrackNumber float64 `json:"track_number"` // Number of the track within an album or a disc
|
|
TrackID string `json:"track_id"` // Id of the track
|
|
Artist string `json:"artist"` // Artist(s) of the track
|
|
Genre string `json:"genre"` // Genre(s) of the track
|
|
Album string `json:"album"` // Title of the album the track belongs to
|
|
AlbumType string `json:"album_type"` // Type of the album
|
|
AlbumArtist string `json:"album_artist"` // List of all artists appeared on the album
|
|
DiscNumber float64 `json:"disc_number"` // Number of the disc or other physical medium the track belongs to
|
|
ReleaseYear float64 `json:"release_year"` // Year (YYYY) when the album was released
|
|
|
|
Type string `json:"_type"`
|
|
Direct bool `json:"direct"`
|
|
WebpageURL string `json:"webpage_url"`
|
|
Description string `json:"description"`
|
|
Thumbnail string `json:"thumbnail"`
|
|
// not unmarshalled, populated from image thumbnail file
|
|
ThumbnailBytes []byte `json:"-"`
|
|
Thumbnails []Thumbnail `json:"thumbnails"`
|
|
|
|
Formats []Format `json:"formats"`
|
|
Subtitles map[string][]Subtitle `json:"subtitles"`
|
|
|
|
// Playlist entries if _type is playlist
|
|
Entries []Info `json:"entries"`
|
|
|
|
// Info can also be a mix of Info and one Format
|
|
Format
|
|
}
|
|
|
|
type Thumbnail struct {
|
|
ID string `json:"id"`
|
|
URL string `json:"url"`
|
|
Preference int `json:"preference"`
|
|
Width int `json:"width"`
|
|
Height int `json:"height"`
|
|
Resolution string `json:"resolution"`
|
|
}
|
|
|
|
// Format youtube-dl downloadable format
|
|
type Format struct {
|
|
Ext string `json:"ext"` // Video filename extension
|
|
Format string `json:"format"` // A human-readable description of the format
|
|
FormatID string `json:"format_id"` // Format code specified by `--format`
|
|
FormatNote string `json:"format_note"` // Additional info about the format
|
|
Width float64 `json:"width"` // Width of the video
|
|
Height float64 `json:"height"` // Height of the video
|
|
Resolution string `json:"resolution"` // Textual description of width and height
|
|
TBR float64 `json:"tbr"` // Average bitrate of audio and video in KBit/s
|
|
ABR float64 `json:"abr"` // Average audio bitrate in KBit/s
|
|
ACodec string `json:"acodec"` // Name of the audio codec in use
|
|
ASR float64 `json:"asr"` // Audio sampling rate in Hertz
|
|
VBR float64 `json:"vbr"` // Average video bitrate in KBit/s
|
|
FPS float64 `json:"fps"` // Frame rate
|
|
VCodec string `json:"vcodec"` // Name of the video codec in use
|
|
Container string `json:"container"` // Name of the container format
|
|
Filesize float64 `json:"filesize"` // The number of bytes, if known in advance
|
|
FilesizeApprox float64 `json:"filesize_approx"` // An estimate for the number of bytes
|
|
Protocol string `json:"protocol"` // The protocol that will be used for the actual download
|
|
HTTPHeaders map[string]string `json:"http_headers"`
|
|
}
|
|
|
|
// Subtitle youtube-dl subtitle
|
|
type Subtitle struct {
|
|
URL string `json:"url"`
|
|
Ext string `json:"ext"`
|
|
Language string `json:"-"`
|
|
// not unmarshalled, populated from subtitle file
|
|
Bytes []byte `json:"-"`
|
|
}
|
|
|
|
func (f Format) String() string {
|
|
return fmt.Sprintf("%s:%s:%s abr:%f vbr:%f tbr:%f",
|
|
f.FormatID,
|
|
f.Protocol,
|
|
f.Ext,
|
|
f.ABR,
|
|
f.VBR,
|
|
f.TBR,
|
|
)
|
|
}
|
|
|
|
// Type of response you want
|
|
type Type int
|
|
|
|
const (
|
|
// TypeAny single or playlist (default)
|
|
TypeAny Type = iota
|
|
// TypeSingle single track, file etc
|
|
TypeSingle
|
|
// TypePlaylist playlist with multiple tracks, files etc
|
|
TypePlaylist
|
|
)
|
|
|
|
var TypeFromString = map[string]Type{
|
|
"any": TypeAny,
|
|
"single": TypeSingle,
|
|
"playlist": TypePlaylist,
|
|
}
|
|
|
|
// Options for New()
|
|
type Options struct {
|
|
Type Type
|
|
PlaylistStart uint // --playlist-start
|
|
PlaylistEnd uint // --playlist-end
|
|
Downloader string // --downloader
|
|
DownloadThumbnail bool
|
|
DownloadSubtitles bool
|
|
ProxyUrl string // --proxy URL http://host:port or socks5://host:port
|
|
DebugLog Printer
|
|
StderrFn func(cmd *exec.Cmd) io.Writer // if not nil, function to get Writer for stderr
|
|
HTTPClient *http.Client // Client for download thumbnail and subtitles (nil use http.DefaultClient)
|
|
MergeOutputFormat string // --merge-output-format
|
|
SortingFormat string // --format-sort
|
|
}
|
|
|
|
// Version of youtube-dl.
|
|
// Might be a good idea to call at start to assert that youtube-dl can be found.
|
|
func Version(ctx context.Context) (string, error) {
|
|
cmd := exec.CommandContext(ctx, Path, "--version")
|
|
versionBytes, cmdErr := cmd.Output()
|
|
if cmdErr != nil {
|
|
return "", cmdErr
|
|
}
|
|
|
|
return strings.TrimSpace(string(versionBytes)), nil
|
|
}
|
|
|
|
// New downloads metadata for URL
|
|
func New(ctx context.Context, rawURL string, options Options) (result Result, err error) {
|
|
if options.DebugLog == nil {
|
|
options.DebugLog = nopPrinter{}
|
|
}
|
|
|
|
info, rawJSON, err := infoFromURL(ctx, rawURL, options)
|
|
if err != nil {
|
|
return Result{}, err
|
|
}
|
|
|
|
rawJSONCopy := make([]byte, len(rawJSON))
|
|
copy(rawJSONCopy, rawJSON)
|
|
|
|
return Result{
|
|
Info: info,
|
|
RawJSON: rawJSONCopy,
|
|
Options: options,
|
|
}, nil
|
|
}
|
|
|
|
func infoFromURL(ctx context.Context, rawURL string, options Options) (info Info, rawJSON []byte, err error) {
|
|
cmd := exec.CommandContext(
|
|
ctx,
|
|
Path,
|
|
// see comment below about ignoring errors for playlists
|
|
"--ignore-errors",
|
|
"--no-call-home",
|
|
"--no-cache-dir",
|
|
"--skip-download",
|
|
"--restrict-filenames",
|
|
// provide URL via stdin for security, youtube-dl has some run command args
|
|
"--batch-file", "-",
|
|
"-J",
|
|
)
|
|
|
|
if options.ProxyUrl != "" {
|
|
cmd.Args = append(cmd.Args, "--proxy", options.ProxyUrl)
|
|
}
|
|
|
|
if options.Downloader != "" {
|
|
cmd.Args = append(cmd.Args, "--downloader", options.Downloader)
|
|
}
|
|
|
|
if options.Type == TypePlaylist {
|
|
cmd.Args = append(cmd.Args, "--yes-playlist")
|
|
|
|
if options.PlaylistStart > 0 {
|
|
cmd.Args = append(cmd.Args,
|
|
"--playlist-start", strconv.Itoa(int(options.PlaylistStart)),
|
|
)
|
|
}
|
|
if options.PlaylistEnd > 0 {
|
|
cmd.Args = append(cmd.Args,
|
|
"--playlist-end", strconv.Itoa(int(options.PlaylistEnd)),
|
|
)
|
|
}
|
|
} else {
|
|
if options.DownloadSubtitles {
|
|
cmd.Args = append(cmd.Args,
|
|
"--all-subs",
|
|
)
|
|
}
|
|
cmd.Args = append(cmd.Args,
|
|
"--no-playlist",
|
|
)
|
|
}
|
|
|
|
tempPath, _ := ioutil.TempDir("", "ydls")
|
|
defer os.RemoveAll(tempPath)
|
|
|
|
stdoutBuf := &bytes.Buffer{}
|
|
stderrBuf := &bytes.Buffer{}
|
|
stderrWriter := ioutil.Discard
|
|
if options.StderrFn != nil {
|
|
stderrWriter = options.StderrFn(cmd)
|
|
}
|
|
|
|
cmd.Dir = tempPath
|
|
cmd.Stdout = stdoutBuf
|
|
cmd.Stderr = io.MultiWriter(stderrBuf, stderrWriter)
|
|
cmd.Stdin = bytes.NewBufferString(rawURL + "\n")
|
|
|
|
options.DebugLog.Print("cmd", " ", cmd.Args)
|
|
cmdErr := cmd.Run()
|
|
|
|
stderrLineScanner := bufio.NewScanner(stderrBuf)
|
|
errMessage := ""
|
|
for stderrLineScanner.Scan() {
|
|
const errorPrefix = "ERROR: "
|
|
line := stderrLineScanner.Text()
|
|
if strings.HasPrefix(line, errorPrefix) {
|
|
errMessage = line[len(errorPrefix):]
|
|
}
|
|
}
|
|
|
|
infoSeemsOk := false
|
|
if len(stdoutBuf.Bytes()) > 0 {
|
|
if infoErr := json.Unmarshal(stdoutBuf.Bytes(), &info); infoErr != nil {
|
|
return Info{}, nil, infoErr
|
|
}
|
|
|
|
isPlaylist := info.Type == "playlist" || info.Type == "multi_video"
|
|
switch {
|
|
case options.Type == TypePlaylist && !isPlaylist:
|
|
return Info{}, nil, ErrNotAPlaylist
|
|
case options.Type == TypeSingle && isPlaylist:
|
|
return Info{}, nil, ErrNotASingleEntry
|
|
default:
|
|
// any type
|
|
}
|
|
|
|
// HACK: --ignore-errors still return error message and exit code != 0
|
|
// so workaround is to assume things went ok if we get some ok json on stdout
|
|
infoSeemsOk = info.ID != ""
|
|
}
|
|
|
|
if !infoSeemsOk {
|
|
if errMessage != "" {
|
|
return Info{}, nil, YoutubedlError(errMessage)
|
|
} else if cmdErr != nil {
|
|
return Info{}, nil, cmdErr
|
|
}
|
|
|
|
return Info{}, nil, fmt.Errorf("unknown error")
|
|
}
|
|
|
|
get := func(url string) (*http.Response, error) {
|
|
c := http.DefaultClient
|
|
if options.HTTPClient != nil {
|
|
c = options.HTTPClient
|
|
}
|
|
|
|
r, err := http.NewRequest(http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for k, v := range info.HTTPHeaders {
|
|
r.Header.Set(k, v)
|
|
}
|
|
return c.Do(r)
|
|
}
|
|
|
|
// TODO: use headers from youtube-dl info for thumbnail and subtitle download?
|
|
if options.DownloadThumbnail && info.Thumbnail != "" {
|
|
resp, respErr := get(info.Thumbnail)
|
|
if respErr == nil {
|
|
buf, _ := ioutil.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
info.ThumbnailBytes = buf
|
|
}
|
|
}
|
|
|
|
for language, subtitles := range info.Subtitles {
|
|
for i := range subtitles {
|
|
subtitles[i].Language = language
|
|
}
|
|
}
|
|
|
|
if options.DownloadSubtitles {
|
|
for _, subtitles := range info.Subtitles {
|
|
for i, subtitle := range subtitles {
|
|
resp, respErr := get(subtitle.URL)
|
|
if respErr == nil {
|
|
buf, _ := ioutil.ReadAll(resp.Body)
|
|
resp.Body.Close()
|
|
subtitles[i].Bytes = buf
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// as we ignore errors for playlists some entries might show up as null
|
|
if options.Type == TypePlaylist {
|
|
var filteredEntrise []Info
|
|
for _, e := range info.Entries {
|
|
if e.ID == "" {
|
|
continue
|
|
}
|
|
filteredEntrise = append(filteredEntrise, e)
|
|
}
|
|
info.Entries = filteredEntrise
|
|
}
|
|
|
|
return info, stdoutBuf.Bytes(), nil
|
|
}
|
|
|
|
// Result metadata for a URL
|
|
type Result struct {
|
|
Info Info
|
|
RawJSON []byte // saved raw JSON. Used later when downloading
|
|
Options Options // options passed to New
|
|
}
|
|
|
|
// DownloadResult download result
|
|
type DownloadResult struct {
|
|
reader io.ReadCloser
|
|
waitCh chan struct{}
|
|
}
|
|
|
|
// Download format matched by filter (usually a format id or "best").
|
|
// Filter should not be a combine filter like "1+2" as then youtube-dl
|
|
// won't write to stdout.
|
|
func (result Result) Download(ctx context.Context, filter string) (*DownloadResult, error) {
|
|
debugLog := result.Options.DebugLog
|
|
|
|
if result.Info.Type == "playlist" || result.Info.Type == "multi_video" {
|
|
return nil, fmt.Errorf("can't download a playlist")
|
|
}
|
|
|
|
tempPath, tempErr := ioutil.TempDir("", "ydls")
|
|
if tempErr != nil {
|
|
return nil, tempErr
|
|
}
|
|
jsonTempPath := path.Join(tempPath, "info.json")
|
|
if err := ioutil.WriteFile(jsonTempPath, result.RawJSON, 0600); err != nil {
|
|
os.RemoveAll(tempPath)
|
|
return nil, err
|
|
}
|
|
|
|
dr := &DownloadResult{
|
|
waitCh: make(chan struct{}),
|
|
}
|
|
|
|
cmd := exec.CommandContext(
|
|
ctx,
|
|
Path,
|
|
"--no-call-home",
|
|
"--no-cache-dir",
|
|
"--ignore-errors",
|
|
"--newline",
|
|
"--restrict-filenames",
|
|
"--load-info", jsonTempPath,
|
|
"-o", "-",
|
|
)
|
|
// don't need to specify if direct as there is only one
|
|
// also seems to be issues when using filter with generic extractor
|
|
if !result.Info.Direct && filter != "" {
|
|
cmd.Args = append(cmd.Args, "-f", filter)
|
|
}
|
|
|
|
if result.Options.ProxyUrl != "" {
|
|
cmd.Args = append(cmd.Args, "--proxy", result.Options.ProxyUrl)
|
|
}
|
|
|
|
if result.Options.Downloader != "" {
|
|
cmd.Args = append(cmd.Args, "--downloader", result.Options.Downloader)
|
|
}
|
|
|
|
if result.Options.MergeOutputFormat != "" {
|
|
cmd.Args = append(cmd.Args,
|
|
"--merge-output-format", result.Options.MergeOutputFormat,
|
|
)
|
|
}
|
|
|
|
if result.Options.SortingFormat != "" {
|
|
cmd.Args = append(cmd.Args,
|
|
"--format-sort", result.Options.SortingFormat,
|
|
)
|
|
}
|
|
|
|
cmd.Dir = tempPath
|
|
var w io.WriteCloser
|
|
dr.reader, w = io.Pipe()
|
|
|
|
stderrWriter := ioutil.Discard
|
|
if result.Options.StderrFn != nil {
|
|
stderrWriter = result.Options.StderrFn(cmd)
|
|
}
|
|
cmd.Stdout = w
|
|
cmd.Stderr = stderrWriter
|
|
|
|
debugLog.Print("cmd", " ", cmd.Args)
|
|
if err := cmd.Start(); err != nil {
|
|
os.RemoveAll(tempPath)
|
|
return nil, err
|
|
}
|
|
|
|
go func() {
|
|
_ = cmd.Wait()
|
|
w.Close()
|
|
os.RemoveAll(tempPath)
|
|
close(dr.waitCh)
|
|
}()
|
|
|
|
return dr, nil
|
|
}
|
|
|
|
func (dr *DownloadResult) Read(p []byte) (n int, err error) {
|
|
return dr.reader.Read(p)
|
|
}
|
|
|
|
// Close downloader and wait for resource cleanup
|
|
func (dr *DownloadResult) Close() error {
|
|
err := dr.reader.Close()
|
|
<-dr.waitCh
|
|
return err
|
|
}
|
|
|
|
// Formats return all formats
|
|
// helper to take care of mixed info and format
|
|
func (result Result) Formats() []Format {
|
|
if len(result.Info.Formats) > 0 {
|
|
return result.Info.Formats
|
|
}
|
|
return []Format{result.Info.Format}
|
|
}
|