Use RSS to fetch the list of a channel's videos, so videos cannot become invisible due to "restrictions", and download them one by one instead of having yt-dlp scrape and parse the channel page. This loses the channel-wide (show-level) metadata, so introduce a hack that downloads just a channel's metadata in a separate pass.
parent a0e8bc8caa
commit 6ff2d53c2d
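As background for the approach described above, here is a minimal standalone sketch of the RSS technique (not part of this commit): YouTube serves a per-channel Atom feed at /feeds/videos.xml?channel_id=..., so the upload list can be fetched without scraping the channel page. The channel id below is a placeholder; real ids come from the subscriptions OPML.

package main

import (
	"encoding/xml"
	"fmt"
	"io"
	"net/http"
)

// Minimal view of the Atom feed; the diff below models it in full.
type feed struct {
	Entries []struct {
		Title   string `xml:"title"`
		VideoId string `xml:"videoId"`
	} `xml:"entry"`
}

func main() {
	// Placeholder channel id.
	resp, err := http.Get("https://www.youtube.com/feeds/videos.xml?channel_id=UCxxxxxxxxxxxxxxxxxxxxxx")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	data, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	var f feed
	if err := xml.Unmarshal(data, &f); err != nil {
		panic(err)
	}
	// Each feed entry carries the video id, from which a watch URL is rebuilt.
	for _, e := range f.Entries {
		fmt.Printf("%s  https://www.youtube.com/watch?v=%s\n", e.Title, e.VideoId)
	}
}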
@@ -55,11 +55,9 @@ COPY <<-EOT /data/config.json
    "verbose": false,
    "cmd": "/home/subsyt/.local/bin/yt-dlp",
    "quality": "res:1080",
-   "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s",
+   "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s",
    "url": "https://www.youtube.com",
    "throttle": 5,
-   "range": "1:5:1",
-   "after_date": "",
    "cookies_file": "",
    "opml_file": "/data/opml.xml",
    "po_token": "",
@@ -79,11 +79,9 @@ Full `config.json`:
    "verbose": false,
    "cmd": "./yt-dlp",
    "quality": "res:1080",
-   "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s",
+   "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s",
    "url": "https://www.youtube.com",
    "throttle": 5,
-   "range": "1:1:1",
-   "after_date": "",
    "cookies_file": "",
    "opml_file": "./youtube_subs.opml",
    "po_token": "",
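Read together, the config changes drop the hard-coded -1080p suffix from the output template (resolution now comes from --format-sort) and remove the range/after_date keys that the RSS-driven flow no longer uses. With hypothetical values, a 2024-05-17 upload renders under the new template roughly as:

    s2024/Some_Channel.s2024e0517.Some_Video_Title.dQw4w9WgXcQ.webm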
@@ -103,7 +101,6 @@ Minimal `config.json`:
  "youtube": {
    "cmd": "./yt-dlp",
    "throttle": 5,
-   "range": "1:1:1",
    "opml_file": "./youtube_subs.opml"
  }
}
@@ -9,8 +9,6 @@ import (
type Provider struct {
	Url          string
	Throttle     int
-	Range        string
-	After_date   string
	Cmd          string
	Cookies_file string
	Opml_file    string
@@ -21,73 +21,76 @@ type Download struct {
	OutDir   string
	Name     string
	DryRun   bool
+	Metadata bool
}

func Youtube(d Download, p config.Provider) {

	if p.Bgutil_server != "" && p.Po_token != "" {
		log.Fatal("please only provide bgutil_server OR po_token, not both")
	}

-	archive := filepath.Join(d.OutDir, "archive.txt")
-	outdir := d.OutDir
-
-	opmlUrl, err := url.Parse(d.Url)
+	vUrl, err := url.Parse(d.Url)
	if err != nil {
		panic(err)
	}
-	q := opmlUrl.Query()
-	cid := q.Get("channel_id")
-
-	if cid == "" {
-		log.Fatal("no channel !")
-	}
-
-	if p.Url == "" {
-		p.Url = "https://www.youtube.com"
-	}
-
-	fullUrl, err := url.Parse(p.Url)
-	if err != nil {
-		panic(err)
-	}
-	channelUrl := fullUrl.JoinPath("channel", cid, "videos")
-
-	throttle := strconv.Itoa(p.Throttle)

	args := []string{
		"--no-progress",
-		"--sleep-interval", throttle,
-		"--sleep-subtitles", throttle,
-		"--sleep-requests", throttle,
-		"--max-sleep-interval", "90",
-		"--prefer-free-formats",
-		"--write-subs",
-		"--no-write-automatic-subs",
-		"--sub-langs", "en",
-		"--paths", outdir,
-		"--download-archive", archive,
-		"--break-on-existing",
-		"--playlist-items", p.Range,
+		"--paths", d.OutDir,
		"--restrict-filenames",
-		"--embed-metadata",
		"--write-thumbnail",
		"--write-info-json",
-		"--match-filters", "!is_live & duration>?60",
		"--convert-thumbnails", "jpg",
	}

-	if d.DryRun == true {
-		args = append(args, "--simulate")
-		log.Println("/!\\ DRY RUN ENABLED /!\\")
-	} else {
-		args = append(args, "--no-simulate")
-	}

	if p.Verbose == true {
		args = append(args, "--verbose")
	}

+	if d.DryRun == true {
+		log.Println("/!\\ DRY RUN ENABLED /!\\")
+		args = append(args, "--simulate")
+	} else {
+		args = append(args, "--no-simulate")
+	}
+
+	if d.Metadata == true {
+		log.Println("Downloading metadata")
+		mArgs := []string{
+			"--skip-download",
+			"--no-overwrites",
+			"--playlist-items", "0:0:1",
+		}
+		args = append(args, mArgs...)
+	} else {
+		log.Println("Downloading video")
+		archive := filepath.Join(d.OutDir, "archive.txt")
+		throttle := strconv.Itoa(p.Throttle)
+
+		dArgs := []string{
+			"--no-playlist",
+			"--sleep-interval", throttle,
+			"--sleep-subtitles", throttle,
+			"--sleep-requests", throttle,
+			"--max-sleep-interval", "90",
+			"--embed-metadata",
+			"--write-subs",
+			"--no-write-automatic-subs",
+			"--sub-langs", "en",
+			"--prefer-free-formats",
+			"--download-archive", archive,
+			"--break-on-existing",
+			"--match-filters", "!is_live & duration>?60",
+		}
+		args = append(args, dArgs...)
+
+		if p.Quality != "" {
+			args = append(args, "--format-sort", p.Quality)
+		} else {
+			args = append(args, "--format-sort", "res:1080")
+		}
+	}

	if p.Cookies_file != "" {
		args = append(args, "--cookies")
		args = append(args, p.Cookies_file)
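The "hack" from the commit message is the Metadata branch above: "--playlist-items 0:0:1" selects an empty item range, so yt-dlp downloads no videos, while "--write-info-json" (kept in the base args) should still produce the channel-level .info.json used for show metadata. A minimal sketch of the effective invocation, assuming yt-dlp is on PATH; the channel URL is a placeholder:

package main

import (
	"log"
	"os/exec"
)

func main() {
	// Metadata-only pass: no video items are selected, but the
	// channel/playlist .info.json is still written.
	cmd := exec.Command("yt-dlp",
		"--skip-download",
		"--no-overwrites",
		"--write-info-json",
		"--playlist-items", "0:0:1",
		"https://www.youtube.com/channel/UCxxxxxxxxxxxxxxxxxxxxxx/videos",
	)
	out, err := cmd.CombinedOutput()
	if err != nil {
		log.Fatalf("yt-dlp failed: %v\n%s", err, out)
	}
	log.Printf("%s", out)
}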
@@ -95,11 +98,6 @@ func Youtube(d Download, p config.Provider) {
		args = append(args, "--no-cookies")
	}

-	if p.After_date != "" {
-		args = append(args, "--dateafter")
-		args = append(args, p.After_date)
-	}
-
	if p.Po_token != "" {
		args = append(args, "--extractor-args")
		args = append(args, fmt.Sprintf("youtube:po_token=web.gvs+%s", p.Po_token))
@@ -115,21 +113,14 @@ func Youtube(d Download, p config.Provider) {
		args = append(args, fmt.Sprintf("youtube:player_client=%s", p.Player_client))
	}

-	args = append(args, "--format-sort")
-	if p.Quality != "" {
-		args = append(args, p.Quality)
-	} else {
-		args = append(args, "res:1080")
-	}
-
-	args = append(args, "--output")
	if p.Output_path_template != "" {
-		args = append(args, p.Output_path_template)
+		args = append(args, "--output", p.Output_path_template)
	} else {
-		args = append(args, "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s")
+		args = append(args, "--output", "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s")
	}

-	args = append(args, channelUrl.String())
+	args = append(args, vUrl.String())

	cmd := exec.Command(p.Cmd, args...)

	stdout, err := cmd.StdoutPipe()
@@ -202,3 +193,22 @@ func Fetch(d Download) {
		log.Printf("failed to write file")
	}
}
+
+func RssDownloader(url string) ([]byte, error) {
+	resp, err := http.Get(url)
+	if err != nil {
+		return nil, fmt.Errorf("failed to fetch RSS feed: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("failed to fetch RSS feed: %s", resp.Status)
+	}
+
+	data, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read RSS data: %w", err)
+	}
+
+	return data, nil
+}
@@ -1,4 +1,4 @@
-package opml
+package format

import (
	"encoding/xml"
@@ -22,7 +22,7 @@ type OPML struct {
	Body `xml:"opml>body"`
}

-func Load(path string) (OPML, error) {
+func OpmlLoad(path string) (OPML, error) {
	data, err := os.ReadFile(path)

	if err != nil {

internal/format/rss.go (new file, 89 lines)
@@ -0,0 +1,89 @@
+package format
+
+import (
+	"encoding/xml"
+)
+
+type Feed struct {
+	XMLName   xml.Name `xml:"feed"`
+	Id        string   `xml:"id"`
+	ChannelId string   `xml:"yt:channelId"`
+	Title     string   `xml:"title"`
+	Published string   `xml:"published"`
+	Links     []Link   `xml:"link"`
+	Author    Author   `xml:"author"`
+	Entries   []Entry  `xml:"entry"`
+}
+
+type Link struct {
+	Rel  string `xml:"rel,attr"`
+	Href string `xml:"href,attr"`
+}
+
+type Author struct {
+	XMLName xml.Name `xml:"author"`
+	Name    string   `xml:"name"`
+	Uri     string   `xml:"uri"`
+}
+
+type MediaContent struct {
+	URL    string `xml:"url,attr"`
+	Type   string `xml:"type,attr"`
+	Width  string `xml:"width,attr"`
+	Height string `xml:"height,attr"`
+}
+
+type MediaThumbnail struct {
+	URL    string `xml:"url,attr"`
+	Width  string `xml:"width,attr"`
+	Height string `xml:"height,attr"`
+}
+
+type MediaStarRating struct {
+	Count   string `xml:"count,attr"`
+	Average string `xml:"average,attr"`
+	Min     string `xml:"min,attr"`
+	Max     string `xml:"max,attr"`
+}
+
+type MediaStatistics struct {
+	Views string `xml:"views,attr"`
+}
+
+type MediaCommunity struct {
+	StarRating MediaStarRating `xml:"starRating"`
+	Statistics MediaStatistics `xml:"statistics"`
+}
+
+type MediaGroup struct {
+	Title       string         `xml:"title"`
+	Content     MediaContent   `xml:"content"`
+	Thumbnail   MediaThumbnail `xml:"thumbnail"`
+	Description string         `xml:"description"`
+	Community   MediaCommunity `xml:"community"`
+}
+
+type Entry struct {
+	XMLName    xml.Name   `xml:"entry"`
+	Title      string     `xml:"title"`
+	Id         string     `xml:"id"`
+	VideoId    string     `xml:"videoId"`
+	ChannelId  string     `xml:"channelId"`
+	Link       Link       `xml:"link"`
+	Author     Author     `xml:"author"`
+	Published  string     `xml:"published"`
+	Updated    string     `xml:"updated"`
+	MediaGroup MediaGroup `xml:"group"`
+}
+
+func RssLoad(data []byte) (Feed, error) {
+	feed := Feed{}
+
+	err := xml.Unmarshal(data, &feed)
+	if err != nil {
+		return Feed{}, err
+	}
+
+	return feed, nil
+}
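Taken together with RssDownloader in internal/dl above, the new format package gives the fetch-then-parse flow that main.go (further down) drives. A minimal sketch, assuming the feed URL comes from an OPML outline's XmlUrl; the channel id is a placeholder:

package main

import (
	"fmt"
	"log"

	"git.meatbag.se/varl/subsyt/internal/dl"
	"git.meatbag.se/varl/subsyt/internal/format"
)

func main() {
	// Placeholder channel id; a real one comes from the subscriptions OPML.
	feedUrl := "https://www.youtube.com/feeds/videos.xml?channel_id=UCxxxxxxxxxxxxxxxxxxxxxx"

	data, err := dl.RssDownloader(feedUrl)
	if err != nil {
		log.Fatal(err)
	}

	feed, err := format.RssLoad(data)
	if err != nil {
		log.Fatal(err)
	}

	// Each entry carries the video id; the watch URL is rebuilt from it.
	for _, entry := range feed.Entries {
		fmt.Printf("%s  https://www.youtube.com/watch?v=%s\n", entry.Title, entry.VideoId)
	}
}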
@@ -7,7 +7,7 @@ import (
	"strings"

	"git.meatbag.se/varl/subsyt/internal/dl"
-	"git.meatbag.se/varl/subsyt/internal/models"
+	"git.meatbag.se/varl/subsyt/internal/model"
)

func episodeImage(path string) {
@@ -32,7 +32,7 @@ func showPoster(path string, show_dir string) {
	}
}

-func showBanner(show models.Show, showDir string) {
+func showBanner(show model.Show, showDir string) {
	_, err := os.Stat(filepath.Join(showDir, "banner.jpg"))
	if err == nil {
		log.Printf("%s has a banner, skipping download\n", show.Title)
@@ -52,14 +52,14 @@ func showBanner(show models.Show, showDir string) {
	}
}

-func showFanart(show models.Show, showDir string) {
+func showFanart(show model.Show, showDir string) {
	_, err := os.Stat(filepath.Join(showDir, "fanart.jpg"))
	if err == nil {
		log.Printf("%s has fanart, skipping download\n", show.Title)
		return
	}

-	c := models.Thumbnail{}
+	c := model.Thumbnail{}
	for index, thumb := range show.Thumbnails {
		log.Println(index, thumb)
		if thumb.Width > c.Width {
@@ -8,7 +8,7 @@ import (
	"regexp"
	"strings"

-	"git.meatbag.se/varl/subsyt/internal/models"
+	"git.meatbag.se/varl/subsyt/internal/model"
	"git.meatbag.se/varl/subsyt/internal/nfo"
)
@@ -52,12 +52,12 @@ func Generate(outDir string, title string, dryRun bool) {
		log.Println(index, path)
		switch {
		case show.MatchString(path):
-			show := models.LoadShow(path)
+			show := model.LoadShow(path)
			nfo.WriteShowInfo(show, filepath.Join(showDir, "tvshow.nfo"))
			showBanner(show, showDir)
			showFanart(show, showDir)
		case season.MatchString(path):
-			ep := models.LoadEpisode(path)
+			ep := model.LoadEpisode(path)
			nfo.WriteEpisodeNFO(ep, path)
		default:
			log.Printf("no match for '%s'\n", path)
@@ -1,4 +1,4 @@
-package models
+package model

import (
	"encoding/json"

@@ -1,4 +1,4 @@
-package models
+package model

import (
	"encoding/json"

@@ -1,4 +1,4 @@
-package models
+package model

type UniqueId struct {
	Text string `xml:",chardata"`
@@ -6,10 +6,10 @@ import (
	"os"
	"strings"

-	"git.meatbag.se/varl/subsyt/internal/models"
+	"git.meatbag.se/varl/subsyt/internal/model"
)

-func WriteEpisodeNFO(ep models.Episode, info_path string) {
+func WriteEpisodeNFO(ep model.Episode, info_path string) {
	out_path := strings.Replace(info_path, ".info.json", ".nfo", 1)

	log.Printf("writing info from '%s' to '%s'\n", info_path, out_path)
@@ -24,7 +24,7 @@ func WriteEpisodeNFO(ep models.Episode, info_path string) {
	os.WriteFile(out_path, xmlData, 0644)
}

-func WriteShowInfo(show models.Show, out_path string) {
+func WriteShowInfo(show model.Show, out_path string) {
	log.Printf("writing info from '%v' to '%s'\n", show, out_path)

	xmlData, err := xml.MarshalIndent(show, "", " ")
main.go (42 changed lines)
@@ -2,21 +2,22 @@ package main

import (
	"flag"
+	"fmt"
	"log"
	"os"
	"path/filepath"

	"git.meatbag.se/varl/subsyt/internal/config"
	"git.meatbag.se/varl/subsyt/internal/dl"
+	"git.meatbag.se/varl/subsyt/internal/format"
	"git.meatbag.se/varl/subsyt/internal/metadata"
-	"git.meatbag.se/varl/subsyt/internal/opml"
	"git.meatbag.se/varl/subsyt/internal/scheduler"
)

func run(cfg config.Config) {
	provider := cfg.Provider["youtube"]

-	opml, err := opml.Load(provider.Opml_file)
+	opml, err := format.OpmlLoad(provider.Opml_file)
	if err != nil {
		panic(err)
	}
@@ -25,13 +26,42 @@ func run(cfg config.Config) {
		log.Printf("Archiving videos from OPML: %s\n", outlines.Title)

		for _, outline := range outlines.Outlines {
+			rssData, err := dl.RssDownloader(outline.XmlUrl)
+			if err != nil {
+				log.Printf("Failed to download RSS for %s: %v", outline.Title, err)
+				continue
+			}
+
+			feed, err := format.RssLoad(rssData)
+			if err != nil {
+				log.Printf("Failed to parse RSS for %s: %v", feed.Title, err)
+				continue
+			}
+
			dl.Youtube(dl.Download{
-				Url:    outline.XmlUrl,
-				OutDir: filepath.Join(cfg.Out_dir, outline.Title),
-				DryRun: cfg.Dry_run,
+				Url:      feed.Author.Uri,
+				OutDir:   filepath.Join(cfg.Out_dir, outline.Title),
+				DryRun:   cfg.Dry_run,
+				Metadata: true,
			}, provider)

-			metadata.Generate(cfg.Out_dir, outline.Title, cfg.Dry_run)
+			log.Printf("Downloaded RSS feed for %s with %d entries", feed.Title, len(feed.Entries))
+
+			for _, entry := range feed.Entries {
+				url := fmt.Sprintf("%s/watch?v=%s", provider.Url, entry.VideoId)
+
+				log.Printf("Entry: %#v", entry)
+				dl.Youtube(dl.Download{
+					Url:      url,
+					OutDir:   filepath.Join(cfg.Out_dir, feed.Title),
+					DryRun:   cfg.Dry_run,
+					Metadata: false,
+				}, provider)
+
+				break
+			}
+
+			metadata.Generate(cfg.Out_dir, feed.Title, cfg.Dry_run)
		}
	}
}