From 6ff2d53c2d6add02c352661cbd673be5e460a135 Mon Sep 17 00:00:00 2001 From: Viktor Varland Date: Mon, 8 Sep 2025 22:28:19 +0200 Subject: [PATCH] feat: use rss to fetch list of videos This uses RSS to fetch a list of videos to avoid videos being invisible due to "restrictions", then downloads the videos one-by-one instead of scraping and parsing the channel page using yt-dlp. We lose metadata for the entire channel (show-level), so we introduce a hack to download just the metadata of a channel. --- Containerfile | 4 +- README.md | 5 +- internal/config/config.go | 2 - internal/dl/dl.go | 130 +++++++++++++------ internal/{opml => format}/opml.go | 4 +- internal/format/rss.go | 89 +++++++++++++++++ internal/metadata/art.go | 8 +- internal/metadata/metadata.go | 6 +- internal/{models => model}/episode.go | 2 +- internal/{models => model}/show.go | 2 +- internal/{models => model}/uniqueid.go | 2 +- internal/nfo/nfo.go | 6 +- main.go | 42 ++++++-- 13 files changed, 212 insertions(+), 90 deletions(-) rename internal/{opml => format}/opml.go (90%) create mode 100644 internal/format/rss.go rename internal/{models => model}/episode.go (99%) rename internal/{models => model}/show.go (98%) rename internal/{models => model}/uniqueid.go (89%) diff --git a/Containerfile b/Containerfile index 8736bd8..9b8e1c9 100644 --- a/Containerfile +++ b/Containerfile @@ -55,11 +55,9 @@ COPY <<-EOT /data/config.json "verbose": false, "cmd": "/home/subsyt/.local/bin/yt-dlp", "quality": "res:1080", - "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s", + "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s", "url": "https://www.youtube.com", "throttle": 5, - "range": "1:5:1", - "after_date": "", "cookies_file": "", "opml_file": "/data/opml.xml", "po_token": "", diff --git a/README.md b/README.md index 06af2bf..cc1a0d3 100644 --- a/README.md +++ 
b/README.md @@ -79,11 +79,9 @@ Full `config.json`: "verbose": false, "cmd": "./yt-dlp", "quality": "res:1080", - "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s", + "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s", "url": "https://www.youtube.com", "throttle": 5, - "range": "1:1:1", - "after_date": "", "cookies_file": "", "opml_file": "./youtube_subs.opml", "po_token": "", @@ -103,7 +101,6 @@ Minimal `config.json`: "youtube": { "cmd": "./yt-dlp", "throttle": 5, - "range": "1:1:1", "opml_file": "./youtube_subs.opml" } } diff --git a/internal/config/config.go b/internal/config/config.go index dc320d9..65c5831 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -9,8 +9,6 @@ import ( type Provider struct { Url string Throttle int - Range string - After_date string Cmd string Cookies_file string Opml_file string diff --git a/internal/dl/dl.go b/internal/dl/dl.go index 0fbae90..a1c0b76 100644 --- a/internal/dl/dl.go +++ b/internal/dl/dl.go @@ -21,73 +21,76 @@ type Download struct { OutDir string Name string DryRun bool + Metadata bool } func Youtube(d Download, p config.Provider) { - if p.Bgutil_server != "" && p.Po_token != "" { log.Fatal("please only provide bgutil_server OR po_token, not both") } - archive := filepath.Join(d.OutDir, "archive.txt") - outdir := d.OutDir - - opmlUrl, err := url.Parse(d.Url) + vUrl, err := url.Parse(d.Url) if err != nil { panic(err) } - q := opmlUrl.Query() - cid := q.Get("channel_id") - - if cid == "" { - log.Fatal("no channel !") - } - - if p.Url == "" { - p.Url = "https://www.youtube.com" - } - - fullUrl, err := url.Parse(p.Url) - if err != nil { - panic(err) - } - channelUrl := fullUrl.JoinPath("channel", cid, "videos") - - throttle := strconv.Itoa(p.Throttle) args := []string{ "--no-progress", - "--sleep-interval", throttle, - "--sleep-subtitles", throttle, - 
"--sleep-requests", throttle, - "--max-sleep-interval", "90", - "--prefer-free-formats", - "--write-subs", - "--no-write-automatic-subs", - "--sub-langs", "en", - "--paths", outdir, - "--download-archive", archive, - "--break-on-existing", - "--playlist-items", p.Range, + "--paths", d.OutDir, "--restrict-filenames", - "--embed-metadata", "--write-thumbnail", "--write-info-json", - "--match-filters", "!is_live & duration>?60", "--convert-thumbnails", "jpg", } - if d.DryRun == true { - args = append(args, "--simulate") - log.Println("/!\\ DRY RUN ENABLED /!\\") - } else { - args = append(args, "--no-simulate") - } - if p.Verbose == true { args = append(args, "--verbose") } + if d.DryRun == true { + log.Println("/!\\ DRY RUN ENABLED /!\\") + args = append(args, "--simulate") + } else { + args = append(args, "--no-simulate") + } + + if d.Metadata == true { + log.Println("Downloading metadata") + mArgs := []string{ + "--skip-download", + "--no-overwrites", + "--playlist-items", "0:0:1", + } + args = append(args, mArgs...) + } else { + log.Println("Downloading video") + archive := filepath.Join(d.OutDir, "archive.txt") + throttle := strconv.Itoa(p.Throttle) + + dArgs := []string{ + "--no-playlist", + "--sleep-interval", throttle, + "--sleep-subtitles", throttle, + "--sleep-requests", throttle, + "--max-sleep-interval", "90", + "--embed-metadata", + "--write-subs", + "--no-write-automatic-subs", + "--sub-langs", "en", + "--prefer-free-formats", + "--download-archive", archive, + "--break-on-existing", + "--match-filters", "!is_live & duration>?60", + } + args = append(args, dArgs...) 
+ + if p.Quality != "" { + args = append(args, "--format-sort", p.Quality) + } else { + args = append(args, "--format-sort", "res:1080") + } + } + if p.Cookies_file != "" { args = append(args, "--cookies") args = append(args, p.Cookies_file) @@ -95,11 +98,6 @@ func Youtube(d Download, p config.Provider) { args = append(args, "--no-cookies") } - if p.After_date != "" { - args = append(args, "--dateafter") - args = append(args, p.After_date) - } - if p.Po_token != "" { args = append(args, "--extractor-args") args = append(args, fmt.Sprintf("youtube:po_token=web.gvs+%s", p.Po_token)) @@ -115,21 +113,14 @@ func Youtube(d Download, p config.Provider) { args = append(args, fmt.Sprintf("youtube:player_client=%s", p.Player_client)) } - args = append(args, "--format-sort") - if p.Quality != "" { - args = append(args, p.Quality) - } else { - args = append(args, "res:1080") - } - - args = append(args, "--output") if p.Output_path_template != "" { - args = append(args, p.Output_path_template) + args = append(args, "--output", p.Output_path_template) } else { - args = append(args, "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s") + args = append(args, "--output", "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s") } - args = append(args, channelUrl.String()) + args = append(args, vUrl.String()) + cmd := exec.Command(p.Cmd, args...) 
stdout, err := cmd.StdoutPipe() @@ -202,3 +193,22 @@ func Fetch(d Download) { log.Printf("failed to write file") } } + +func RssDownloader(url string) ([]byte, error) { + resp, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("failed to fetch RSS feed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to fetch RSS feed: %s", resp.Status) + } + + data, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("failed to read RSS data: %w", err) + } + + return data, nil +} diff --git a/internal/opml/opml.go b/internal/format/opml.go similarity index 90% rename from internal/opml/opml.go rename to internal/format/opml.go index cb27ab3..09932ff 100644 --- a/internal/opml/opml.go +++ b/internal/format/opml.go @@ -1,4 +1,4 @@ -package opml +package format import ( "encoding/xml" @@ -22,7 +22,7 @@ type OPML struct { Body `xml:"opml>body"` } -func Load(path string) (OPML, error) { +func OpmlLoad(path string) (OPML, error) { data, err := os.ReadFile(path) if err != nil { diff --git a/internal/format/rss.go b/internal/format/rss.go new file mode 100644 index 0000000..ea697fb --- /dev/null +++ b/internal/format/rss.go @@ -0,0 +1,89 @@ +package format + +import ( + "encoding/xml" +) + +type Feed struct { + XMLName xml.Name `xml:"feed"` + Id string `xml:"id"` + ChannelId string `xml:"yt:channelId"` + Title string `xml:"title"` + Published string `xml:"published"` + Links []Link `xml:"link"` + Author Author `xml:"author"` + Entries []Entry `xml:"entry"` +} + +type Link struct { + Rel string `xml:"rel,attr"` + Href string `xml:"href,attr"` +} + +type Author struct { + XMLName xml.Name `xml:"author"` + Name string `xml:"name"` + Uri string `xml:"uri"` +} + +type MediaContent struct { + URL string `xml:"url,attr"` + Type string `xml:"type,attr"` + Width string `xml:"width,attr"` + Height string `xml:"height,attr"` +} + +type MediaThumbnail struct { + URL string `xml:"url,attr"` + Width 
string `xml:"width,attr"` + Height string `xml:"height,attr"` +} + +type MediaStarRating struct { + Count string `xml:"count,attr"` + Average string `xml:"average,attr"` + Min string `xml:"min,attr"` + Max string `xml:"max,attr"` +} + +type MediaStatistics struct { + Views string `xml:"views,attr"` +} + +type MediaCommunity struct { + StarRating MediaStarRating `xml:"starRating"` + Statistics MediaStatistics `xml:"statistics"` +} + +type MediaGroup struct { + Title string `xml:"title"` + Content MediaContent `xml:"content"` + Thumbnail MediaThumbnail `xml:"thumbnail"` + Description string `xml:"description"` + Community MediaCommunity `xml:"community"` +} + +type Entry struct { + XMLName xml.Name `xml:"entry"` + Title string `xml:"title"` + Id string `xml:"id"` + VideoId string `xml:"videoId"` + ChannelId string `xml:"channelId"` + Link Link `xml:"link"` + Author Author `xml:"author"` + Published string `xml:"published"` + Updated string `xml:"updated"` + MediaGroup MediaGroup `xml:"group"` +} + + +func RssLoad(data []byte) (Feed, error) { + feed := Feed{} + + err := xml.Unmarshal(data, &feed) + if err != nil { + return Feed{}, err + } + + return feed, nil +} diff --git a/internal/metadata/art.go b/internal/metadata/art.go index 080ba71..00532e3 100644 --- a/internal/metadata/art.go +++ b/internal/metadata/art.go @@ -7,7 +7,7 @@ import ( "strings" "git.meatbag.se/varl/subsyt/internal/dl" - "git.meatbag.se/varl/subsyt/internal/models" + "git.meatbag.se/varl/subsyt/internal/model" ) func episodeImage(path string) { @@ -32,7 +32,7 @@ func showPoster(path string, show_dir string) { } } -func showBanner(show models.Show, showDir string) { +func showBanner(show model.Show, showDir string) { _, err := os.Stat(filepath.Join(showDir, "banner.jpg")) if err == nil { log.Printf("%s has a banner, skipping download\n", show.Title) @@ -52,14 +52,14 @@ func showBanner(show models.Show, showDir string) { } } -func showFanart(show models.Show, showDir string) { +func showFanart(show 
model.Show, showDir string) { _, err := os.Stat(filepath.Join(showDir, "fanart.jpg")) if err == nil { log.Printf("%s has fanart, skipping download\n", show.Title) return } - c := models.Thumbnail{} + c := model.Thumbnail{} for index, thumb := range show.Thumbnails { log.Println(index, thumb) if thumb.Width > c.Width { diff --git a/internal/metadata/metadata.go b/internal/metadata/metadata.go index ca554ee..a179281 100644 --- a/internal/metadata/metadata.go +++ b/internal/metadata/metadata.go @@ -8,7 +8,7 @@ import ( "regexp" "strings" - "git.meatbag.se/varl/subsyt/internal/models" + "git.meatbag.se/varl/subsyt/internal/model" "git.meatbag.se/varl/subsyt/internal/nfo" ) @@ -52,12 +52,12 @@ func Generate(outDir string, title string, dryRun bool) { log.Println(index, path) switch { case show.MatchString(path): - show := models.LoadShow(path) + show := model.LoadShow(path) nfo.WriteShowInfo(show, filepath.Join(showDir, "tvshow.nfo")) showBanner(show, showDir) showFanart(show, showDir) case season.MatchString(path): - ep := models.LoadEpisode(path) + ep := model.LoadEpisode(path) nfo.WriteEpisodeNFO(ep, path) default: log.Printf("no match for '%s'\n", path) diff --git a/internal/models/episode.go b/internal/model/episode.go similarity index 99% rename from internal/models/episode.go rename to internal/model/episode.go index 3c16760..c48f067 100644 --- a/internal/models/episode.go +++ b/internal/model/episode.go @@ -1,4 +1,4 @@ -package models +package model import ( "encoding/json" diff --git a/internal/models/show.go b/internal/model/show.go similarity index 98% rename from internal/models/show.go rename to internal/model/show.go index 79249a4..829c3ea 100644 --- a/internal/models/show.go +++ b/internal/model/show.go @@ -1,4 +1,4 @@ -package models +package model import ( "encoding/json" diff --git a/internal/models/uniqueid.go b/internal/model/uniqueid.go similarity index 89% rename from internal/models/uniqueid.go rename to internal/model/uniqueid.go index 
82f62cd..9e2ee0f 100644 --- a/internal/models/uniqueid.go +++ b/internal/model/uniqueid.go @@ -1,4 +1,4 @@ -package models +package model type UniqueId struct { Text string `xml:",chardata"` diff --git a/internal/nfo/nfo.go b/internal/nfo/nfo.go index 4de0024..b35be87 100644 --- a/internal/nfo/nfo.go +++ b/internal/nfo/nfo.go @@ -6,10 +6,10 @@ import ( "os" "strings" - "git.meatbag.se/varl/subsyt/internal/models" + "git.meatbag.se/varl/subsyt/internal/model" ) -func WriteEpisodeNFO(ep models.Episode, info_path string) { +func WriteEpisodeNFO(ep model.Episode, info_path string) { out_path := strings.Replace(info_path, ".info.json", ".nfo", 1) log.Printf("writing info from '%s' to '%s'\n", info_path, out_path) @@ -24,7 +24,7 @@ func WriteEpisodeNFO(ep models.Episode, info_path string) { os.WriteFile(out_path, xmlData, 0644) } -func WriteShowInfo(show models.Show, out_path string) { +func WriteShowInfo(show model.Show, out_path string) { log.Printf("writing info from '%v' to '%s'\n", show, out_path) xmlData, err := xml.MarshalIndent(show, "", " ") diff --git a/main.go b/main.go index 036fd36..58eb6cf 100644 --- a/main.go +++ b/main.go @@ -2,21 +2,22 @@ package main import ( "flag" + "fmt" "log" "os" "path/filepath" "git.meatbag.se/varl/subsyt/internal/config" "git.meatbag.se/varl/subsyt/internal/dl" + "git.meatbag.se/varl/subsyt/internal/format" "git.meatbag.se/varl/subsyt/internal/metadata" - "git.meatbag.se/varl/subsyt/internal/opml" "git.meatbag.se/varl/subsyt/internal/scheduler" ) func run(cfg config.Config) { provider := cfg.Provider["youtube"] - opml, err := opml.Load(provider.Opml_file) + opml, err := format.OpmlLoad(provider.Opml_file) if err != nil { panic(err) } @@ -25,13 +26,42 @@ func run(cfg config.Config) { log.Printf("Archiving videos from OPML: %s\n", outlines.Title) for _, outline := range outlines.Outlines { + rssData, err := dl.RssDownloader(outline.XmlUrl) + if err != nil { + log.Printf("Failed to download RSS for %s: %v", outline.Title, err) + 
continue + } + + feed, err := format.RssLoad(rssData) + if err != nil { + log.Printf("Failed to parse RSS for %s: %v", feed.Title, err) + continue + } + dl.Youtube(dl.Download{ - Url: outline.XmlUrl, - OutDir: filepath.Join(cfg.Out_dir, outline.Title), - DryRun: cfg.Dry_run, + Url: feed.Author.Uri, + OutDir: filepath.Join(cfg.Out_dir, outline.Title), + DryRun: cfg.Dry_run, + Metadata: true, }, provider) - metadata.Generate(cfg.Out_dir, outline.Title, cfg.Dry_run) + log.Printf("Downloaded RSS feed for %s with %d entries", feed.Title, len(feed.Entries)) + + for _, entry := range feed.Entries { + url := fmt.Sprintf("%s/watch?v=%s", provider.Url, entry.VideoId) + + log.Printf("Entry: %#v", entry) + dl.Youtube(dl.Download{ + Url: url, + OutDir: filepath.Join(cfg.Out_dir, feed.Title), + DryRun: cfg.Dry_run, + Metadata: false, + }, provider) + + break + } + + metadata.Generate(cfg.Out_dir, feed.Title, cfg.Dry_run) } } }