feat: use rss to fetch list of videos
Some checks are pending
build / build (push) Waiting to run

This uses RSS to fetch a list of videos, to avoid videos being invisible
due to "restrictions", then downloads the videos one-by-one instead of
scraping and parsing the channel page using yt-dlp.

We lose metadata for the entire channel (show-level), so we introduce a
workaround to download just the metadata of a channel.
This commit is contained in:
Viktor Varland 2025-09-08 22:28:19 +02:00
parent a0e8bc8caa
commit 6ff2d53c2d
No known key found for this signature in database
GPG key ID: 940DFD5EADD1E94A
13 changed files with 212 additions and 90 deletions

View file

@ -55,11 +55,9 @@ COPY <<-EOT /data/config.json
"verbose": false, "verbose": false,
"cmd": "/home/subsyt/.local/bin/yt-dlp", "cmd": "/home/subsyt/.local/bin/yt-dlp",
"quality": "res:1080", "quality": "res:1080",
"output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s", "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s",
"url": "https://www.youtube.com", "url": "https://www.youtube.com",
"throttle": 5, "throttle": 5,
"range": "1:5:1",
"after_date": "",
"cookies_file": "", "cookies_file": "",
"opml_file": "/data/opml.xml", "opml_file": "/data/opml.xml",
"po_token": "", "po_token": "",

View file

@ -79,11 +79,9 @@ Full `config.json`:
"verbose": false, "verbose": false,
"cmd": "./yt-dlp", "cmd": "./yt-dlp",
"quality": "res:1080", "quality": "res:1080",
"output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s", "output_path_template": "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s",
"url": "https://www.youtube.com", "url": "https://www.youtube.com",
"throttle": 5, "throttle": 5,
"range": "1:1:1",
"after_date": "",
"cookies_file": "", "cookies_file": "",
"opml_file": "./youtube_subs.opml", "opml_file": "./youtube_subs.opml",
"po_token": "", "po_token": "",
@ -103,7 +101,6 @@ Minimal `config.json`:
"youtube": { "youtube": {
"cmd": "./yt-dlp", "cmd": "./yt-dlp",
"throttle": 5, "throttle": 5,
"range": "1:1:1",
"opml_file": "./youtube_subs.opml" "opml_file": "./youtube_subs.opml"
} }
} }

View file

@ -9,8 +9,6 @@ import (
type Provider struct { type Provider struct {
Url string Url string
Throttle int Throttle int
Range string
After_date string
Cmd string Cmd string
Cookies_file string Cookies_file string
Opml_file string Opml_file string

View file

@ -21,73 +21,76 @@ type Download struct {
OutDir string OutDir string
Name string Name string
DryRun bool DryRun bool
Metadata bool
} }
func Youtube(d Download, p config.Provider) { func Youtube(d Download, p config.Provider) {
if p.Bgutil_server != "" && p.Po_token != "" { if p.Bgutil_server != "" && p.Po_token != "" {
log.Fatal("please only provide bgutil_server OR po_token, not both") log.Fatal("please only provide bgutil_server OR po_token, not both")
} }
archive := filepath.Join(d.OutDir, "archive.txt") vUrl, err := url.Parse(d.Url)
outdir := d.OutDir
opmlUrl, err := url.Parse(d.Url)
if err != nil { if err != nil {
panic(err) panic(err)
} }
q := opmlUrl.Query()
cid := q.Get("channel_id")
if cid == "" {
log.Fatal("no channel !")
}
if p.Url == "" {
p.Url = "https://www.youtube.com"
}
fullUrl, err := url.Parse(p.Url)
if err != nil {
panic(err)
}
channelUrl := fullUrl.JoinPath("channel", cid, "videos")
throttle := strconv.Itoa(p.Throttle)
args := []string{ args := []string{
"--no-progress", "--no-progress",
"--sleep-interval", throttle, "--paths", d.OutDir,
"--sleep-subtitles", throttle,
"--sleep-requests", throttle,
"--max-sleep-interval", "90",
"--prefer-free-formats",
"--write-subs",
"--no-write-automatic-subs",
"--sub-langs", "en",
"--paths", outdir,
"--download-archive", archive,
"--break-on-existing",
"--playlist-items", p.Range,
"--restrict-filenames", "--restrict-filenames",
"--embed-metadata",
"--write-thumbnail", "--write-thumbnail",
"--write-info-json", "--write-info-json",
"--match-filters", "!is_live & duration>?60",
"--convert-thumbnails", "jpg", "--convert-thumbnails", "jpg",
} }
if d.DryRun == true {
args = append(args, "--simulate")
log.Println("/!\\ DRY RUN ENABLED /!\\")
} else {
args = append(args, "--no-simulate")
}
if p.Verbose == true { if p.Verbose == true {
args = append(args, "--verbose") args = append(args, "--verbose")
} }
if d.DryRun == true {
log.Println("/!\\ DRY RUN ENABLED /!\\")
args = append(args, "--simulate")
} else {
args = append(args, "--no-simulate")
}
if d.Metadata == true {
log.Println("Downloading metadata")
mArgs := []string{
"--skip-download",
"--no-overwrites",
"--playlist-items", "0:0:1",
}
args = append(args, mArgs...)
} else {
log.Println("Downloading video")
archive := filepath.Join(d.OutDir, "archive.txt")
throttle := strconv.Itoa(p.Throttle)
dArgs := []string{
"--no-playlist",
"--sleep-interval", throttle,
"--sleep-subtitles", throttle,
"--sleep-requests", throttle,
"--max-sleep-interval", "90",
"--embed-metadata",
"--write-subs",
"--no-write-automatic-subs",
"--sub-langs", "en",
"--prefer-free-formats",
"--download-archive", archive,
"--break-on-existing",
"--match-filters", "!is_live & duration>?60",
}
args = append(args, dArgs...)
if p.Quality != "" {
args = append(args, "--format-sort", p.Quality)
} else {
args = append(args, "--format-sort", "res:1080")
}
}
if p.Cookies_file != "" { if p.Cookies_file != "" {
args = append(args, "--cookies") args = append(args, "--cookies")
args = append(args, p.Cookies_file) args = append(args, p.Cookies_file)
@ -95,11 +98,6 @@ func Youtube(d Download, p config.Provider) {
args = append(args, "--no-cookies") args = append(args, "--no-cookies")
} }
if p.After_date != "" {
args = append(args, "--dateafter")
args = append(args, p.After_date)
}
if p.Po_token != "" { if p.Po_token != "" {
args = append(args, "--extractor-args") args = append(args, "--extractor-args")
args = append(args, fmt.Sprintf("youtube:po_token=web.gvs+%s", p.Po_token)) args = append(args, fmt.Sprintf("youtube:po_token=web.gvs+%s", p.Po_token))
@ -115,21 +113,14 @@ func Youtube(d Download, p config.Provider) {
args = append(args, fmt.Sprintf("youtube:player_client=%s", p.Player_client)) args = append(args, fmt.Sprintf("youtube:player_client=%s", p.Player_client))
} }
args = append(args, "--format-sort")
if p.Quality != "" {
args = append(args, p.Quality)
} else {
args = append(args, "res:1080")
}
args = append(args, "--output")
if p.Output_path_template != "" { if p.Output_path_template != "" {
args = append(args, p.Output_path_template) args = append(args, "--output", p.Output_path_template)
} else { } else {
args = append(args, "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s-1080p.%(ext)s") args = append(args, "--output", "s%(upload_date>%Y)s/%(channel)s.s%(upload_date>%Y)Se%(upload_date>%m%d)S.%(title)s.%(id)s.%(ext)s")
} }
args = append(args, channelUrl.String()) args = append(args, vUrl.String())
cmd := exec.Command(p.Cmd, args...) cmd := exec.Command(p.Cmd, args...)
stdout, err := cmd.StdoutPipe() stdout, err := cmd.StdoutPipe()
@ -202,3 +193,22 @@ func Fetch(d Download) {
log.Printf("failed to write file") log.Printf("failed to write file")
} }
} }
// RssDownloader fetches the raw contents of the RSS feed at feedURL and
// returns the response body bytes. It returns an error when the HTTP
// request fails, when the server responds with a non-200 status, or when
// the body cannot be read.
//
// NOTE(review): http.DefaultClient has no timeout, so a stalled server can
// block the caller indefinitely — consider an http.Client with a Timeout.
func RssDownloader(feedURL string) ([]byte, error) {
	// Parameter renamed from "url" so it no longer shadows the net/url
	// package used elsewhere in this package.
	resp, err := http.Get(feedURL)
	if err != nil {
		return nil, fmt.Errorf("fetching RSS feed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching RSS feed: unexpected status %s", resp.Status)
	}
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading RSS data: %w", err)
	}
	return data, nil
}

View file

@ -1,4 +1,4 @@
package opml package format
import ( import (
"encoding/xml" "encoding/xml"
@ -22,7 +22,7 @@ type OPML struct {
Body `xml:"opml>body"` Body `xml:"opml>body"`
} }
func Load(path string) (OPML, error) { func OpmlLoad(path string) (OPML, error) {
data, err := os.ReadFile(path) data, err := os.ReadFile(path)
if err != nil { if err != nil {

89
internal/format/rss.go Normal file
View file

@ -0,0 +1,89 @@
package format
import (
"encoding/xml"
)
// Feed models the top-level <feed> element of a YouTube channel Atom feed
// (https://www.youtube.com/feeds/videos.xml?channel_id=...).
type Feed struct {
	XMLName xml.Name `xml:"feed"`
	Id      string   `xml:"id"`
	// encoding/xml struct tags do not understand namespace prefixes such
	// as "yt:" — a tag like "yt:channelId" is treated as namespace "yt"
	// plus local name, which never matches the feed's real namespace URL.
	// Unmarshalling matches on the local element name, so plain
	// "channelId" is the correct tag for <yt:channelId> (and keeps this
	// field consistent with Entry.VideoId / Entry.ChannelId below).
	ChannelId string  `xml:"channelId"`
	Title     string  `xml:"title"`
	Published string  `xml:"published"`
	Links     []Link  `xml:"link"`
	Author    Author  `xml:"author"`
	Entries   []Entry `xml:"entry"`
}

// Link is an Atom <link> element; Href carries the target URL.
type Link struct {
	Rel  string `xml:"rel,attr"`
	Href string `xml:"href,attr"`
}

// Author is the feed/entry <author>; Uri points at the channel page.
type Author struct {
	XMLName xml.Name `xml:"author"`
	Name    string   `xml:"name"`
	Uri     string   `xml:"uri"`
}

// MediaContent describes the <media:content> video resource.
type MediaContent struct {
	URL    string `xml:"url,attr"`
	Type   string `xml:"type,attr"`
	Width  string `xml:"width,attr"`
	Height string `xml:"height,attr"`
}

// MediaThumbnail describes the <media:thumbnail> image.
type MediaThumbnail struct {
	URL    string `xml:"url,attr"`
	Width  string `xml:"width,attr"`
	Height string `xml:"height,attr"`
}

// MediaStarRating carries the <media:starRating> vote attributes.
type MediaStarRating struct {
	Count   string `xml:"count,attr"`
	Average string `xml:"average,attr"`
	Min     string `xml:"min,attr"`
	Max     string `xml:"max,attr"`
}

// MediaStatistics carries the <media:statistics> view counter.
type MediaStatistics struct {
	Views string `xml:"views,attr"`
}

// MediaCommunity groups rating and view statistics for an entry.
type MediaCommunity struct {
	StarRating MediaStarRating `xml:"starRating"`
	Statistics MediaStatistics `xml:"statistics"`
}

// MediaGroup models <media:group>: title, video content, thumbnail,
// description and community statistics for one video.
type MediaGroup struct {
	Title       string         `xml:"title"`
	Content     MediaContent   `xml:"content"`
	Thumbnail   MediaThumbnail `xml:"thumbnail"`
	Description string         `xml:"description"`
	Community   MediaCommunity `xml:"community"`
}

// Entry is one <entry> (one video) in the channel feed. VideoId and
// ChannelId come from the <yt:videoId>/<yt:channelId> elements, matched
// by local name.
type Entry struct {
	XMLName    xml.Name   `xml:"entry"`
	Title      string     `xml:"title"`
	Id         string     `xml:"id"`
	VideoId    string     `xml:"videoId"`
	ChannelId  string     `xml:"channelId"`
	Link       Link       `xml:"link"`
	Author     Author     `xml:"author"`
	Published  string     `xml:"published"`
	Updated    string     `xml:"updated"`
	MediaGroup MediaGroup `xml:"group"`
}

// RssLoad parses raw Atom/RSS feed bytes into a Feed. On any XML error it
// returns the zero Feed together with the unmarshalling error.
func RssLoad(data []byte) (Feed, error) {
	var feed Feed
	if err := xml.Unmarshal(data, &feed); err != nil {
		return Feed{}, err
	}
	return feed, nil
}

View file

@ -7,7 +7,7 @@ import (
"strings" "strings"
"git.meatbag.se/varl/subsyt/internal/dl" "git.meatbag.se/varl/subsyt/internal/dl"
"git.meatbag.se/varl/subsyt/internal/models" "git.meatbag.se/varl/subsyt/internal/model"
) )
func episodeImage(path string) { func episodeImage(path string) {
@ -32,7 +32,7 @@ func showPoster(path string, show_dir string) {
} }
} }
func showBanner(show models.Show, showDir string) { func showBanner(show model.Show, showDir string) {
_, err := os.Stat(filepath.Join(showDir, "banner.jpg")) _, err := os.Stat(filepath.Join(showDir, "banner.jpg"))
if err == nil { if err == nil {
log.Printf("%s has a banner, skipping download\n", show.Title) log.Printf("%s has a banner, skipping download\n", show.Title)
@ -52,14 +52,14 @@ func showBanner(show models.Show, showDir string) {
} }
} }
func showFanart(show models.Show, showDir string) { func showFanart(show model.Show, showDir string) {
_, err := os.Stat(filepath.Join(showDir, "fanart.jpg")) _, err := os.Stat(filepath.Join(showDir, "fanart.jpg"))
if err == nil { if err == nil {
log.Printf("%s has fanart, skipping download\n", show.Title) log.Printf("%s has fanart, skipping download\n", show.Title)
return return
} }
c := models.Thumbnail{} c := model.Thumbnail{}
for index, thumb := range show.Thumbnails { for index, thumb := range show.Thumbnails {
log.Println(index, thumb) log.Println(index, thumb)
if thumb.Width > c.Width { if thumb.Width > c.Width {

View file

@ -8,7 +8,7 @@ import (
"regexp" "regexp"
"strings" "strings"
"git.meatbag.se/varl/subsyt/internal/models" "git.meatbag.se/varl/subsyt/internal/model"
"git.meatbag.se/varl/subsyt/internal/nfo" "git.meatbag.se/varl/subsyt/internal/nfo"
) )
@ -52,12 +52,12 @@ func Generate(outDir string, title string, dryRun bool) {
log.Println(index, path) log.Println(index, path)
switch { switch {
case show.MatchString(path): case show.MatchString(path):
show := models.LoadShow(path) show := model.LoadShow(path)
nfo.WriteShowInfo(show, filepath.Join(showDir, "tvshow.nfo")) nfo.WriteShowInfo(show, filepath.Join(showDir, "tvshow.nfo"))
showBanner(show, showDir) showBanner(show, showDir)
showFanart(show, showDir) showFanart(show, showDir)
case season.MatchString(path): case season.MatchString(path):
ep := models.LoadEpisode(path) ep := model.LoadEpisode(path)
nfo.WriteEpisodeNFO(ep, path) nfo.WriteEpisodeNFO(ep, path)
default: default:
log.Printf("no match for '%s'\n", path) log.Printf("no match for '%s'\n", path)

View file

@ -1,4 +1,4 @@
package models package model
import ( import (
"encoding/json" "encoding/json"

View file

@ -1,4 +1,4 @@
package models package model
import ( import (
"encoding/json" "encoding/json"

View file

@ -1,4 +1,4 @@
package models package model
type UniqueId struct { type UniqueId struct {
Text string `xml:",chardata"` Text string `xml:",chardata"`

View file

@ -6,10 +6,10 @@ import (
"os" "os"
"strings" "strings"
"git.meatbag.se/varl/subsyt/internal/models" "git.meatbag.se/varl/subsyt/internal/model"
) )
func WriteEpisodeNFO(ep models.Episode, info_path string) { func WriteEpisodeNFO(ep model.Episode, info_path string) {
out_path := strings.Replace(info_path, ".info.json", ".nfo", 1) out_path := strings.Replace(info_path, ".info.json", ".nfo", 1)
log.Printf("writing info from '%s' to '%s'\n", info_path, out_path) log.Printf("writing info from '%s' to '%s'\n", info_path, out_path)
@ -24,7 +24,7 @@ func WriteEpisodeNFO(ep models.Episode, info_path string) {
os.WriteFile(out_path, xmlData, 0644) os.WriteFile(out_path, xmlData, 0644)
} }
func WriteShowInfo(show models.Show, out_path string) { func WriteShowInfo(show model.Show, out_path string) {
log.Printf("writing info from '%v' to '%s'\n", show, out_path) log.Printf("writing info from '%v' to '%s'\n", show, out_path)
xmlData, err := xml.MarshalIndent(show, "", " ") xmlData, err := xml.MarshalIndent(show, "", " ")

42
main.go
View file

@ -2,21 +2,22 @@ package main
import ( import (
"flag" "flag"
"fmt"
"log" "log"
"os" "os"
"path/filepath" "path/filepath"
"git.meatbag.se/varl/subsyt/internal/config" "git.meatbag.se/varl/subsyt/internal/config"
"git.meatbag.se/varl/subsyt/internal/dl" "git.meatbag.se/varl/subsyt/internal/dl"
"git.meatbag.se/varl/subsyt/internal/format"
"git.meatbag.se/varl/subsyt/internal/metadata" "git.meatbag.se/varl/subsyt/internal/metadata"
"git.meatbag.se/varl/subsyt/internal/opml"
"git.meatbag.se/varl/subsyt/internal/scheduler" "git.meatbag.se/varl/subsyt/internal/scheduler"
) )
func run(cfg config.Config) { func run(cfg config.Config) {
provider := cfg.Provider["youtube"] provider := cfg.Provider["youtube"]
opml, err := opml.Load(provider.Opml_file) opml, err := format.OpmlLoad(provider.Opml_file)
if err != nil { if err != nil {
panic(err) panic(err)
} }
@ -25,13 +26,42 @@ func run(cfg config.Config) {
log.Printf("Archiving videos from OPML: %s\n", outlines.Title) log.Printf("Archiving videos from OPML: %s\n", outlines.Title)
for _, outline := range outlines.Outlines { for _, outline := range outlines.Outlines {
rssData, err := dl.RssDownloader(outline.XmlUrl)
if err != nil {
log.Printf("Failed to download RSS for %s: %v", outline.Title, err)
continue
}
feed, err := format.RssLoad(rssData)
if err != nil {
log.Printf("Failed to parse RSS for %s: %v", feed.Title, err)
continue
}
dl.Youtube(dl.Download{ dl.Youtube(dl.Download{
Url: outline.XmlUrl, Url: feed.Author.Uri,
OutDir: filepath.Join(cfg.Out_dir, outline.Title), OutDir: filepath.Join(cfg.Out_dir, outline.Title),
DryRun: cfg.Dry_run, DryRun: cfg.Dry_run,
Metadata: true,
}, provider) }, provider)
metadata.Generate(cfg.Out_dir, outline.Title, cfg.Dry_run) log.Printf("Downloaded RSS feed for %s with %d entries", feed.Title, len(feed.Entries))
for _, entry := range feed.Entries {
url := fmt.Sprintf("%s/watch?v=%s", provider.Url, entry.VideoId)
log.Printf("Entry: %#v", entry)
dl.Youtube(dl.Download{
Url: url,
OutDir: filepath.Join(cfg.Out_dir, feed.Title),
DryRun: cfg.Dry_run,
Metadata: false,
}, provider)
break
}
metadata.Generate(cfg.Out_dir, feed.Title, cfg.Dry_run)
} }
} }
} }