From 79be6e94f231777774639a54059aa4db64c17c5f Mon Sep 17 00:00:00 2001
From: Viktor Varland
Date: Thu, 27 Mar 2025 15:54:31 +0100
Subject: [PATCH] feat: youtube channel archiver

---
 README.md             |  33 ++++++++
 go.mod                |   7 ++
 go.sum                |  12 +++
 internal/dl/yt.go     |  79 ++++++++++++++++++
 internal/opml/opml.go |  48 +++++++++++
 main.go               | 186 ++++++++++++++++++++++++++++++++++++++++++
 shell.nix             |  11 +++
 7 files changed, 376 insertions(+)
 create mode 100644 README.md
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 internal/dl/yt.go
 create mode 100644 internal/opml/opml.go
 create mode 100644 main.go
 create mode 100644 shell.nix

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5098c97
--- /dev/null
+++ b/README.md
@@ -0,0 +1,33 @@
+# subsyt
+
+## config
+
+`config.toml`:
+
+```toml
+out_dir = "./vids"
+
+[provider]
+[provider.youtube]
+url = "https://www.youtube.com"
+```
+
+`subs-opml.xml`:
+
+```xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+```
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..c5acaa6
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,7 @@
+module git.meatbag.se/varl/subsyt
+
+go 1.24.1
+
+require github.com/pelletier/go-toml/v2 v2.2.3
+
+require github.com/bmatcuk/doublestar v1.3.4 // indirect
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..1bcdaec
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,12 @@
+github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0=
+github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
+github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/internal/dl/yt.go b/internal/dl/yt.go
new file mode 100644
index 0000000..3030358
--- /dev/null
+++ b/internal/dl/yt.go
@@ -0,0 +1,79 @@
+// Package dl wraps invocations of the yt-dlp command line tool.
+package dl
+
+import (
+	"bufio"
+	"io"
+	"log"
+	"os/exec"
+	"sync"
+)
+
+// List runs yt-dlp for channelId with --skip-download and
+// --force-write-archive, using "test.txt" as the download archive. Every line
+// the child process writes to stdout or stderr is forwarded to the standard
+// logger.
+//
+// Failing to create the pipes or to start the process ends the program via
+// log.Fatal; a non-zero exit status is only logged.
+func List(channelId string) {
+	cmd := exec.Command("yt-dlp",
+		"--skip-download",
+		"--force-write-archive",
+		"--download-archive", "test.txt",
+		channelId,
+	)
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	log.Printf("Running yt-dlp for: %s", channelId)
+
+	var wg sync.WaitGroup
+	wg.Add(2)
+
+	go func() {
+		defer wg.Done()
+		logLines(stdout)
+	}()
+
+	go func() {
+		defer wg.Done()
+		logLines(stderr)
+	}()
+
+	err = cmd.Start()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Both pipes have to be read to EOF before Wait, because Wait closes them.
+	wg.Wait()
+	err = cmd.Wait()
+
+	if err != nil {
+		log.Printf("Error: %s\n", err)
+	}
+}
+
+// logLines forwards every line read from r to the standard logger. If the
+// scanner stops early (for example on a line longer than its buffer), the
+// error is logged and the rest of r is discarded so that the child process
+// cannot block on a full pipe.
+func logLines(r io.Reader) {
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		log.Printf("%s\n", scanner.Text())
+	}
+	if err := scanner.Err(); err != nil {
+		log.Printf("Error: %s\n", err)
+		// Best-effort drain; a read failure here leaves nothing more to do.
+		_, _ = io.Copy(io.Discard, r)
+	}
+}
diff --git a/internal/opml/opml.go b/internal/opml/opml.go
new file mode 100644
index 0000000..792431c
--- /dev/null
+++ b/internal/opml/opml.go
@@ -0,0 +1,48 @@
+// Package opml reads OPML subscription files.
+package opml
+
+import (
+	"encoding/xml"
+	"os"
+)
+
+// Outline is one outline element together with the outline elements nested
+// directly inside it.
+type Outline struct {
+	Outlines []Outline `xml:"outline"`
+	Text     string    `xml:"text,attr"`
+	Title    string    `xml:"title,attr"`
+	Type     string    `xml:"type,attr"`
+	XmlUrl   string    `xml:"xmlUrl,attr"`
+}
+
+// Body holds the outlines found directly below the body element.
+type Body struct {
+	Outline []Outline `xml:"body>outline"`
+}
+
+// OPML is the root opml element of a subscription file.
+type OPML struct {
+	XMLName xml.Name `xml:"opml"`
+	Body    `xml:"opml>body"`
+}
+
+// Unmarshal reads the file at path and decodes it into an OPML value.
+//
+// It panics if the file cannot be read or cannot be decoded.
+func Unmarshal(path string) OPML {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		panic(err)
+	}
+
+	opml := OPML{}
+
+	// A decoding error must not be dropped: it would make a broken file
+	// indistinguishable from an empty subscription list.
+	if err := xml.Unmarshal(data, &opml); err != nil {
+		panic(err)
+	}
+
+	return opml
+}
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..66fef59
--- /dev/null
+++ b/main.go
@@ -0,0 +1,186 @@
+// Command subsyt archives the channels listed in an OPML subscription file by
+// running yt-dlp once per channel.
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"log"
+	"net/url"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"sync"
+
+	"git.meatbag.se/varl/subsyt/internal/opml"
+	toml "github.com/pelletier/go-toml/v2"
+)
+
+// afterDate is the --dateafter value passed to yt-dlp for every channel.
+const afterDate = "20250326"
+
+// check panics if e is not nil.
+func check(e error) {
+	if e != nil {
+		panic(e)
+	}
+}
+
+// Provider is one entry of the provider table in config.toml.
+type Provider struct {
+	Url string
+}
+
+// Channel identifies a channel at a provider.
+type Channel struct {
+	Name     string
+	Id       string
+	Provider string
+}
+
+// Config is the decoded form of config.toml.
+type Config struct {
+	Out_dir  string
+	Provider map[string]Provider
+}
+
+// Download carries the parameters of a single yt-dlp run.
+type Download struct {
+	Url       *url.URL // URL handed to yt-dlp
+	OutDir    string   // root directory shared by all downloads
+	AfterDate string   // value for --dateafter
+	Name      string   // sub-directory of OutDir used for this download
+}
+
+// dl runs yt-dlp for d and forwards the output of the child process to the
+// standard logger. Downloads and the download archive are kept below
+// d.OutDir/d.Name.
+//
+// Failing to create the pipes or to start the process ends the program via
+// log.Fatal; a non-zero exit status is only logged.
+func dl(d Download) {
+	outdir := filepath.Join(d.OutDir, d.Name)
+	archive := filepath.Join(outdir, "archive.txt")
+
+	// The template is relative on purpose: yt-dlp resolves a relative --output
+	// against --paths, so repeating outdir here would nest a relative out_dir
+	// inside itself.
+	output := "%(upload_date>%Y)s/%(upload_date)s-%(title)s-%(id)s.%(ext)s"
+
+	cmd := exec.Command("yt-dlp",
+		d.Url.String(),
+		"--no-simulate",
+		"--no-progress",
+		"--sleep-interval", "1",
+		"--sleep-subtitles", "1",
+		"--sleep-requests", "1",
+		"--prefer-free-formats",
+		"--write-subs",
+		"--no-write-automatic-subs",
+		"--sub-langs", "en",
+		"--dateafter", d.AfterDate,
+		"--paths", outdir,
+		"--output", output,
+		"--download-archive", archive,
+		"--break-on-existing",
+	)
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+	stderr, err := cmd.StderrPipe()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	log.Printf("Running yt-dlp for: %s", d.Url)
+
+	var wg sync.WaitGroup
+	wg.Add(2)
+
+	go func() {
+		defer wg.Done()
+		logLines(stdout)
+	}()
+
+	go func() {
+		defer wg.Done()
+		logLines(stderr)
+	}()
+
+	err = cmd.Start()
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Both pipes have to be read to EOF before Wait, because Wait closes them.
+	wg.Wait()
+	err = cmd.Wait()
+
+	if err != nil {
+		log.Printf("Error: %s\n", err)
+	}
+}
+
+// logLines forwards every line read from r to the standard logger. If the
+// scanner stops early (for example on a line longer than its buffer), the
+// error is logged and the rest of r is discarded so that the child process
+// cannot block on a full pipe.
+func logLines(r io.Reader) {
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		log.Printf("%s\n", scanner.Text())
+	}
+	if err := scanner.Err(); err != nil {
+		log.Printf("Error: %s\n", err)
+		// Best-effort drain; a read failure here leaves nothing more to do.
+		_, _ = io.Copy(io.Discard, r)
+	}
+}
+
+// main reads ./config.toml and ./subs-opml.xml and runs dl for every outline
+// nested inside a top-level outline of the subscription file.
+func main() {
+	data, err := os.ReadFile("./config.toml")
+	check(err)
+
+	cfg := Config{}
+
+	err = toml.Unmarshal(data, &cfg)
+	check(err)
+
+	pUrl := cfg.Provider["youtube"].Url
+	if pUrl == "" {
+		log.Fatal("config.toml: provider.youtube.url is not set")
+	}
+
+	fmt.Printf("provider url: %s\n", pUrl)
+
+	subs := opml.Unmarshal("./subs-opml.xml")
+
+	fmt.Printf("XMLName: %#v\n", subs.XMLName)
+
+	for _, group := range subs.Body.Outline {
+		fmt.Printf("%s\n", group.Title)
+
+		for _, outline := range group.Outlines {
+			curl := strings.TrimPrefix(outline.XmlUrl, "/feed/")
+			furl, err := url.JoinPath(pUrl, curl)
+			check(err)
+
+			fmt.Printf("%s - %s\n", outline.Text, furl)
+			fullUrl, err := url.Parse(furl)
+			check(err)
+
+			dl(Download{
+				Name:      outline.Title,
+				Url:       fullUrl,
+				OutDir:    cfg.Out_dir,
+				AfterDate: afterDate,
+			})
+		}
+	}
+}
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..4d64004
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,11 @@
+with (import <nixpkgs> {});
+
+mkShell {
+  buildInputs = [
+    yt-dlp
+  ];
+
+  shellHook = ''
+    # empty
+  '';
+}