feat: youtube channel archiver

This commit is contained in:
Viktor Varland 2025-03-27 15:54:31 +01:00
commit 79be6e94f2
Signed by: varl
GPG key ID: 7459F0B410115EE8
7 changed files with 309 additions and 0 deletions

33
README.md Normal file
View file

@ -0,0 +1,33 @@
# subsyt
## config
`config.toml` (read from the current working directory):
```toml
out_dir = "./vids"
[provider]
[provider.youtube]
url = "https://www.youtube.com"
```
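`subs-opml.xml` (read from the current working directory; each top-level outline is a group and its nested outlines are the channels to download):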
```xml
<?xml version="1.0"?>
<opml version="1.1">
<body>
<outline ...>
<outline .../>
<outline .../>
<outline .../>
</outline>
<outline ...>
<outline .../>
<outline .../>
<outline .../>
</outline>
</body>
</opml>
```

7
go.mod Normal file
View file

@ -0,0 +1,7 @@
module git.meatbag.se/varl/subsyt
go 1.24.1
require github.com/pelletier/go-toml/v2 v2.2.3
require github.com/bmatcuk/doublestar v1.3.4 // indirect

12
go.sum Normal file
View file

@ -0,0 +1,12 @@
github.com/bmatcuk/doublestar v1.3.4 h1:gPypJ5xD31uhX6Tf54sDPUOBXTqKH4c9aPY66CyQrS0=
github.com/bmatcuk/doublestar v1.3.4/go.mod h1:wiQtGV+rzVYxB7WIlirSN++5HPtPlXEo9MEoZQC/PmE=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M=
github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

59
internal/dl/yt.go Normal file
View file

@ -0,0 +1,59 @@
package dl
import (
"bufio"
"log"
"os/exec"
"sync"
)
// List invokes yt-dlp for channelId with --skip-download and
// --force-write-archive, using "test.txt" as the download archive file.
//
// Both stdout and stderr of the child process are forwarded line by line to
// the standard logger. If the pipes cannot be created or the command cannot
// be started, the process is terminated through log.Fatal. An error returned
// while waiting for yt-dlp to finish is only logged.
func List(channelId string) {
	cmd := exec.Command("yt-dlp",
		"--skip-download",
		"--force-write-archive",
		"--download-archive", "test.txt",
		channelId,
	)

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		log.Fatal(err)
	}

	stderr, err := cmd.StderrPipe()
	if err != nil {
		log.Fatal(err)
	}

	log.Printf("Running yt-dlp for: %s", channelId)

	var wg sync.WaitGroup
	wg.Add(2)

	go func() {
		defer wg.Done()
		scanner := bufio.NewScanner(stdout)
		for scanner.Scan() {
			log.Printf("%s\n", scanner.Text())
		}
		// A scan can stop early (read failure, over-long line); report it
		// instead of silently truncating the forwarded output.
		if err := scanner.Err(); err != nil {
			log.Printf("Error: %s\n", err)
		}
	}()

	go func() {
		defer wg.Done()
		scanner := bufio.NewScanner(stderr)
		for scanner.Scan() {
			log.Printf("%s\n", scanner.Text())
		}
		if err := scanner.Err(); err != nil {
			log.Printf("Error: %s\n", err)
		}
	}()

	err = cmd.Start()
	if err != nil {
		log.Fatal(err)
	}

	// The pipes must be read to completion before cmd.Wait is called, because
	// Wait closes them once the command exits.
	wg.Wait()

	err = cmd.Wait()
	if err != nil {
		log.Printf("Error: %s\n", err)
	}
}

37
internal/opml/opml.go Normal file
View file

@ -0,0 +1,37 @@
package opml
import (
"encoding/xml"
"os"
)
// Outline maps a single <outline> element of an OPML document.
//
// The Text, Title, Type and XmlUrl fields are read from the element's
// "text", "title", "type" and "xmlUrl" attributes. Outlines holds the
// <outline> elements nested directly inside this one.
type Outline struct {
Outlines []Outline `xml:"outline"`
Text string `xml:"text,attr"`
Title string `xml:"title,attr"`
Type string `xml:"type,attr"`
XmlUrl string `xml:"xmlUrl,attr"`
}
// Body collects the <outline> elements that sit directly inside a <body>
// element, as selected by the "body>outline" path in the field tag.
type Body struct {
Outline []Outline `xml:"body>outline"`
}
// OPML is the root of a decoded OPML document; XMLName binds it to the
// <opml> element.
//
// Body is embedded, so its Outline field is reachable as OPML.Body.Outline.
// NOTE(review): encoding/xml appears to inline the fields of embedded structs
// regardless of the tag, which would make the "opml>body" tag below a no-op —
// confirm before relying on or changing it.
type OPML struct {
XMLName xml.Name `xml:"opml"`
Body `xml:"opml>body"`
}
// Unmarshal reads the file at path and decodes its XML content into an OPML
// value.
//
// It panics if the file cannot be read or if the content cannot be decoded,
// so a broken subscription file is reported instead of silently yielding an
// empty or partial result.
func Unmarshal(path string) OPML {
	data, err := os.ReadFile(path)
	if err != nil {
		panic(err)
	}

	var doc OPML
	if err := xml.Unmarshal(data, &doc); err != nil {
		panic(err)
	}
	return doc
}

150
main.go Normal file
View file

@ -0,0 +1,150 @@
package main
import (
"bufio"
"fmt"
"log"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"git.meatbag.se/varl/subsyt/internal/opml"
toml "github.com/pelletier/go-toml/v2"
)
// check is a fail-fast helper: a nil error is ignored, any other error is
// raised as a panic carrying that same error value.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
// Provider holds the settings of one video provider from the configuration.
// Url is the provider's base URL; main joins channel paths onto it.
type Provider struct {
Url string
}
// Channel describes a channel by its name, its id and the provider it
// belongs to.
// NOTE(review): this type is not referenced anywhere in the visible source —
// confirm whether it is still needed.
type Channel struct {
Name string
Id string
Provider string
}
// Config is the structure config.toml is decoded into.
//
// Out_dir is used by main as the root directory for downloads.
// NOTE(review): presumably filled from the "out_dir" key through the TOML
// decoder's field-name matching — confirm against go-toml's rules.
// Provider maps a provider name to its settings; main looks up "youtube".
type Config struct {
Out_dir string
Provider map[string]Provider
}
// Download describes one yt-dlp run performed by dl.
//
// Url is the address handed to yt-dlp, OutDir is the root output directory,
// Name is the sub-directory of OutDir used for this download, and AfterDate
// is passed verbatim as the value of yt-dlp's --dateafter option.
type Download struct {
Url *url.URL
OutDir string
AfterDate string
Name string
}
// dl runs yt-dlp for d.Url and stores the results beneath <OutDir>/<Name>.
//
// The directory <OutDir>/<Name> is passed to yt-dlp as --paths, and the
// download archive is kept in that directory as archive.txt. The output
// template groups files into one sub-directory per upload year and is
// relative to --paths. Only uploads after d.AfterDate are requested
// (--dateafter).
//
// stdout and stderr of yt-dlp are forwarded line by line to the standard
// logger. If the pipes cannot be created or the command cannot be started,
// the process is terminated through log.Fatal. An error returned while
// waiting for yt-dlp to finish is only logged.
func dl(d Download) {
	outdir := filepath.Join(d.OutDir, d.Name)
	archive := filepath.Join(outdir, "archive.txt")

	// Keep the template relative: yt-dlp resolves a relative --output against
	// --paths. Prefixing it with outdir again would nest the directory inside
	// itself whenever OutDir is a relative path such as "./vids".
	output := filepath.Join("%(upload_date>%Y)s", "%(upload_date)s-%(title)s-%(id)s.%(ext)s")

	cmd := exec.Command("yt-dlp",
		d.Url.String(),
		"--no-simulate",
		"--no-progress",
		"--sleep-interval", "1",
		"--sleep-subtitles", "1",
		"--sleep-requests", "1",
		"--prefer-free-formats",
		"--write-subs",
		"--no-write-automatic-subs",
		"--sub-langs", "en",
		"--dateafter", d.AfterDate,
		"--paths", outdir,
		"--output", output,
		"--download-archive", archive,
		"--break-on-existing",
	)

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		log.Fatal(err)
	}

	stderr, err := cmd.StderrPipe()
	if err != nil {
		log.Fatal(err)
	}

	log.Printf("Running yt-dlp for: %s", d.Url)

	var wg sync.WaitGroup
	wg.Add(2)

	go func() {
		defer wg.Done()
		scanner := bufio.NewScanner(stdout)
		for scanner.Scan() {
			log.Printf("%s\n", scanner.Text())
		}
		// A scan can stop early (read failure, over-long line); report it
		// instead of silently truncating the forwarded output.
		if err := scanner.Err(); err != nil {
			log.Printf("Error: %s\n", err)
		}
	}()

	go func() {
		defer wg.Done()
		scanner := bufio.NewScanner(stderr)
		for scanner.Scan() {
			log.Printf("%s\n", scanner.Text())
		}
		if err := scanner.Err(); err != nil {
			log.Printf("Error: %s\n", err)
		}
	}()

	err = cmd.Start()
	if err != nil {
		log.Fatal(err)
	}

	// The pipes must be read to completion before cmd.Wait is called, because
	// Wait closes them once the command exits.
	wg.Wait()

	err = cmd.Wait()
	if err != nil {
		log.Printf("Error: %s\n", err)
	}
}
// main reads ./config.toml and ./subs-opml.xml from the working directory and
// starts one download per channel.
//
// Every top-level outline of the OPML body is treated as a group whose title
// is printed. Each outline nested in a group is a channel: a leading "/feed/"
// is stripped from its xmlUrl, the remainder is joined onto the configured
// YouTube provider URL, and the result is handed to dl. Errors while reading
// or parsing the inputs abort the program.
func main() {
	data, err := os.ReadFile("./config.toml")
	check(err)

	cfg := Config{}
	err = toml.Unmarshal(data, &cfg)
	check(err)

	// Without a provider URL every channel URL built below would be
	// malformed, so stop with a clear message instead.
	pUrl := cfg.Provider["youtube"].Url
	if pUrl == "" {
		log.Fatal("config.toml: [provider.youtube] url is not set")
	}
	fmt.Printf("provider url: %s\n", pUrl)

	// Named subs rather than opml so the imported package is not shadowed.
	subs := opml.Unmarshal("./subs-opml.xml")
	fmt.Printf("XMLName: %#v\n", subs.XMLName)

	for _, group := range subs.Body.Outline {
		fmt.Printf("%s\n", group.Title)

		for _, outline := range group.Outlines {
			curl := strings.TrimPrefix(outline.XmlUrl, "/feed/")
			furl, err := url.JoinPath(pUrl, curl)
			check(err)
			fmt.Printf("%s - %s\n", outline.Text, furl)

			fullUrl, err := url.Parse(furl)
			check(err)

			dl(Download{
				Name:      outline.Title,
				Url:       fullUrl,
				OutDir:    cfg.Out_dir,
				AfterDate: "20250326",
			})
		}
	}
}

11
shell.nix Normal file
View file

@ -0,0 +1,11 @@
# Shell environment definition whose only build input is yt-dlp, the external
# command the Go code invokes. The shell hook is intentionally left empty.
with (import <nixpkgs> {});
mkShell {
buildInputs = [
yt-dlp
];
shellHook = ''
# empty
'';
}