Queueing, prepping to fetch

A lot more to go, but this is the core.

Need to think about how to test the queue handlers.
Matt Jadud
2025-11-30 21:29:30 -05:00
parent 06cdc68be7
commit f53639af2f
14 changed files with 258 additions and 105 deletions
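
The commit message leaves testing of the queue handlers as an open question. One rough sketch, not part of this commit: drive a worker through liteq against an in-memory SQLite database and stop the consumer by cancelling its context. This assumes Consume returns once its context is done (as the setupLiteQ comment below implies); the in-memory DSN, test name, example URL, and counting worker are all placeholders.

package main

import (
	"context"
	"database/sql"
	"sync/atomic"
	"testing"
	"time"

	liteq "git.jadud.com/jadudm/grosbeak/internal/liteq"
	_ "modernc.org/sqlite"
)

func TestFetchQueueInvokesWorker(t *testing.T) {
	// An in-memory database keeps the test hermetic.
	db, err := sql.Open("sqlite", ":memory:")
	if err != nil {
		t.Fatal(err)
	}
	db.SetMaxOpenConns(1)
	liteq.Setup(db)
	queue := liteq.New(db)

	// Count invocations instead of fetching anything.
	var calls atomic.Int64
	worker := func(ctx context.Context, job *liteq.Job) error {
		calls.Add(1)
		return nil
	}

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()

	if err := queue.QueueJob(ctx, liteq.QueueJobParams{
		Queue: "fetch",
		Job:   "https://example.com/",
	}); err != nil {
		t.Fatal(err)
	}

	// Consume until the context times out; the worker should have run by then.
	_ = queue.Consume(ctx, liteq.ConsumeParams{
		Queue:             "fetch",
		PoolSize:          1,
		VisibilityTimeout: 20,
		Worker:            worker,
	})

	if calls.Load() == 0 {
		t.Fatal("expected the fetch worker to run at least once")
	}
}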

cmd/api/init.go (new file, 78 lines)

@@ -0,0 +1,78 @@
package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"os"
	"time"

	"git.jadud.com/jadudm/grosbeak/internal/domain64"
	"git.jadud.com/jadudm/grosbeak/internal/engine"
	liteq "git.jadud.com/jadudm/grosbeak/internal/liteq"
	"git.jadud.com/jadudm/grosbeak/internal/types"

	_ "modernc.org/sqlite"
)

func setupDB() *sql.DB {
	// FIXME: This path needs to come from the env.
	db, err := sql.Open("sqlite", "grosbeak.sqlite")
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	// SQLite allows a single writer; keep the pool at one connection.
	db.SetMaxOpenConns(1)
	return db
}
func runQ(queue *liteq.JobQueue, queueName string, worker types.QueueWorker) {
	for {
		log.Printf("runQ %s\n", queueName)
		err := queue.Consume(context.Background(), liteq.ConsumeParams{
			Queue:             queueName,
			PoolSize:          3,
			VisibilityTimeout: 20,
			Worker:            worker,
		})
		if err != nil {
			log.Printf("runQ/%s: %s", queueName, err.Error())
			time.Sleep(2 * time.Second)
		}
		time.Sleep(1 * time.Second)
	}
}
func setupLiteQ(db *sql.DB, d64m *domain64.Domain64Map) *liteq.JobQueue {
	liteq.Setup(db)
	queue := liteq.New(db)
	// Each consumer runs against context.Background(), so the queues
	// process for the lifetime of the process.
	log.Println("setting up worker queues...")
	queues := []struct {
		queueName string
		worker    types.QueueWorker
	}{
		{"fetch", engine.Fetch(d64m)},
	}
	for _, q := range queues {
		go runQ(queue, q.queueName, q.worker)
	}
	return queue
}
func setupDomain64Map(db *sql.DB) *domain64.Domain64Map {
	d64m, err := domain64.NewDomain64Map()
	if err != nil {
		log.Printf("newdomain64map err: %s", err.Error())
		os.Exit(1)
	}
	d64m.Setup(db)
	return d64m
}
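
init.go takes each worker as a types.QueueWorker, which this commit does not show. Judging from the local queueWorker type declared in the file below, it is presumably a shared alias with the same signature, something like this sketch (an assumption about internal/types, not the actual file). Under that reading, engine.Fetch(d64m) is a constructor that closes over the Domain64 map and returns the worker, rather than being the handler itself.

package types

import (
	"context"

	liteq "git.jadud.com/jadudm/grosbeak/internal/liteq"
)

// QueueWorker is the function each liteq consumer calls once per job.
// It mirrors the queueWorker type formerly declared in cmd/api.
type QueueWorker func(ctx context.Context, job *liteq.Job) error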


@@ -1,107 +1,37 @@
package main

import (
	"context"
	"database/sql"
	"fmt"
	"log"
	"math/rand"
	"os"
	"sync"
	"time"

	_ "modernc.org/sqlite"

	liteq "git.jadud.com/jadudm/grosbeak/internal/liteq"
	"git.jadud.com/jadudm/grosbeak/internal/engine"
	base "git.jadud.com/jadudm/grosbeak/internal/types"
)

type queueWorker func(ctx context.Context, job *liteq.Job) error

func Fetch(ctx context.Context, job *liteq.Job) error {
	n := rand.Intn(50)
	time.Sleep(time.Duration(n) * time.Millisecond)
	log.Println("Fetching", job.Job)
	return nil
}
func runQ(queue *liteq.JobQueue, queueName string, worker queueWorker) {
	for {
		err := queue.Consume(context.Background(), liteq.ConsumeParams{
			Queue:             queueName,
			PoolSize:          3,
			VisibilityTimeout: 20,
			Worker:            worker,
		})
		if err != nil {
			log.Printf("runQ/%s: %w", queueName, err.Error())
			time.Sleep(2 * time.Second)
		}
		time.Sleep(1 * time.Second)
	}
}

func Entre(queue *liteq.JobQueue, chUrl <-chan string) {
	ctx := context.Background()
	for {
		url := <-chUrl
		n := time.Now()
		ignore_tag := fmt.Sprintf("%s:%d:%d", url, n.Year(), n.YearDay())
		log.Println("entre", url, ignore_tag)
		// Don't duplicate jobs on the same day of the year.
		err := queue.QueueJob(ctx, liteq.QueueJobParams{
			Queue: "fetch",
			// This only works for things in the `queued` state
			DedupingKey: liteq.IgnoreDuplicate(ignore_tag),
			Job:         url,
		})
		if err != nil {
			log.Println("entre err", err.Error())
		}
	}
}
func setupLiteQ() *liteq.JobQueue {
	// FIXME: This path needs to come from the env.
	liteqDB, err := sql.Open("sqlite", "liteq.db")
	liteqDB.SetMaxOpenConns(1)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	liteq.Setup(liteqDB)
	queue := liteq.New(liteqDB)
	// The queue processes as long as this context is not cancelled.
	log.Println("Setting up worker queues...")
	queues := []struct {
		queueName string
		worker    queueWorker
	}{
		{"fetch", Fetch},
	}
	for _, q := range queues {
		go runQ(queue, q.queueName, q.worker)
	}
	return queue
}
func main() {
	// Don't let `main()` exit
	wg := &sync.WaitGroup{}
	wg.Add(1)
	queue := setupLiteQ()
	db := setupDB()
	d64m := setupDomain64Map(db)
	queue := setupLiteQ(db, d64m)
	// Create the network for the search engine.
	chUrl := make(chan string)
	// Enqueue URLs
	urls := []struct {
		url string
		uf  base.UpdateFrequency
	}{
		{"https://jadud.com/", base.UPDATE_DAILY},
		{"https://berea.us/", base.UPDATE_WEEKLY},
	}
	go Entre(queue, chUrl)
	for range 5 {
		chUrl <- "https://jadud.com/"
		chUrl <- "https://berea.us/"
	for _, u := range urls {
		engine.Entre(queue, &base.EntreJob{URL: u.url, UpdateFrequency: u.uf})
		time.Sleep(1 * time.Second)
	}
	// Don't exit.