From 460f2734efb37739d24ee7d42e265aaf6e01d7d6 Mon Sep 17 00:00:00 2001 From: Matt Jadud Date: Sun, 30 Nov 2025 14:06:55 -0500 Subject: [PATCH] Domain64 and DB backing tested --- internal/domain64/Makefile | 11 +- internal/domain64/database.go | 158 ++++++++++++++++++++++--- internal/domain64/database_test.go | 100 +++++++++++++++- internal/domain64/db.go | 31 ----- internal/domain64/domain64.go | 16 +-- internal/domain64/query.sql | 12 -- internal/domain64/schema.sql | 9 -- internal/domain64/sqlc/.gitignore | 2 + internal/domain64/sqlc/query.sql | 107 +++++++++++++++++ internal/domain64/sqlc/schema.sql | 39 ++++++ internal/domain64/{ => sqlc}/sqlc.yaml | 4 +- 11 files changed, 407 insertions(+), 82 deletions(-) delete mode 100644 internal/domain64/db.go delete mode 100644 internal/domain64/query.sql delete mode 100644 internal/domain64/schema.sql create mode 100644 internal/domain64/sqlc/.gitignore create mode 100644 internal/domain64/sqlc/query.sql create mode 100644 internal/domain64/sqlc/schema.sql rename internal/domain64/{ => sqlc}/sqlc.yaml (71%) diff --git a/internal/domain64/Makefile b/internal/domain64/Makefile index 5b431fc..98afadd 100644 --- a/internal/domain64/Makefile +++ b/internal/domain64/Makefile @@ -1,8 +1,9 @@ clean: - rm -rf db - -generate: clean - sqlc generate + cd sqlc ; rm -f test.sqlite + cd sqlc ; rm -f *.go -test: +generate: clean + cd sqlc ; sqlc generate + +test: generate go test *.go \ No newline at end of file diff --git a/internal/domain64/database.go b/internal/domain64/database.go index a317615..4f4850c 100644 --- a/internal/domain64/database.go +++ b/internal/domain64/database.go @@ -1,9 +1,14 @@ package domain64 import ( + "context" "database/sql" + "fmt" + "log" + "slices" "sync" + sqlc "git.jadud.com/grosbeak/internal/domain64/sqlc" "github.com/jpillora/go-tld" ) @@ -14,13 +19,6 @@ type Domain64Map struct { Flushed bool } -// Just use the D64 values. -// TLD :: 0-255 -// TLD.Domain :: FF:FFFFFF or some range of ints -// TLD.DOMAIN.SUBDOMAIN :: FF:FFFFFF:FF (bigger ints) -// now it is just a map -// https://stackoverflow.com/questions/40568759/sqlite-query-integer-field-based-on-a-bit - func NewDomain64Map(db *sql.DB) (*Domain64Map, error) { d64m := &Domain64Map{} d64m.DB = db @@ -29,12 +27,144 @@ func NewDomain64Map(db *sql.DB) (*Domain64Map, error) { return d64m, nil } -func (d64m *Domain64Map) Insert(url string) { - _, err := tld.Parse(url) - if err != nil { - // TODO - panic(err) +func (d64m *Domain64Map) URLToRFQDN(url *tld.URL) string { + s := "" + if url.TLD != "" { + s += url.TLD } - // If we have this TLD, return the value for it. - + if url.Domain != "" { + s += "." + url.Domain + } + if url.Subdomain != "" { + s += "." + url.Subdomain + } + s += url.Path + return s +} + +func _get_or_insert_tld(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error { + ctx := context.Background() + tld_id, err := queries.GetTLDId(ctx, url.TLD) + if err != nil { + cnt, err := queries.CountTLDs(ctx) + if err != nil { + return err + } + d64.TLD = cnt + 1 + err = queries.InsertTLD(ctx, sqlc.InsertTLDParams{TldID: d64.TLD, Tld: url.TLD}) + if err != nil { + return err + } + } else { + d64.TLD = tld_id + } + return nil +} + +func _get_or_insert_domain(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error { + ctx := context.Background() + domain_id, err := queries.GetDomainId(ctx, sqlc.GetDomainIdParams{TldID: d64.TLD, Domain: url.Domain}) + if err != nil { + cnt, err := queries.CountDomains(ctx, d64.TLD) + if err != nil { + return err + } + d64.Domain = cnt + 1 + err = queries.InsertDomain(ctx, sqlc.InsertDomainParams{TldID: d64.TLD, DomainID: d64.Domain, Domain: url.Domain}) + if err != nil { + return err + } + } else { + d64.Domain = domain_id + } + return nil +} + +func _get_or_insert_subdomain(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error { + ctx := context.Background() + subdomain_id, err := queries.GetSubdomainId(ctx, sqlc.GetSubdomainIdParams{ + TldID: int64(d64.TLD), DomainID: int64(d64.Domain), Subdomain: url.Subdomain, + }) + if err != nil { + if url.Subdomain == "" { + d64.Subdomain = 0 + } else { + cnt, err := queries.CountSubdomains(ctx, sqlc.CountSubdomainsParams{TldID: d64.TLD, DomainID: d64.Domain}) + if err != nil { + return err + } + d64.Subdomain = cnt + 1 + err = queries.InsertSubdomain(ctx, sqlc.InsertSubdomainParams{TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, Subdomain: url.Subdomain}) + if err != nil { + return err + } + } + } else { + d64.Subdomain = subdomain_id + } + return nil +} + +func _get_or_insert_path(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error { + ctx := context.Background() + log.Println(url, url.Path) + path_id, err := queries.GetPathId(ctx, sqlc.GetPathIdParams{ + TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, Path: url.Path, + }) + if err != nil { + if url.Path == "/" { + d64.Path = 0 + } else { + cnt, err := queries.CountPaths(ctx, sqlc.CountPathsParams{TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain}) + if err != nil { + return err + } + d64.Path = cnt + 1 + err = queries.InsertPath(ctx, sqlc.InsertPathParams{TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, PathID: d64.Path, Path: url.Path}) + if err != nil { + return err + } + } + } else { + d64.Path = path_id + } + return nil +} + +// FIXME: This feels like a very convoluted way to maintain the domain names. +// However, I also need to maintain uniqueness. Can I do this in one table? +func (d64m *Domain64Map) URLToDomain64(url *tld.URL) (*Domain64, error) { + allowed_schemes := []string{"https"} + if !slices.Contains(allowed_schemes, url.Scheme) { + return nil, fmt.Errorf("URL scheme must be in %q; given %s", allowed_schemes, url.Scheme) + } + + d64 := &Domain64{} + queries := sqlc.New(d64m.DB) + // These manipulate both the DB and the Domain64 struct + err := _get_or_insert_tld(queries, d64, url) + if err != nil { + return nil, err + } + err = _get_or_insert_domain(queries, d64, url) + if err != nil { + return nil, err + } + err = _get_or_insert_subdomain(queries, d64, url) + if err != nil { + return nil, err + } + err = _get_or_insert_path(queries, d64, url) + if err != nil { + return nil, err + } + return d64, nil +} + +func NullInt64(i int64) sql.NullInt64 { + return sql.NullInt64{Int64: i, Valid: true} +} + +func NullString(s string) sql.NullString { + return sql.NullString{String: s, Valid: true} } diff --git a/internal/domain64/database_test.go b/internal/domain64/database_test.go index 2a9a39d..57740be 100644 --- a/internal/domain64/database_test.go +++ b/internal/domain64/database_test.go @@ -6,15 +6,17 @@ import ( _ "embed" "testing" + "github.com/jpillora/go-tld" _ "modernc.org/sqlite" ) -//go:embed schema.sql +//go:embed sqlc/schema.sql var ddl string func setup() *sql.DB { ctx := context.Background() + // db, err := sql.Open("sqlite", "sqlc/test.sqlite") db, err := sql.Open("sqlite", ":memory:") if err != nil { // TODO @@ -39,3 +41,99 @@ func TestNewDomain64Map(t *testing.T) { t.Error("DB should not be nil") } } + +func TestURLToRFQDN(t *testing.T) { + M, _ := NewDomain64Map(nil) + simple, _ := tld.Parse("https://jadud.com/") + rfqdn := M.URLToRFQDN(simple) + if rfqdn != "com.jadud/" { + t.Errorf("Expected `com.jadud/`, got %s", rfqdn) + } +} + +func TestURLToDomain64(t *testing.T) { + db := setup() + M, _ := NewDomain64Map(db) + simple, _ := tld.Parse("https://jadud.com/") + d64, _ := M.URLToDomain64(simple) + if d64.TLD != 1 { + t.Errorf("expected TLD == 1, got %d", d64.TLD) + } + + if d64.Domain != 1 { + t.Errorf("expected domain == 1, got %d", d64.Domain) + } +} + +func TestURLToDomain64_02(t *testing.T) { + db := setup() + M, _ := NewDomain64Map(db) + simple1, _ := tld.Parse("https://jadud.com/") + simple2, _ := tld.Parse("https://another.com/") + d64_1, _ := M.URLToDomain64(simple1) + d64_2, _ := M.URLToDomain64(simple2) + + if d64_1.TLD != 1 { + t.Errorf("expected TLD == 1, got %d", d64_1.TLD) + } + + if d64_1.Domain != 1 { + t.Errorf("expected domain == 1, got %d", d64_1.Domain) + } + + if d64_2.TLD != 1 { + t.Errorf("expected TLD == 1, got %d", d64_2.TLD) + } + + if d64_2.Domain != 2 { + t.Errorf("expected domain == 2, got %d", d64_2.Domain) + } + +} + +func TestURLToDomain64_03(t *testing.T) { + db := setup() + M, _ := NewDomain64Map(db) + var tests = []struct { + url string + tld int64 + domain int64 + subdomain int64 + path int64 + d64 int64 + }{ + {"https://jadud.com/", 1, 1, 0, 0, 0x0100000100000000}, + {"https://research.jadud.com/", 1, 1, 1, 0, 0x0100000101000000}, + {"https://teaching.jadud.com/", 1, 1, 2, 0, 0x0100000102000000}, + {"https://teaching.jadud.com/classes", 1, 1, 2, 1, 0x0100000102000001}, + {"https://teaching.jadud.com/other-classes", 1, 1, 2, 2, 0x0100000102000002}, + {"https://research.jadud.com/papers", 1, 1, 1, 1, 0x0100000101000001}, + {"https://research.jadud.com/experiments", 1, 1, 1, 2, 0x0100000101000002}, + {"https://teaching.another.com/classes", 1, 2, 1, 1, 0x0100000201000001}, + {"https://teaching.jadud.org/classes", 2, 1, 1, 1, 0x0200000101000001}, + // The ordering here matters; if we see a "bare" domain after first seeing the + // subdomain, I expect the numbering to come out right. That is, subdomain <- 0 and + // path <- 0. That is because of "" and "/" checking on subdomain and path, respectively. + {"https://jadud.org/", 2, 1, 0, 0, 0x0200000100000000}, + } + + for _, tt := range tests { + parsed, _ := tld.Parse(tt.url) + d64, _ := M.URLToDomain64(parsed) + if d64.TLD != tt.tld { + t.Errorf("%s TLD expected %d given %d", tt.url, tt.tld, d64.TLD) + } + if d64.Domain != tt.domain { + t.Errorf("%s Domain expected %d given %d", tt.url, tt.domain, d64.Domain) + } + if d64.Subdomain != tt.subdomain { + t.Errorf("%s Subdomain expected %d given %d", tt.url, tt.subdomain, d64.Subdomain) + } + if d64.Path != tt.path { + t.Errorf("%s Path expected %d given %d", tt.url, tt.path, d64.Path) + } + if d64.ToInt64() != tt.d64 { + t.Errorf("%s int64 value expected %d given %d", tt.url, tt.d64, d64.ToInt64()) + } + } +} diff --git a/internal/domain64/db.go b/internal/domain64/db.go deleted file mode 100644 index 84ac1cb..0000000 --- a/internal/domain64/db.go +++ /dev/null @@ -1,31 +0,0 @@ -// Code generated by sqlc. DO NOT EDIT. -// versions: -// sqlc v1.30.0 - -package domain64 - -import ( - "context" - "database/sql" -) - -type DBTX interface { - ExecContext(context.Context, string, ...interface{}) (sql.Result, error) - PrepareContext(context.Context, string) (*sql.Stmt, error) - QueryContext(context.Context, string, ...interface{}) (*sql.Rows, error) - QueryRowContext(context.Context, string, ...interface{}) *sql.Row -} - -func New(db DBTX) *Queries { - return &Queries{db: db} -} - -type Queries struct { - db DBTX -} - -func (q *Queries) WithTx(tx *sql.Tx) *Queries { - return &Queries{ - db: tx, - } -} diff --git a/internal/domain64/domain64.go b/internal/domain64/domain64.go index 448acc4..608bfe8 100644 --- a/internal/domain64/domain64.go +++ b/internal/domain64/domain64.go @@ -2,13 +2,13 @@ package domain64 type Domain64 struct { // The TLD is FF - TLD uint8 + TLD int64 // The Domain is FFFFFF, so the uint16 is the closest we'll get - Domain uint16 + Domain int64 // Subdomains are FF - Subdomain uint8 + Subdomain int64 // Paths are, again, FFFFFF - Path uint16 + Path int64 } /* @@ -42,9 +42,9 @@ func (d64 Domain64) ToInt64() int64 { func IntToDomain64(i int64) Domain64 { d64 := Domain64{} - d64.TLD = uint8((i & MASK_TLD) >> SHIFT_TLD) - d64.Domain = uint16((i & MASK_DOMAIN) >> SHIFT_DOMAIN) - d64.Subdomain = uint8((i & MASK_SUBDOMAIN) >> SHIFT_SUBDOMAIN) - d64.Path = uint16(i & MASK_PATH) + d64.TLD = (i & MASK_TLD) >> SHIFT_TLD + d64.Domain = (i & MASK_DOMAIN) >> SHIFT_DOMAIN + d64.Subdomain = (i & MASK_SUBDOMAIN) >> SHIFT_SUBDOMAIN + d64.Path = i & MASK_PATH return d64 } diff --git a/internal/domain64/query.sql b/internal/domain64/query.sql deleted file mode 100644 index 1175a43..0000000 --- a/internal/domain64/query.sql +++ /dev/null @@ -1,12 +0,0 @@ --- name: InsertDomain64 :exec -INSERT INTO d64 - (url, d64, tld_id, domain_id, subdomain_id, path_id) - VALUES - (?, ?, ?, ?, ?, ?); - --- name: GetTLD :one -SELECT id from domain64 - WHERE tld_id = ? - --- name: CountTLD :one -SELECT COUNT(*) FROM domain64 WHERE tld_id = ?; \ No newline at end of file diff --git a/internal/domain64/schema.sql b/internal/domain64/schema.sql deleted file mode 100644 index 386093c..0000000 --- a/internal/domain64/schema.sql +++ /dev/null @@ -1,9 +0,0 @@ -CREATE TABLE domain64 ( - id INT PRIMARY KEY, - url TEXT, - d64 BIGINT, - tld_id INT, - domain_id INT, - subdomain_id INT, - path_id INT -) diff --git a/internal/domain64/sqlc/.gitignore b/internal/domain64/sqlc/.gitignore new file mode 100644 index 0000000..c50cfb6 --- /dev/null +++ b/internal/domain64/sqlc/.gitignore @@ -0,0 +1,2 @@ +test.sqlite +*.go \ No newline at end of file diff --git a/internal/domain64/sqlc/query.sql b/internal/domain64/sqlc/query.sql new file mode 100644 index 0000000..de1193c --- /dev/null +++ b/internal/domain64/sqlc/query.sql @@ -0,0 +1,107 @@ +-- name: InsertIntoDomain64 :one +INSERT INTO domain64 + (rfqdn, d64) + VALUES + (?, ?) + RETURNING d64 +; + +-- If you forget a semicolon, the previous +-- query gets flagged as a duplicate. +-- https://github.com/sqlc-dev/sqlc/issues/3851 + +-- name: GetTLDId :one +SELECT tld_id FROM tlds + WHERE tld = ? +; + +-- name: CountTLDs :one +SELECT COUNT(*) FROM tlds; + +-- name: InsertTLD :exec +INSERT INTO tlds + (tld_id, tld) + VALUES + (?, ?) +; + +-- name: GetDomainId :one +SELECT domain_id FROM domains + WHERE + tld_id = ? + AND + domain = ? +; + +-- name: CountDomains :one +SELECT COUNT(DISTINCT domain_id) + FROM domains + WHERE + tld_id = ? +; + +-- name: InsertDomain :exec +INSERT INTO domains + (tld_id, domain_id, domain) + VALUES + (?, ?, ?) +; + +-- name: GetSubdomainId :one +SELECT subdomain_id FROM subdomains + WHERE + tld_id = ? + AND + domain_id = ? + AND + subdomain = ? +; + +-- name: CountSubdomains :one +SELECT COUNT(DISTINCT subdomain_id) + FROM subdomains + WHERE + tld_id = ? + AND + domain_id = ? +; + +-- name: InsertSubdomain :exec +INSERT INTO subdomains + (tld_id, domain_id, subdomain_id, subdomain) + VALUES + (?, ?, ?, ?) +; + +-- name: GetPathId :one +SELECT path_id FROM paths + WHERE + tld_id = ? + AND + domain_id = ? + AND + subdomain_id = ? + AND + path = ? +; + +-- name: CountPaths :one +SELECT COUNT(DISTINCT path_id) + FROM paths + WHERE + tld_id = ? + AND + domain_id = ? + AND + subdomain_id = ? +; + +-- name: InsertPath :exec +INSERT INTO paths + (tld_id, domain_id, subdomain_id, path_id, path) + VALUES + (?, ?, ?, ?, ?) +; + +-- -- name: CountTLD :one +-- SELECT COUNT(DISTINCT tld_id) FROM domain64 WHERE tld_id = ?; \ No newline at end of file diff --git a/internal/domain64/sqlc/schema.sql b/internal/domain64/sqlc/schema.sql new file mode 100644 index 0000000..648abf2 --- /dev/null +++ b/internal/domain64/sqlc/schema.sql @@ -0,0 +1,39 @@ +CREATE TABLE IF NOT EXISTS tlds ( + tld_id INTEGER PRIMARY KEY NOT NULL, + tld TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS domains ( + id INTEGER PRIMARY KEY, + tld_id INTEGER NOT NULL REFERENCES tlds(tld_id), + domain_id INTEGER NOT NULL, + domain TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ndx_domains_uniq ON domains(tld_id, domain_id); + + +CREATE TABLE IF NOT EXISTS subdomains ( + id INTEGER PRIMARY KEY, + tld_id INTEGER NOT NULL REFERENCES tlds(tld_id), + domain_id INTEGER NOT NULL REFERENCES domains(domain_id), + subdomain_id INTEGER NOT NULL, + subdomain TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ndx_subdomains_uniq ON subdomains(tld_id, domain_id, subdomain_id); + +CREATE TABLE IF NOT EXISTS paths ( + id INTEGER PRIMARY KEY, + tld_id INTEGER NOT NULL REFERENCES tlds(tld_id), + domain_id INTEGER NOT NULL REFERENCES domains(domain_id), + subdomain_id INTEGER NOT NULL REFERENCES subdomains(subdomain_id), + path_id INTEGER NOT NULL, + path TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS ndx_paths_uniq ON paths(tld_id, domain_id, subdomain_id, path_id); + + +CREATE TABLE IF NOT EXISTS domain64 ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + rfqdn TEXT, + d64 BIGINT UNIQUE NOT NULL +); diff --git a/internal/domain64/sqlc.yaml b/internal/domain64/sqlc/sqlc.yaml similarity index 71% rename from internal/domain64/sqlc.yaml rename to internal/domain64/sqlc/sqlc.yaml index 1a8db1e..8045b80 100644 --- a/internal/domain64/sqlc.yaml +++ b/internal/domain64/sqlc/sqlc.yaml @@ -5,5 +5,5 @@ sql: schema: "schema.sql" gen: go: - package: "db" - out: "db" \ No newline at end of file + package: "test_db" + out: "." \ No newline at end of file