Domain64 and DB backing tested

This commit is contained in:
Matt Jadud
2025-11-30 14:06:55 -05:00
parent f72c6b020f
commit 460f2734ef
11 changed files with 407 additions and 82 deletions

View File

@@ -1,8 +1,9 @@
clean:
rm -rf db
generate: clean
sqlc generate
cd sqlc ; rm -f test.sqlite
cd sqlc ; rm -f *.go
test:
generate: clean
cd sqlc ; sqlc generate
test: generate
go test *.go

View File

@@ -1,9 +1,14 @@
package domain64
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log"
	"slices"
	"sync"

	sqlc "git.jadud.com/grosbeak/internal/domain64/sqlc"
	"github.com/jpillora/go-tld"
)
@@ -14,13 +19,6 @@ type Domain64Map struct {
Flushed bool
}
// Just use the D64 values.
// TLD :: 0-255
// TLD.Domain :: FF:FFFFFF or some range of ints
// TLD.DOMAIN.SUBDOMAIN :: FF:FFFFFF:FF (bigger ints)
// now it is just a map
// https://stackoverflow.com/questions/40568759/sqlite-query-integer-field-based-on-a-bit
func NewDomain64Map(db *sql.DB) (*Domain64Map, error) {
d64m := &Domain64Map{}
d64m.DB = db
@@ -29,12 +27,144 @@ func NewDomain64Map(db *sql.DB) (*Domain64Map, error) {
return d64m, nil
}
func (d64m *Domain64Map) Insert(url string) {
_, err := tld.Parse(url)
if err != nil {
// TODO
panic(err)
// URLToRFQDN renders a parsed URL with its host components in reverse order:
// TLD first, then ".domain", then ".subdomain", followed by the URL path.
// Empty domain or subdomain components are skipped (together with their
// leading dot). For example, https://jadud.com/ becomes "com.jadud/".
func (d64m *Domain64Map) URLToRFQDN(url *tld.URL) string {
	// Starting from the TLD is the same as appending it to an empty string.
	rfqdn := url.TLD
	for _, part := range [2]string{url.Domain, url.Subdomain} {
		if part == "" {
			continue
		}
		rfqdn += "." + part
	}
	return rfqdn + url.Path
}
// _get_or_insert_tld resolves url.TLD to its numeric ID and stores it in
// d64.TLD.
//
// If the TLD already has a row in the tlds table, that row's ID is reused.
// Otherwise the next ID (current row count + 1) is assigned and inserted.
//
// Only sql.ErrNoRows is treated as "not present". Any other lookup failure is
// returned to the caller, so a transient database error can never cause a
// second ID to be minted for a TLD that already has one. IDs above 0xFF are
// rejected because the TLD component of a Domain64 is documented as one byte.
func _get_or_insert_tld(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
	ctx := context.Background()

	id, err := queries.GetTLDId(ctx, url.TLD)
	if err == nil {
		d64.TLD = id
		return nil
	}
	if !errors.Is(err, sql.ErrNoRows) {
		return fmt.Errorf("looking up TLD %q: %w", url.TLD, err)
	}

	cnt, err := queries.CountTLDs(ctx)
	if err != nil {
		return fmt.Errorf("counting TLDs: %w", err)
	}
	next := cnt + 1
	if next > 0xFF {
		return fmt.Errorf("no TLD IDs left for %q: %d exceeds 0xFF", url.TLD, next)
	}
	if err := queries.InsertTLD(ctx, sqlc.InsertTLDParams{TldID: next, Tld: url.TLD}); err != nil {
		return fmt.Errorf("inserting TLD %q: %w", url.TLD, err)
	}
	d64.TLD = next
	return nil
}
// _get_or_insert_domain resolves url.Domain, scoped to the TLD already stored
// in d64.TLD, to its numeric ID and stores it in d64.Domain.
//
// An existing (tld_id, domain) row is reused. Otherwise the next ID within
// that TLD (count of distinct domain IDs + 1) is assigned and inserted.
//
// Only sql.ErrNoRows is treated as "not present"; other lookup errors are
// returned so they cannot lead to a duplicate ID. IDs above 0xFFFFFF are
// rejected because the domain component of a Domain64 is documented as three
// bytes.
func _get_or_insert_domain(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
	ctx := context.Background()

	id, err := queries.GetDomainId(ctx, sqlc.GetDomainIdParams{TldID: d64.TLD, Domain: url.Domain})
	if err == nil {
		d64.Domain = id
		return nil
	}
	if !errors.Is(err, sql.ErrNoRows) {
		return fmt.Errorf("looking up domain %q: %w", url.Domain, err)
	}

	cnt, err := queries.CountDomains(ctx, d64.TLD)
	if err != nil {
		return fmt.Errorf("counting domains: %w", err)
	}
	next := cnt + 1
	if next > 0xFFFFFF {
		return fmt.Errorf("no domain IDs left for %q: %d exceeds 0xFFFFFF", url.Domain, next)
	}
	err = queries.InsertDomain(ctx, sqlc.InsertDomainParams{TldID: d64.TLD, DomainID: next, Domain: url.Domain})
	if err != nil {
		return fmt.Errorf("inserting domain %q: %w", url.Domain, err)
	}
	d64.Domain = next
	return nil
}
// _get_or_insert_subdomain resolves url.Subdomain, scoped to the TLD and
// domain already stored in d64, to its numeric ID and stores it in
// d64.Subdomain.
//
// An empty subdomain (a "bare" domain) always maps to the sentinel ID 0 and is
// never written to the database. Because the empty string is never inserted,
// it can be answered without a lookup. Any other subdomain reuses its existing
// row, or is assigned the next ID (count of distinct subdomain IDs + 1).
//
// Only sql.ErrNoRows is treated as "not present"; other lookup errors are
// returned so they cannot lead to a duplicate ID. IDs above 0xFF are rejected
// because the subdomain component of a Domain64 is documented as one byte.
func _get_or_insert_subdomain(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
	if url.Subdomain == "" {
		d64.Subdomain = 0
		return nil
	}

	ctx := context.Background()

	id, err := queries.GetSubdomainId(ctx, sqlc.GetSubdomainIdParams{
		TldID: d64.TLD, DomainID: d64.Domain, Subdomain: url.Subdomain,
	})
	if err == nil {
		d64.Subdomain = id
		return nil
	}
	if !errors.Is(err, sql.ErrNoRows) {
		return fmt.Errorf("looking up subdomain %q: %w", url.Subdomain, err)
	}

	cnt, err := queries.CountSubdomains(ctx, sqlc.CountSubdomainsParams{TldID: d64.TLD, DomainID: d64.Domain})
	if err != nil {
		return fmt.Errorf("counting subdomains: %w", err)
	}
	next := cnt + 1
	if next > 0xFF {
		return fmt.Errorf("no subdomain IDs left for %q: %d exceeds 0xFF", url.Subdomain, next)
	}
	err = queries.InsertSubdomain(ctx, sqlc.InsertSubdomainParams{
		TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: next, Subdomain: url.Subdomain,
	})
	if err != nil {
		return fmt.Errorf("inserting subdomain %q: %w", url.Subdomain, err)
	}
	d64.Subdomain = next
	return nil
}
// _get_or_insert_path resolves url.Path, scoped to the TLD, domain and
// subdomain already stored in d64, to its numeric ID and stores it in
// d64.Path.
//
// The root path "/" always maps to the sentinel ID 0 and is never written to
// the database, so it is answered without a lookup. Any other path reuses its
// existing row, or is assigned the next ID (count of distinct path IDs + 1).
//
// Only sql.ErrNoRows is treated as "not present"; other lookup errors are
// returned so they cannot lead to a duplicate ID. IDs above 0xFFFFFF are
// rejected because the path component of a Domain64 is documented as three
// bytes.
//
// NOTE(review): an empty path (a URL with no trailing slash) is not treated
// as the root here and receives its own ID — confirm that is intended.
func _get_or_insert_path(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
	// Debug trace kept from the original implementation.
	log.Println(url, url.Path)

	if url.Path == "/" {
		d64.Path = 0
		return nil
	}

	ctx := context.Background()

	id, err := queries.GetPathId(ctx, sqlc.GetPathIdParams{
		TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, Path: url.Path,
	})
	if err == nil {
		d64.Path = id
		return nil
	}
	if !errors.Is(err, sql.ErrNoRows) {
		return fmt.Errorf("looking up path %q: %w", url.Path, err)
	}

	cnt, err := queries.CountPaths(ctx, sqlc.CountPathsParams{
		TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain,
	})
	if err != nil {
		return fmt.Errorf("counting paths: %w", err)
	}
	next := cnt + 1
	if next > 0xFFFFFF {
		return fmt.Errorf("no path IDs left for %q: %d exceeds 0xFFFFFF", url.Path, next)
	}
	err = queries.InsertPath(ctx, sqlc.InsertPathParams{
		TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, PathID: next, Path: url.Path,
	})
	if err != nil {
		return fmt.Errorf("inserting path %q: %w", url.Path, err)
	}
	d64.Path = next
	return nil
}
// URLToDomain64 maps a parsed URL onto a Domain64, creating database rows for
// any TLD, domain, subdomain or path that has not been seen before.
//
// Only URLs whose scheme is "https" are accepted. A nil url or a map without
// a database handle yields an error rather than a nil-pointer panic. On any
// error the returned *Domain64 is nil.
//
// FIXME: This feels like a very convoluted way to maintain the domain names.
// However, I also need to maintain uniqueness. Can I do this in one table?
func (d64m *Domain64Map) URLToDomain64(url *tld.URL) (*Domain64, error) {
	if url == nil {
		return nil, fmt.Errorf("URL must not be nil")
	}
	if d64m.DB == nil {
		return nil, fmt.Errorf("Domain64Map has no database")
	}

	allowed_schemes := []string{"https"}
	if !slices.Contains(allowed_schemes, url.Scheme) {
		return nil, fmt.Errorf("URL scheme must be in %q; given %s", allowed_schemes, url.Scheme)
	}

	d64 := &Domain64{}
	queries := sqlc.New(d64m.DB)

	// Each step manipulates both the DB and the Domain64 struct. Order
	// matters: every level is looked up within the IDs resolved before it.
	steps := []func(*sqlc.Queries, *Domain64, *tld.URL) error{
		_get_or_insert_tld,
		_get_or_insert_domain,
		_get_or_insert_subdomain,
		_get_or_insert_path,
	}
	for _, step := range steps {
		if err := step(queries, d64, url); err != nil {
			return nil, err
		}
	}
	return d64, nil
}
// NullInt64 wraps i in a sql.NullInt64 that is marked valid (non-NULL).
func NullInt64(i int64) sql.NullInt64 {
	var n sql.NullInt64
	n.Int64, n.Valid = i, true
	return n
}
// NullString wraps s in a sql.NullString that is marked valid (non-NULL),
// including when s is the empty string.
func NullString(s string) sql.NullString {
	var n sql.NullString
	n.String, n.Valid = s, true
	return n
}

View File

@@ -6,15 +6,17 @@ import (
_ "embed"
"testing"
"github.com/jpillora/go-tld"
_ "modernc.org/sqlite"
)
//go:embed schema.sql
//go:embed sqlc/schema.sql
var ddl string
func setup() *sql.DB {
ctx := context.Background()
// db, err := sql.Open("sqlite", "sqlc/test.sqlite")
db, err := sql.Open("sqlite", ":memory:")
if err != nil {
// TODO
@@ -39,3 +41,99 @@ func TestNewDomain64Map(t *testing.T) {
t.Error("DB should not be nil")
}
}
// TestURLToRFQDN checks that a bare https URL is rendered TLD-first with its
// path appended.
func TestURLToRFQDN(t *testing.T) {
	// This test passes a nil database handle; the constructor's error result
	// is ignored here exactly as before, since only the receiver is needed.
	M, _ := NewDomain64Map(nil)

	simple, err := tld.Parse("https://jadud.com/")
	if err != nil {
		t.Fatalf("parsing URL: %v", err)
	}

	rfqdn := M.URLToRFQDN(simple)
	if rfqdn != "com.jadud/" {
		t.Errorf("Expected `com.jadud/`, got %s", rfqdn)
	}
}
// TestURLToDomain64 checks that the first URL stored in a fresh database is
// assigned TLD ID 1 and domain ID 1.
func TestURLToDomain64(t *testing.T) {
	db := setup()
	t.Cleanup(func() { _ = db.Close() })

	M, err := NewDomain64Map(db)
	if err != nil {
		t.Fatalf("NewDomain64Map: %v", err)
	}
	simple, err := tld.Parse("https://jadud.com/")
	if err != nil {
		t.Fatalf("parsing URL: %v", err)
	}
	// Fail here rather than dereferencing a nil *Domain64 below.
	d64, err := M.URLToDomain64(simple)
	if err != nil {
		t.Fatalf("URLToDomain64: %v", err)
	}

	if d64.TLD != 1 {
		t.Errorf("expected TLD == 1, got %d", d64.TLD)
	}
	if d64.Domain != 1 {
		t.Errorf("expected domain == 1, got %d", d64.Domain)
	}
}
// TestURLToDomain64_02 checks that two different domains under the same TLD
// share TLD ID 1 and receive consecutive domain IDs 1 and 2.
func TestURLToDomain64_02(t *testing.T) {
	db := setup()
	t.Cleanup(func() { _ = db.Close() })

	M, err := NewDomain64Map(db)
	if err != nil {
		t.Fatalf("NewDomain64Map: %v", err)
	}

	// convert parses and maps one URL, aborting the test on any error so the
	// assertions below never dereference a nil *Domain64.
	convert := func(raw string) *Domain64 {
		t.Helper()
		parsed, err := tld.Parse(raw)
		if err != nil {
			t.Fatalf("parsing %s: %v", raw, err)
		}
		d64, err := M.URLToDomain64(parsed)
		if err != nil {
			t.Fatalf("URLToDomain64(%s): %v", raw, err)
		}
		return d64
	}

	// Insertion order determines the IDs, so keep jadud.com first.
	d64_1 := convert("https://jadud.com/")
	d64_2 := convert("https://another.com/")

	if d64_1.TLD != 1 {
		t.Errorf("expected TLD == 1, got %d", d64_1.TLD)
	}
	if d64_1.Domain != 1 {
		t.Errorf("expected domain == 1, got %d", d64_1.Domain)
	}
	if d64_2.TLD != 1 {
		t.Errorf("expected TLD == 1, got %d", d64_2.TLD)
	}
	if d64_2.Domain != 2 {
		t.Errorf("expected domain == 2, got %d", d64_2.Domain)
	}
}
// TestURLToDomain64_03 feeds a sequence of URLs through one shared database
// and checks every component ID as well as the packed int64 value. The rows
// are order-dependent: each ID is derived from what was inserted before it.
func TestURLToDomain64_03(t *testing.T) {
	db := setup()
	t.Cleanup(func() { _ = db.Close() })

	M, err := NewDomain64Map(db)
	if err != nil {
		t.Fatalf("NewDomain64Map: %v", err)
	}

	var tests = []struct {
		url       string
		tld       int64
		domain    int64
		subdomain int64
		path      int64
		d64       int64
	}{
		{"https://jadud.com/", 1, 1, 0, 0, 0x0100000100000000},
		{"https://research.jadud.com/", 1, 1, 1, 0, 0x0100000101000000},
		{"https://teaching.jadud.com/", 1, 1, 2, 0, 0x0100000102000000},
		{"https://teaching.jadud.com/classes", 1, 1, 2, 1, 0x0100000102000001},
		{"https://teaching.jadud.com/other-classes", 1, 1, 2, 2, 0x0100000102000002},
		{"https://research.jadud.com/papers", 1, 1, 1, 1, 0x0100000101000001},
		{"https://research.jadud.com/experiments", 1, 1, 1, 2, 0x0100000101000002},
		{"https://teaching.another.com/classes", 1, 2, 1, 1, 0x0100000201000001},
		{"https://teaching.jadud.org/classes", 2, 1, 1, 1, 0x0200000101000001},
		// The ordering here matters; if we see a "bare" domain after first seeing the
		// subdomain, I expect the numbering to come out right. That is, subdomain <- 0 and
		// path <- 0. That is because of "" and "/" checking on subdomain and path, respectively.
		{"https://jadud.org/", 2, 1, 0, 0, 0x0200000100000000},
	}

	for _, tt := range tests {
		parsed, err := tld.Parse(tt.url)
		if err != nil {
			t.Fatalf("parsing %s: %v", tt.url, err)
		}
		// Later rows depend on earlier inserts, so stop at the first failure
		// instead of dereferencing a nil *Domain64.
		d64, err := M.URLToDomain64(parsed)
		if err != nil {
			t.Fatalf("URLToDomain64(%s): %v", tt.url, err)
		}

		if d64.TLD != tt.tld {
			t.Errorf("%s TLD expected %d given %d", tt.url, tt.tld, d64.TLD)
		}
		if d64.Domain != tt.domain {
			t.Errorf("%s Domain expected %d given %d", tt.url, tt.domain, d64.Domain)
		}
		if d64.Subdomain != tt.subdomain {
			t.Errorf("%s Subdomain expected %d given %d", tt.url, tt.subdomain, d64.Subdomain)
		}
		if d64.Path != tt.path {
			t.Errorf("%s Path expected %d given %d", tt.url, tt.path, d64.Path)
		}
		if got := d64.ToInt64(); got != tt.d64 {
			t.Errorf("%s int64 value expected %d given %d", tt.url, tt.d64, got)
		}
	}
}

View File

@@ -1,31 +0,0 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.30.0
package domain64
import (
"context"
"database/sql"
)
// DBTX is the set of query methods the generated code needs from a database
// handle. New accepts any implementation, and WithTx stores a *sql.Tx in the
// same field.
type DBTX interface {
ExecContext(context.Context, string, ...interface{}) (sql.Result, error)
PrepareContext(context.Context, string) (*sql.Stmt, error)
QueryContext(context.Context, string, ...interface{}) (*sql.Rows, error)
QueryRowContext(context.Context, string, ...interface{}) *sql.Row
}
// New returns a Queries value that runs its statements against db.
func New(db DBTX) *Queries {
return &Queries{db: db}
}
// Queries holds the database handle that the generated query methods use.
type Queries struct {
// db is the handle (connection pool or transaction) queries are sent to.
db DBTX
}
// WithTx returns a new Queries value bound to tx. The receiver is not
// modified.
func (q *Queries) WithTx(tx *sql.Tx) *Queries {
return &Queries{
db: tx,
}
}

View File

@@ -2,13 +2,13 @@ package domain64
type Domain64 struct {
// The TLD is FF
TLD uint8
TLD int64
// The Domain is FFFFFF (24 bits), which is more than a uint16 can hold
Domain uint16
Domain int64
// Subdomains are FF
Subdomain uint8
Subdomain int64
// Paths are, again, FFFFFF
Path uint16
Path int64
}
/*
@@ -42,9 +42,9 @@ func (d64 Domain64) ToInt64() int64 {
func IntToDomain64(i int64) Domain64 {
d64 := Domain64{}
d64.TLD = uint8((i & MASK_TLD) >> SHIFT_TLD)
d64.Domain = uint16((i & MASK_DOMAIN) >> SHIFT_DOMAIN)
d64.Subdomain = uint8((i & MASK_SUBDOMAIN) >> SHIFT_SUBDOMAIN)
d64.Path = uint16(i & MASK_PATH)
d64.TLD = (i & MASK_TLD) >> SHIFT_TLD
d64.Domain = (i & MASK_DOMAIN) >> SHIFT_DOMAIN
d64.Subdomain = (i & MASK_SUBDOMAIN) >> SHIFT_SUBDOMAIN
d64.Path = i & MASK_PATH
return d64
}

View File

@@ -1,12 +0,0 @@
-- name: InsertDomain64 :exec
INSERT INTO d64
(url, d64, tld_id, domain_id, subdomain_id, path_id)
VALUES
(?, ?, ?, ?, ?, ?);
-- name: GetTLD :one
SELECT id from domain64
WHERE tld_id = ?
-- name: CountTLD :one
SELECT COUNT(*) FROM domain64 WHERE tld_id = ?;

View File

@@ -1,9 +0,0 @@
CREATE TABLE domain64 (
id INT PRIMARY KEY,
url TEXT,
d64 BIGINT,
tld_id INT,
domain_id INT,
subdomain_id INT,
path_id INT
)

2
internal/domain64/sqlc/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
test.sqlite
*.go

View File

@@ -0,0 +1,107 @@
-- name: InsertIntoDomain64 :one
-- Stores an (rfqdn, d64) pair in domain64 and returns the stored d64 value.
INSERT INTO domain64
(rfqdn, d64)
VALUES
(?, ?)
RETURNING d64
;
-- If you forget a semicolon, the previous
-- query gets flagged as a duplicate.
-- https://github.com/sqlc-dev/sqlc/issues/3851
-- name: GetTLDId :one
-- Looks up the tld_id stored for a TLD string.
SELECT tld_id FROM tlds
WHERE tld = ?
;
-- name: CountTLDs :one
-- Counts the rows in tlds; the Go caller uses count + 1 as the next tld_id.
SELECT COUNT(*) FROM tlds;
-- name: InsertTLD :exec
-- Inserts a TLD under a caller-chosen tld_id.
INSERT INTO tlds
(tld_id, tld)
VALUES
(?, ?)
;
-- name: GetDomainId :one
-- Looks up the domain_id of a domain name within one TLD.
SELECT domain_id FROM domains
WHERE
tld_id = ?
AND
domain = ?
;
-- name: CountDomains :one
-- Counts the distinct domain_ids within one TLD; the Go caller uses
-- count + 1 as the next domain_id.
SELECT COUNT(DISTINCT domain_id)
FROM domains
WHERE
tld_id = ?
;
-- name: InsertDomain :exec
-- Inserts a domain name under a caller-chosen (tld_id, domain_id).
INSERT INTO domains
(tld_id, domain_id, domain)
VALUES
(?, ?, ?)
;
-- name: GetSubdomainId :one
-- Looks up the subdomain_id of a subdomain name within one (TLD, domain).
SELECT subdomain_id FROM subdomains
WHERE
tld_id = ?
AND
domain_id = ?
AND
subdomain = ?
;
-- name: CountSubdomains :one
-- Counts the distinct subdomain_ids within one (TLD, domain); the Go caller
-- uses count + 1 as the next subdomain_id.
SELECT COUNT(DISTINCT subdomain_id)
FROM subdomains
WHERE
tld_id = ?
AND
domain_id = ?
;
-- name: InsertSubdomain :exec
-- Inserts a subdomain name under a caller-chosen
-- (tld_id, domain_id, subdomain_id).
INSERT INTO subdomains
(tld_id, domain_id, subdomain_id, subdomain)
VALUES
(?, ?, ?, ?)
;
-- name: GetPathId :one
-- Looks up the path_id of a path within one (TLD, domain, subdomain).
SELECT path_id FROM paths
WHERE
tld_id = ?
AND
domain_id = ?
AND
subdomain_id = ?
AND
path = ?
;
-- name: CountPaths :one
-- Counts the distinct path_ids within one (TLD, domain, subdomain); the Go
-- caller uses count + 1 as the next path_id.
SELECT COUNT(DISTINCT path_id)
FROM paths
WHERE
tld_id = ?
AND
domain_id = ?
AND
subdomain_id = ?
;
-- name: InsertPath :exec
-- Inserts a path under a caller-chosen
-- (tld_id, domain_id, subdomain_id, path_id).
INSERT INTO paths
(tld_id, domain_id, subdomain_id, path_id, path)
VALUES
(?, ?, ?, ?, ?)
;
-- -- name: CountTLD :one
-- SELECT COUNT(DISTINCT tld_id) FROM domain64 WHERE tld_id = ?;

View File

@@ -0,0 +1,39 @@
-- One row per top-level domain. tld_id is chosen by the application.
CREATE TABLE IF NOT EXISTS tlds (
  tld_id INTEGER PRIMARY KEY NOT NULL,
  -- UNIQUE: a TLD string must map to exactly one tld_id; the lookup by name
  -- expects at most one matching row.
  tld TEXT NOT NULL UNIQUE
);
-- One row per domain name within a TLD. domain_id is chosen by the
-- application and is only unique within its tld_id.
CREATE TABLE IF NOT EXISTS domains (
  id INTEGER PRIMARY KEY,
  tld_id INTEGER NOT NULL REFERENCES tlds(tld_id),
  domain_id INTEGER NOT NULL,
  domain TEXT NOT NULL,
  -- A domain name must map to exactly one domain_id within its TLD.
  UNIQUE (tld_id, domain)
);
CREATE UNIQUE INDEX IF NOT EXISTS ndx_domains_uniq ON domains(tld_id, domain_id);
-- One row per subdomain name within a (TLD, domain). subdomain_id is chosen
-- by the application and is only unique within its (tld_id, domain_id).
CREATE TABLE IF NOT EXISTS subdomains (
  id INTEGER PRIMARY KEY,
  tld_id INTEGER NOT NULL REFERENCES tlds(tld_id),
  domain_id INTEGER NOT NULL,
  subdomain_id INTEGER NOT NULL,
  subdomain TEXT NOT NULL,
  -- domain_id alone is not unique in domains, so it cannot be a parent key;
  -- SQLite reports "foreign key mismatch" for such a reference once foreign
  -- keys are enforced. Reference the unique (tld_id, domain_id) pair instead.
  FOREIGN KEY (tld_id, domain_id) REFERENCES domains(tld_id, domain_id),
  -- A subdomain name must map to exactly one subdomain_id within its domain.
  UNIQUE (tld_id, domain_id, subdomain)
);
CREATE UNIQUE INDEX IF NOT EXISTS ndx_subdomains_uniq ON subdomains(tld_id, domain_id, subdomain_id);
-- One row per path within a (TLD, domain, subdomain). path_id is chosen by
-- the application and is only unique within that triple.
CREATE TABLE IF NOT EXISTS paths (
  id INTEGER PRIMARY KEY,
  tld_id INTEGER NOT NULL REFERENCES tlds(tld_id),
  domain_id INTEGER NOT NULL,
  -- Not a foreign key: subdomain_id 0 means "bare domain" and never has a
  -- row in subdomains, and subdomain_id alone is not unique there either.
  subdomain_id INTEGER NOT NULL,
  path_id INTEGER NOT NULL,
  path TEXT NOT NULL,
  -- domain_id alone is not unique in domains, so it cannot be a parent key;
  -- SQLite reports "foreign key mismatch" for such a reference once foreign
  -- keys are enforced. Reference the unique (tld_id, domain_id) pair instead.
  FOREIGN KEY (tld_id, domain_id) REFERENCES domains(tld_id, domain_id),
  -- A path must map to exactly one path_id within its host.
  UNIQUE (tld_id, domain_id, subdomain_id, path)
);
CREATE UNIQUE INDEX IF NOT EXISTS ndx_paths_uniq ON paths(tld_id, domain_id, subdomain_id, path_id);
-- Maps a stored rfqdn string to its packed 64-bit Domain64 value.
-- rfqdn is nullable; d64 is required and must be unique.
CREATE TABLE IF NOT EXISTS domain64 (
id INTEGER PRIMARY KEY AUTOINCREMENT,
rfqdn TEXT,
d64 BIGINT UNIQUE NOT NULL
);

View File

@@ -5,5 +5,5 @@ sql:
schema: "schema.sql"
gen:
go:
package: "db"
out: "db"
package: "test_db"
out: "."