Domain64 and DB backing tested
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
clean:
|
||||
rm -rf db
|
||||
|
||||
generate: clean
|
||||
sqlc generate
|
||||
cd sqlc ; rm -f test.sqlite
|
||||
cd sqlc ; rm -f *.go
|
||||
|
||||
test:
|
||||
generate: clean
|
||||
cd sqlc ; sqlc generate
|
||||
|
||||
test: generate
|
||||
go test *.go
|
||||
@@ -1,9 +1,14 @@
|
||||
package domain64
|
||||
|
||||
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log"
	"slices"
	"sync"

	sqlc "git.jadud.com/grosbeak/internal/domain64/sqlc"
	"github.com/jpillora/go-tld"
)
|
||||
|
||||
@@ -14,13 +19,6 @@ type Domain64Map struct {
|
||||
Flushed bool
|
||||
}
|
||||
|
||||
// Just use the D64 values.
|
||||
// TLD :: 0-255
|
||||
// TLD.Domain :: FF:FFFFFF or some range of ints
|
||||
// TLD.DOMAIN.SUBDOMAIN :: FF:FFFFFF:FF (bigger ints)
|
||||
// now it is just a map
|
||||
// https://stackoverflow.com/questions/40568759/sqlite-query-integer-field-based-on-a-bit
|
||||
|
||||
func NewDomain64Map(db *sql.DB) (*Domain64Map, error) {
|
||||
d64m := &Domain64Map{}
|
||||
d64m.DB = db
|
||||
@@ -29,12 +27,144 @@ func NewDomain64Map(db *sql.DB) (*Domain64Map, error) {
|
||||
return d64m, nil
|
||||
}
|
||||
|
||||
func (d64m *Domain64Map) Insert(url string) {
|
||||
_, err := tld.Parse(url)
|
||||
if err != nil {
|
||||
// TODO
|
||||
panic(err)
|
||||
func (d64m *Domain64Map) URLToRFQDN(url *tld.URL) string {
|
||||
s := ""
|
||||
if url.TLD != "" {
|
||||
s += url.TLD
|
||||
}
|
||||
// If we have this TLD, return the value for it.
|
||||
|
||||
if url.Domain != "" {
|
||||
s += "." + url.Domain
|
||||
}
|
||||
if url.Subdomain != "" {
|
||||
s += "." + url.Subdomain
|
||||
}
|
||||
s += url.Path
|
||||
return s
|
||||
}
|
||||
|
||||
func _get_or_insert_tld(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
|
||||
ctx := context.Background()
|
||||
tld_id, err := queries.GetTLDId(ctx, url.TLD)
|
||||
if err != nil {
|
||||
cnt, err := queries.CountTLDs(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d64.TLD = cnt + 1
|
||||
err = queries.InsertTLD(ctx, sqlc.InsertTLDParams{TldID: d64.TLD, Tld: url.TLD})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
d64.TLD = tld_id
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func _get_or_insert_domain(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
|
||||
ctx := context.Background()
|
||||
domain_id, err := queries.GetDomainId(ctx, sqlc.GetDomainIdParams{TldID: d64.TLD, Domain: url.Domain})
|
||||
if err != nil {
|
||||
cnt, err := queries.CountDomains(ctx, d64.TLD)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d64.Domain = cnt + 1
|
||||
err = queries.InsertDomain(ctx, sqlc.InsertDomainParams{TldID: d64.TLD, DomainID: d64.Domain, Domain: url.Domain})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
d64.Domain = domain_id
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func _get_or_insert_subdomain(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
|
||||
ctx := context.Background()
|
||||
subdomain_id, err := queries.GetSubdomainId(ctx, sqlc.GetSubdomainIdParams{
|
||||
TldID: int64(d64.TLD), DomainID: int64(d64.Domain), Subdomain: url.Subdomain,
|
||||
})
|
||||
if err != nil {
|
||||
if url.Subdomain == "" {
|
||||
d64.Subdomain = 0
|
||||
} else {
|
||||
cnt, err := queries.CountSubdomains(ctx, sqlc.CountSubdomainsParams{TldID: d64.TLD, DomainID: d64.Domain})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d64.Subdomain = cnt + 1
|
||||
err = queries.InsertSubdomain(ctx, sqlc.InsertSubdomainParams{TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, Subdomain: url.Subdomain})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
d64.Subdomain = subdomain_id
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func _get_or_insert_path(queries *sqlc.Queries, d64 *Domain64, url *tld.URL) error {
|
||||
ctx := context.Background()
|
||||
log.Println(url, url.Path)
|
||||
path_id, err := queries.GetPathId(ctx, sqlc.GetPathIdParams{
|
||||
TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, Path: url.Path,
|
||||
})
|
||||
if err != nil {
|
||||
if url.Path == "/" {
|
||||
d64.Path = 0
|
||||
} else {
|
||||
cnt, err := queries.CountPaths(ctx, sqlc.CountPathsParams{TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
d64.Path = cnt + 1
|
||||
err = queries.InsertPath(ctx, sqlc.InsertPathParams{TldID: d64.TLD, DomainID: d64.Domain, SubdomainID: d64.Subdomain, PathID: d64.Path, Path: url.Path})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
d64.Path = path_id
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// FIXME: This feels like a very convoluted way to maintain the domain names.
|
||||
// However, I also need to maintain uniqueness. Can I do this in one table?
|
||||
func (d64m *Domain64Map) URLToDomain64(url *tld.URL) (*Domain64, error) {
|
||||
allowed_schemes := []string{"https"}
|
||||
if !slices.Contains(allowed_schemes, url.Scheme) {
|
||||
return nil, fmt.Errorf("URL scheme must be in %q; given %s", allowed_schemes, url.Scheme)
|
||||
}
|
||||
|
||||
d64 := &Domain64{}
|
||||
queries := sqlc.New(d64m.DB)
|
||||
// These manipulate both the DB and the Domain64 struct
|
||||
err := _get_or_insert_tld(queries, d64, url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = _get_or_insert_domain(queries, d64, url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = _get_or_insert_subdomain(queries, d64, url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
err = _get_or_insert_path(queries, d64, url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return d64, nil
|
||||
}
|
||||
|
||||
// NullInt64 wraps i in a sql.NullInt64 that is marked valid (non-NULL).
func NullInt64(i int64) sql.NullInt64 {
	return sql.NullInt64{Int64: i, Valid: true}
}
|
||||
|
||||
// NullString wraps s in a sql.NullString that is marked valid (non-NULL).
// An empty s still yields a valid value holding the empty string.
func NullString(s string) sql.NullString {
	return sql.NullString{String: s, Valid: true}
}
|
||||
|
||||
@@ -6,15 +6,17 @@ import (
|
||||
_ "embed"
|
||||
"testing"
|
||||
|
||||
"github.com/jpillora/go-tld"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
//go:embed schema.sql
|
||||
//go:embed sqlc/schema.sql
|
||||
var ddl string
|
||||
|
||||
func setup() *sql.DB {
|
||||
ctx := context.Background()
|
||||
|
||||
// db, err := sql.Open("sqlite", "sqlc/test.sqlite")
|
||||
db, err := sql.Open("sqlite", ":memory:")
|
||||
if err != nil {
|
||||
// TODO
|
||||
@@ -39,3 +41,99 @@ func TestNewDomain64Map(t *testing.T) {
|
||||
t.Error("DB should not be nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestURLToRFQDN(t *testing.T) {
|
||||
M, _ := NewDomain64Map(nil)
|
||||
simple, _ := tld.Parse("https://jadud.com/")
|
||||
rfqdn := M.URLToRFQDN(simple)
|
||||
if rfqdn != "com.jadud/" {
|
||||
t.Errorf("Expected `com.jadud/`, got %s", rfqdn)
|
||||
}
|
||||
}
|
||||
|
||||
func TestURLToDomain64(t *testing.T) {
|
||||
db := setup()
|
||||
M, _ := NewDomain64Map(db)
|
||||
simple, _ := tld.Parse("https://jadud.com/")
|
||||
d64, _ := M.URLToDomain64(simple)
|
||||
if d64.TLD != 1 {
|
||||
t.Errorf("expected TLD == 1, got %d", d64.TLD)
|
||||
}
|
||||
|
||||
if d64.Domain != 1 {
|
||||
t.Errorf("expected domain == 1, got %d", d64.Domain)
|
||||
}
|
||||
}
|
||||
|
||||
func TestURLToDomain64_02(t *testing.T) {
|
||||
db := setup()
|
||||
M, _ := NewDomain64Map(db)
|
||||
simple1, _ := tld.Parse("https://jadud.com/")
|
||||
simple2, _ := tld.Parse("https://another.com/")
|
||||
d64_1, _ := M.URLToDomain64(simple1)
|
||||
d64_2, _ := M.URLToDomain64(simple2)
|
||||
|
||||
if d64_1.TLD != 1 {
|
||||
t.Errorf("expected TLD == 1, got %d", d64_1.TLD)
|
||||
}
|
||||
|
||||
if d64_1.Domain != 1 {
|
||||
t.Errorf("expected domain == 1, got %d", d64_1.Domain)
|
||||
}
|
||||
|
||||
if d64_2.TLD != 1 {
|
||||
t.Errorf("expected TLD == 1, got %d", d64_2.TLD)
|
||||
}
|
||||
|
||||
if d64_2.Domain != 2 {
|
||||
t.Errorf("expected domain == 2, got %d", d64_2.Domain)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestURLToDomain64_03(t *testing.T) {
|
||||
db := setup()
|
||||
M, _ := NewDomain64Map(db)
|
||||
var tests = []struct {
|
||||
url string
|
||||
tld int64
|
||||
domain int64
|
||||
subdomain int64
|
||||
path int64
|
||||
d64 int64
|
||||
}{
|
||||
{"https://jadud.com/", 1, 1, 0, 0, 0x0100000100000000},
|
||||
{"https://research.jadud.com/", 1, 1, 1, 0, 0x0100000101000000},
|
||||
{"https://teaching.jadud.com/", 1, 1, 2, 0, 0x0100000102000000},
|
||||
{"https://teaching.jadud.com/classes", 1, 1, 2, 1, 0x0100000102000001},
|
||||
{"https://teaching.jadud.com/other-classes", 1, 1, 2, 2, 0x0100000102000002},
|
||||
{"https://research.jadud.com/papers", 1, 1, 1, 1, 0x0100000101000001},
|
||||
{"https://research.jadud.com/experiments", 1, 1, 1, 2, 0x0100000101000002},
|
||||
{"https://teaching.another.com/classes", 1, 2, 1, 1, 0x0100000201000001},
|
||||
{"https://teaching.jadud.org/classes", 2, 1, 1, 1, 0x0200000101000001},
|
||||
// The ordering here matters; if we see a "bare" domain after first seeing the
|
||||
// subdomain, I expect the numbering to come out right. That is, subdomain <- 0 and
|
||||
// path <- 0. That is because of "" and "/" checking on subdomain and path, respectively.
|
||||
{"https://jadud.org/", 2, 1, 0, 0, 0x0200000100000000},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
parsed, _ := tld.Parse(tt.url)
|
||||
d64, _ := M.URLToDomain64(parsed)
|
||||
if d64.TLD != tt.tld {
|
||||
t.Errorf("%s TLD expected %d given %d", tt.url, tt.tld, d64.TLD)
|
||||
}
|
||||
if d64.Domain != tt.domain {
|
||||
t.Errorf("%s Domain expected %d given %d", tt.url, tt.domain, d64.Domain)
|
||||
}
|
||||
if d64.Subdomain != tt.subdomain {
|
||||
t.Errorf("%s Subdomain expected %d given %d", tt.url, tt.subdomain, d64.Subdomain)
|
||||
}
|
||||
if d64.Path != tt.path {
|
||||
t.Errorf("%s Path expected %d given %d", tt.url, tt.path, d64.Path)
|
||||
}
|
||||
if d64.ToInt64() != tt.d64 {
|
||||
t.Errorf("%s int64 value expected %d given %d", tt.url, tt.d64, d64.ToInt64())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,31 +0,0 @@
|
||||
// Code generated by sqlc. DO NOT EDIT.
|
||||
// versions:
|
||||
// sqlc v1.30.0
|
||||
|
||||
package domain64
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
)
|
||||
|
||||
type DBTX interface {
|
||||
ExecContext(context.Context, string, ...interface{}) (sql.Result, error)
|
||||
PrepareContext(context.Context, string) (*sql.Stmt, error)
|
||||
QueryContext(context.Context, string, ...interface{}) (*sql.Rows, error)
|
||||
QueryRowContext(context.Context, string, ...interface{}) *sql.Row
|
||||
}
|
||||
|
||||
func New(db DBTX) *Queries {
|
||||
return &Queries{db: db}
|
||||
}
|
||||
|
||||
type Queries struct {
|
||||
db DBTX
|
||||
}
|
||||
|
||||
func (q *Queries) WithTx(tx *sql.Tx) *Queries {
|
||||
return &Queries{
|
||||
db: tx,
|
||||
}
|
||||
}
|
||||
@@ -2,13 +2,13 @@ package domain64
|
||||
|
||||
// Domain64 holds the four component ids that together identify a URL.
//
// Every field is an int64 so the ids can be exchanged with the database
// queries without conversion, but each one is only meaningful within the
// width it is given in the packed 64-bit form (see the per-field comments).
type Domain64 struct {
	// The TLD is FF (one byte).
	TLD int64
	// The Domain is FFFFFF (three bytes).
	Domain int64
	// Subdomains are FF (one byte).
	Subdomain int64
	// Paths are, again, FFFFFF (three bytes).
	Path int64
}
|
||||
|
||||
/*
|
||||
@@ -42,9 +42,9 @@ func (d64 Domain64) ToInt64() int64 {
|
||||
func IntToDomain64(i int64) Domain64 {
|
||||
d64 := Domain64{}
|
||||
|
||||
d64.TLD = uint8((i & MASK_TLD) >> SHIFT_TLD)
|
||||
d64.Domain = uint16((i & MASK_DOMAIN) >> SHIFT_DOMAIN)
|
||||
d64.Subdomain = uint8((i & MASK_SUBDOMAIN) >> SHIFT_SUBDOMAIN)
|
||||
d64.Path = uint16(i & MASK_PATH)
|
||||
d64.TLD = (i & MASK_TLD) >> SHIFT_TLD
|
||||
d64.Domain = (i & MASK_DOMAIN) >> SHIFT_DOMAIN
|
||||
d64.Subdomain = (i & MASK_SUBDOMAIN) >> SHIFT_SUBDOMAIN
|
||||
d64.Path = i & MASK_PATH
|
||||
return d64
|
||||
}
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
-- name: InsertDomain64 :exec
|
||||
INSERT INTO d64
|
||||
(url, d64, tld_id, domain_id, subdomain_id, path_id)
|
||||
VALUES
|
||||
(?, ?, ?, ?, ?, ?);
|
||||
|
||||
-- name: GetTLD :one
|
||||
SELECT id from domain64
|
||||
WHERE tld_id = ?
|
||||
|
||||
-- name: CountTLD :one
|
||||
SELECT COUNT(*) FROM domain64 WHERE tld_id = ?;
|
||||
@@ -1,9 +0,0 @@
|
||||
CREATE TABLE domain64 (
|
||||
id INT PRIMARY KEY,
|
||||
url TEXT,
|
||||
d64 BIGINT,
|
||||
tld_id INT,
|
||||
domain_id INT,
|
||||
subdomain_id INT,
|
||||
path_id INT
|
||||
)
|
||||
2
internal/domain64/sqlc/.gitignore
vendored
Normal file
2
internal/domain64/sqlc/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
test.sqlite
|
||||
*.go
|
||||
107
internal/domain64/sqlc/query.sql
Normal file
107
internal/domain64/sqlc/query.sql
Normal file
@@ -0,0 +1,107 @@
|
||||
-- name: InsertIntoDomain64 :one
|
||||
INSERT INTO domain64
|
||||
(rfqdn, d64)
|
||||
VALUES
|
||||
(?, ?)
|
||||
RETURNING d64
|
||||
;
|
||||
|
||||
-- If you forget a semicolon, the previous
|
||||
-- query gets flagged as a duplicate.
|
||||
-- https://github.com/sqlc-dev/sqlc/issues/3851
|
||||
|
||||
-- name: GetTLDId :one
|
||||
SELECT tld_id FROM tlds
|
||||
WHERE tld = ?
|
||||
;
|
||||
|
||||
-- name: CountTLDs :one
|
||||
SELECT COUNT(*) FROM tlds;
|
||||
|
||||
-- name: InsertTLD :exec
|
||||
INSERT INTO tlds
|
||||
(tld_id, tld)
|
||||
VALUES
|
||||
(?, ?)
|
||||
;
|
||||
|
||||
-- name: GetDomainId :one
|
||||
SELECT domain_id FROM domains
|
||||
WHERE
|
||||
tld_id = ?
|
||||
AND
|
||||
domain = ?
|
||||
;
|
||||
|
||||
-- name: CountDomains :one
|
||||
SELECT COUNT(DISTINCT domain_id)
|
||||
FROM domains
|
||||
WHERE
|
||||
tld_id = ?
|
||||
;
|
||||
|
||||
-- name: InsertDomain :exec
|
||||
INSERT INTO domains
|
||||
(tld_id, domain_id, domain)
|
||||
VALUES
|
||||
(?, ?, ?)
|
||||
;
|
||||
|
||||
-- name: GetSubdomainId :one
|
||||
SELECT subdomain_id FROM subdomains
|
||||
WHERE
|
||||
tld_id = ?
|
||||
AND
|
||||
domain_id = ?
|
||||
AND
|
||||
subdomain = ?
|
||||
;
|
||||
|
||||
-- name: CountSubdomains :one
|
||||
SELECT COUNT(DISTINCT subdomain_id)
|
||||
FROM subdomains
|
||||
WHERE
|
||||
tld_id = ?
|
||||
AND
|
||||
domain_id = ?
|
||||
;
|
||||
|
||||
-- name: InsertSubdomain :exec
|
||||
INSERT INTO subdomains
|
||||
(tld_id, domain_id, subdomain_id, subdomain)
|
||||
VALUES
|
||||
(?, ?, ?, ?)
|
||||
;
|
||||
|
||||
-- name: GetPathId :one
|
||||
SELECT path_id FROM paths
|
||||
WHERE
|
||||
tld_id = ?
|
||||
AND
|
||||
domain_id = ?
|
||||
AND
|
||||
subdomain_id = ?
|
||||
AND
|
||||
path = ?
|
||||
;
|
||||
|
||||
-- name: CountPaths :one
|
||||
SELECT COUNT(DISTINCT path_id)
|
||||
FROM paths
|
||||
WHERE
|
||||
tld_id = ?
|
||||
AND
|
||||
domain_id = ?
|
||||
AND
|
||||
subdomain_id = ?
|
||||
;
|
||||
|
||||
-- name: InsertPath :exec
|
||||
INSERT INTO paths
|
||||
(tld_id, domain_id, subdomain_id, path_id, path)
|
||||
VALUES
|
||||
(?, ?, ?, ?, ?)
|
||||
;
|
||||
|
||||
-- -- name: CountTLD :one
|
||||
-- SELECT COUNT(DISTINCT tld_id) FROM domain64 WHERE tld_id = ?;
|
||||
39
internal/domain64/sqlc/schema.sql
Normal file
39
internal/domain64/sqlc/schema.sql
Normal file
@@ -0,0 +1,39 @@
|
||||
CREATE TABLE IF NOT EXISTS tlds (
|
||||
tld_id INTEGER PRIMARY KEY NOT NULL,
|
||||
tld TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS domains (
|
||||
id INTEGER PRIMARY KEY,
|
||||
tld_id INTEGER NOT NULL REFERENCES tlds(tld_id),
|
||||
domain_id INTEGER NOT NULL,
|
||||
domain TEXT NOT NULL
|
||||
);
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS ndx_domains_uniq ON domains(tld_id, domain_id);
|
||||
|
||||
|
||||
-- Subdomains, numbered per (tld_id, domain_id).
CREATE TABLE IF NOT EXISTS subdomains (
    id INTEGER PRIMARY KEY,
    tld_id INTEGER NOT NULL REFERENCES tlds(tld_id),
    domain_id INTEGER NOT NULL,
    subdomain_id INTEGER NOT NULL,
    subdomain TEXT NOT NULL,
    -- domain_id is only unique within a TLD, so the parent key has to be the
    -- (tld_id, domain_id) pair covered by ndx_domains_uniq. Referencing
    -- domains(domain_id) alone is a foreign key mismatch in SQLite because
    -- that column is neither a primary key nor unique.
    FOREIGN KEY (tld_id, domain_id) REFERENCES domains(tld_id, domain_id)
);
CREATE UNIQUE INDEX IF NOT EXISTS ndx_subdomains_uniq ON subdomains(tld_id, domain_id, subdomain_id);
|
||||
|
||||
-- Paths, numbered per (tld_id, domain_id, subdomain_id).
CREATE TABLE IF NOT EXISTS paths (
    id INTEGER PRIMARY KEY,
    tld_id INTEGER NOT NULL REFERENCES tlds(tld_id),
    domain_id INTEGER NOT NULL,
    -- subdomain_id 0 means "no subdomain" and has no row in subdomains, so
    -- this column deliberately carries no foreign key.
    subdomain_id INTEGER NOT NULL,
    path_id INTEGER NOT NULL,
    path TEXT NOT NULL,
    -- domain_id is only unique within a TLD, so the parent key has to be the
    -- (tld_id, domain_id) pair covered by ndx_domains_uniq.
    FOREIGN KEY (tld_id, domain_id) REFERENCES domains(tld_id, domain_id)
);
CREATE UNIQUE INDEX IF NOT EXISTS ndx_paths_uniq ON paths(tld_id, domain_id, subdomain_id, path_id);
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS domain64 (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
rfqdn TEXT,
|
||||
d64 BIGINT UNIQUE NOT NULL
|
||||
);
|
||||
@@ -5,5 +5,5 @@ sql:
|
||||
schema: "schema.sql"
|
||||
gen:
|
||||
go:
|
||||
package: "db"
|
||||
out: "db"
|
||||
package: "test_db"
|
||||
out: "."
|
||||
Reference in New Issue
Block a user