feat(backend): record tag usage in filters to activity.tag_uses

Listing files with a tag filter now logs each referenced tag to
activity.tag_uses, flagging it included (positive) or excluded (negated
under an odd number of NOTs); the untagged pseudo-token is skipped. The
filter AST is reused to determine polarity, so grouped negations like
!(A|B) mark both tags excluded.

Recording happens only when a filter is first applied — not on cursor
pagination or an anchored return — so one browse counts once. The write
is best-effort and never fails the listing.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-11 21:40:13 +03:00
parent 6a3bb9ff51
commit 73ae8a046f
6 changed files with 266 additions and 15 deletions
+30
View File
@@ -811,3 +811,33 @@ func (r *FileRepo) RecordView(ctx context.Context, fileID uuid.UUID, userID int1
} }
return nil return nil
} }
// RecordTagUses writes one activity.tag_uses row per distinct tag referenced by
// filterDSL, with is_included set to true for a positive reference and false
// for a negated one. filterTagUses already deduplicates tags, so a single
// statement_timestamp() cannot collide on the (tag_id, used_at, user_id) PK
// within one call; ON CONFLICT DO NOTHING covers any remaining collision.
//
// A filter that references no tags is a no-op and returns nil. Any error from
// executing the insert is wrapped and returned.
func (r *FileRepo) RecordTagUses(ctx context.Context, userID int16, filterDSL string) error {
	uses := filterTagUses(filterDSL)
	if len(uses) == 0 {
		return nil
	}

	// Each row binds three placeholders: tag_id, user_id, is_included.
	const colsPerRow = 3
	tuples := make([]string, len(uses))
	args := make([]any, 0, len(uses)*colsPerRow)
	for i, use := range uses {
		first := i*colsPerRow + 1
		tuples[i] = fmt.Sprintf("($%d, $%d, $%d)", first, first+1, first+2)
		args = append(args, use.tagID, userID, use.included)
	}

	query := "INSERT INTO activity.tag_uses (tag_id, user_id, is_included) VALUES " +
		strings.Join(tuples, ", ") +
		" ON CONFLICT DO NOTHING"

	_, err := connOrTx(ctx, r.pool).Exec(ctx, query, args...)
	if err != nil {
		return fmt.Errorf("FileRepo.RecordTagUses: %w", err)
	}
	return nil
}
+76 -14
View File
@@ -253,6 +253,31 @@ func (p *filterParser) parseAtom() (filterNode, error) {
// Public entry point // Public entry point
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// parseFilterAST tokenizes a filter DSL string and parses the tokens into an
// AST. It returns (nil, nil) when there is nothing to parse: the trimmed DSL is
// empty or "{}", or lexing produced no tokens. Lexer and parser errors are
// returned as-is; tokens left over after the expression are an error.
func parseFilterAST(dsl string) (filterNode, error) {
	switch dsl = strings.TrimSpace(dsl); dsl {
	case "", "{}":
		return nil, nil
	}

	tokens, err := lexFilter(dsl)
	switch {
	case err != nil:
		return nil, err
	case len(tokens) == 0:
		return nil, nil
	}

	parser := &filterParser{tokens: tokens}
	root, err := parser.parseExpr()
	if err != nil {
		return nil, err
	}
	// The expression must consume the whole token stream.
	if parser.pos != len(parser.tokens) {
		return nil, fmt.Errorf("filter: trailing tokens at position %d", parser.pos)
	}
	return root, nil
}
// ParseFilter parses a filter DSL string into a parameterized SQL fragment. // ParseFilter parses a filter DSL string into a parameterized SQL fragment.
// //
// argStart is the 1-based index for the first $N placeholder; this lets the // argStart is the 1-based index for the first $N placeholder; this lets the
@@ -262,25 +287,62 @@ func (p *filterParser) parseAtom() (filterNode, error) {
// SQL injection is structurally impossible: every user-supplied value is // SQL injection is structurally impossible: every user-supplied value is
// bound as a query parameter ($N), never interpolated into the SQL string. // bound as a query parameter ($N), never interpolated into the SQL string.
func ParseFilter(dsl string, argStart int) (sql string, nextN int, args []any, err error) { func ParseFilter(dsl string, argStart int) (sql string, nextN int, args []any, err error) {
dsl = strings.TrimSpace(dsl) node, err := parseFilterAST(dsl)
if dsl == "" || dsl == "{}" {
return "", argStart, nil, nil
}
toks, err := lexFilter(dsl)
if err != nil { if err != nil {
return "", argStart, nil, err return "", argStart, nil, err
} }
if len(toks) == 0 { if node == nil {
return "", argStart, nil, nil return "", argStart, nil, nil
} }
p := &filterParser{tokens: toks}
node, err := p.parseExpr()
if err != nil {
return "", argStart, nil, err
}
if p.pos != len(p.tokens) {
return "", argStart, nil, fmt.Errorf("filter: trailing tokens at position %d", p.pos)
}
sql, nextN, args = node.toSQL(argStart, nil) sql, nextN, args = node.toSQL(argStart, nil)
return sql, nextN, args, nil return sql, nextN, args, nil
} }
// tagUse is one distinct tag referenced by a filter, paired with the polarity
// under which the filter mentions it: included (positive) or excluded (negated
// under an odd number of NOTs).
type tagUse struct {
// tagID identifies the referenced tag.
tagID uuid.UUID
// included is true for a positive reference and false for a negated one.
included bool
}
// filterTagUses returns the distinct tags a filter DSL refers to, each marked
// as included or excluded. The "untagged" pseudo-token is never reported.
//
// The result is nil when the filter has no tag terms, is empty, or cannot be
// parsed: extraction is best-effort analytics, not validation, so parse errors
// are swallowed here on purpose. The order of the returned slice is
// unspecified because it follows map iteration.
func filterTagUses(dsl string) []tagUse {
	root, err := parseFilterAST(dsl)
	if err != nil {
		return nil
	}
	if root == nil {
		return nil
	}

	// Start with included=true: a tag outside any NOT is a positive reference.
	polarity := map[uuid.UUID]bool{}
	collectTagUses(root, true, polarity)
	if len(polarity) == 0 {
		return nil
	}

	result := make([]tagUse, 0, len(polarity))
	for tagID, isIncluded := range polarity {
		result = append(result, tagUse{tagID: tagID, included: isIncluded})
	}
	return result
}
// collectTagUses recursively visits the AST rooted at node and stores every
// real tag leaf in out, keyed by tag id with its polarity as the value.
//
// included is the polarity in effect at node. AND and OR pass it through to
// both operands (left first, then right); NOT inverts it for its child. A tag
// therefore ends up excluded only beneath an odd number of NOTs. If the same
// tag occurs under both polarities, the occurrence visited last wins. That is
// a pathological filter, but one entry per tag keeps the later insert free of
// duplicate keys. The "untagged" pseudo-token and non-tag leaves are ignored.
func collectTagUses(node filterNode, included bool, out map[uuid.UUID]bool) {
	if conj, ok := node.(*andNode); ok {
		collectTagUses(conj.left, included, out)
		collectTagUses(conj.right, included, out)
		return
	}
	if disj, ok := node.(*orNode); ok {
		collectTagUses(disj.left, included, out)
		collectTagUses(disj.right, included, out)
		return
	}
	if neg, ok := node.(*notNode); ok {
		collectTagUses(neg.child, !included, out)
		return
	}

	leaf, ok := node.(*leafNode)
	if !ok {
		return
	}
	if leaf.tok.kind != ftkTag || leaf.tok.untagged {
		return
	}
	out[leaf.tok.tagID] = included
}
@@ -0,0 +1,53 @@
package postgres
import (
"testing"
"github.com/google/uuid"
)
// TestFilterTagUses checks which tags filterTagUses extracts from a filter DSL
// and the polarity it assigns to each. It covers single and double negation,
// negation over a group, and the inputs that must record nothing.
func TestFilterTagUses(t *testing.T) {
	tagA := uuid.MustParse("11111111-1111-1111-1111-111111111111")
	tagB := uuid.MustParse("22222222-2222-2222-2222-222222222222")

	// term renders the DSL tag term for id, e.g. "t=<uuid>".
	term := func(id uuid.UUID) string { return "t=" + id.String() }

	// polarity maps tag -> included; a missing key means "not recorded".
	type polarity = map[uuid.UUID]bool

	cases := []struct {
		name string
		dsl  string
		want polarity
	}{
		{name: "single included", dsl: "{" + term(tagA) + "}", want: polarity{tagA: true}},
		{name: "single excluded", dsl: "{!," + term(tagA) + "}", want: polarity{tagA: false}},
		{name: "double negation is included", dsl: "{!,!," + term(tagA) + "}", want: polarity{tagA: true}},
		{
			name: "and of two included",
			dsl:  "{" + term(tagA) + ",&," + term(tagB) + "}",
			want: polarity{tagA: true, tagB: true},
		},
		{
			name: "not over a group excludes both",
			dsl:  "{!,(," + term(tagA) + ",|," + term(tagB) + ",)}",
			want: polarity{tagA: false, tagB: false},
		},
		{name: "untagged pseudo-token skipped", dsl: "{" + term(uuid.Nil) + "}", want: polarity{}},
		{name: "mime-only filter records nothing", dsl: "{m=3}", want: polarity{}},
		{name: "empty filter", dsl: "{}", want: polarity{}},
		{name: "unparseable filter is best-effort nil", dsl: "{t=not-a-uuid}", want: polarity{}},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			uses := filterTagUses(c.dsl)
			got := make(polarity, len(uses))
			for _, use := range uses {
				got[use.tagID] = use.included
			}

			if len(got) != len(c.want) {
				t.Fatalf("got %d uses %v, want %d %v", len(got), got, len(c.want), c.want)
			}
			for id, inc := range c.want {
				g, ok := got[id]
				if ok && g == inc {
					continue
				}
				t.Errorf("tag %s: got (included=%v, present=%v), want included=%v", id, g, ok, inc)
			}
		})
	}
}
@@ -54,6 +54,7 @@ type harness struct {
server *httptest.Server server *httptest.Server
client *http.Client client *http.Client
importDir string importDir string
pool *pgxpool.Pool
} }
// setupSuite creates an ephemeral database, runs migrations, wires the full // setupSuite creates an ephemeral database, runs migrations, wires the full
@@ -165,6 +166,7 @@ func setupSuite(t *testing.T) *harness {
server: srv, server: srv,
client: srv.Client(), client: srv.Client(),
importDir: importDir, importDir: importDir,
pool: pool,
} }
} }
@@ -192,6 +194,32 @@ func (h *harness) url(path string) string {
return h.server.URL + "/api/v1" + path return h.server.URL + "/api/v1" + path
} }
// tagUses reads every row of activity.tag_uses and returns a map from tag_id
// (rendered as text) to is_included. Rows sharing a tag_id collapse into one
// entry. Any query, scan or iteration error fails the test immediately.
func (h *harness) tagUses(ctx context.Context) map[string]bool {
	h.t.Helper()

	const query = `SELECT tag_id::text, is_included FROM activity.tag_uses`
	rows, err := h.pool.Query(ctx, query)
	require.NoError(h.t, err)
	defer rows.Close()

	result := map[string]bool{}
	for rows.Next() {
		var (
			tagID      string
			isIncluded bool
		)
		scanErr := rows.Scan(&tagID, &isIncluded)
		require.NoError(h.t, scanErr)
		result[tagID] = isIncluded
	}
	require.NoError(h.t, rows.Err())
	return result
}
// countTagUses reports how many rows activity.tag_uses currently holds,
// failing the test if the count cannot be read.
func (h *harness) countTagUses(ctx context.Context) int {
	h.t.Helper()

	var total int
	row := h.pool.QueryRow(ctx, `SELECT count(*) FROM activity.tag_uses`)
	err := row.Scan(&total)
	require.NoError(h.t, err)
	return total
}
func (h *harness) do(method, path string, body io.Reader, token string, contentType string) *testResponse { func (h *harness) do(method, path string, body io.Reader, token string, contentType string) *testResponse {
h.t.Helper() h.t.Helper()
req, err := http.NewRequest(method, h.url(path), body) req, err := http.NewRequest(method, h.url(path), body)
@@ -718,6 +746,67 @@ func TestRecordFileView(t *testing.T) {
require.Equal(t, http.StatusNotFound, resp.StatusCode, resp.String()) require.Equal(t, http.StatusNotFound, resp.StatusCode, resp.String())
} }
// TestRecordTagUses verifies that filtering files by tags logs to
// activity.tag_uses — included tags as is_included=true, negated ones as
// false — while an unfiltered listing and follow-up pagination record nothing.
func TestRecordTagUses(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	h := setupSuite(t)
	ctx := context.Background()
	adminToken := h.login("admin", "admin")

	resp := h.doJSON("POST", "/tags", map[string]any{"name": "sea"}, adminToken)
	require.Equal(t, http.StatusCreated, resp.StatusCode, resp.String())
	var sea map[string]any
	resp.decode(t, &sea)
	seaID := sea["id"].(string)

	resp = h.doJSON("POST", "/tags", map[string]any{"name": "sky"}, adminToken)
	require.Equal(t, http.StatusCreated, resp.StatusCode, resp.String())
	var sky map[string]any
	resp.decode(t, &sky)
	skyID := sky["id"].(string)

	// Two files both tagged "sea", so a paged {t=sea} listing has a second page.
	for _, name := range []string{"a.jpg", "b.jpg"} {
		f := h.uploadJPEG(adminToken, name)
		resp = h.doJSON("PUT", "/files/"+f["id"].(string)+"/tags",
			map[string]any{"tag_ids": []string{seaID}}, adminToken)
		require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
	}

	// An unfiltered listing must not touch tag_uses.
	resp = h.doJSON("GET", "/files", nil, adminToken)
	require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
	require.Equal(t, 0, h.countTagUses(ctx), "unfiltered list should record nothing")

	// Include "sea": {t=sea}, one item per page so a next_cursor comes back.
	resp = h.doJSON("GET", "/files?limit=1&filter=%7Bt%3D"+seaID+"%7D", nil, adminToken)
	require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
	var page1 map[string]any
	resp.decode(t, &page1)
	nextCursor, _ := page1["next_cursor"].(string)
	require.NotEmpty(t, nextCursor, "expected a next_cursor for page 2")

	// Exclude "sky": {!,t=sky}
	resp = h.doJSON("GET", "/files?filter=%7B%21%2Ct%3D"+skyID+"%7D", nil, adminToken)
	require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())

	uses := h.tagUses(ctx)
	require.Len(t, uses, 2, "expected one row per filtered tag")
	// Check key presence explicitly: a missing key reads as false, which would
	// let the is_included=false assertion below pass without any row for sky.
	require.Contains(t, uses, seaID, "included tag should have a tag_uses row")
	require.Contains(t, uses, skyID, "negated tag should have a tag_uses row")
	assert.True(t, uses[seaID], "included tag should be is_included=true")
	assert.False(t, uses[skyID], "negated tag should be is_included=false")

	// Page 2 (cursor present) is pagination, not a fresh filter — no new row.
	resp = h.doJSON("GET", "/files?limit=1&cursor="+nextCursor+"&filter=%7Bt%3D"+seaID+"%7D",
		nil, adminToken)
	require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
	assert.Equal(t, 2, h.countTagUses(ctx), "pagination should not add tag_uses rows")
}
// TestBulkTagAutoRule verifies the bulk add path also applies then_tags. // TestBulkTagAutoRule verifies the bulk add path also applies then_tags.
func TestBulkTagAutoRule(t *testing.T) { func TestBulkTagAutoRule(t *testing.T) {
if testing.Short() { if testing.Short() {
+4
View File
@@ -62,6 +62,10 @@ type FileRepo interface {
// RecordView appends a view-history row (activity.file_views) for the user. // RecordView appends a view-history row (activity.file_views) for the user.
RecordView(ctx context.Context, fileID uuid.UUID, userID int16) error RecordView(ctx context.Context, fileID uuid.UUID, userID int16) error
// RecordTagUses logs the tags referenced in a filter DSL to
// activity.tag_uses on behalf of userID, flagging each tag as included or
// excluded. Best-effort analytics — callers may ignore the error.
RecordTagUses(ctx context.Context, userID int16, filterDSL string) error
} }
// TagRepo is the persistence interface for tags. // TagRepo is the persistence interface for tags.
+14 -1
View File
@@ -461,7 +461,20 @@ func (s *FileService) Replace(ctx context.Context, id uuid.UUID, p UploadParams)
// files the caller may see (unless they are an admin). // files the caller may see (unless they are an admin).
func (s *FileService) List(ctx context.Context, params domain.FileListParams) (*domain.FilePage, error) { func (s *FileService) List(ctx context.Context, params domain.FileListParams) (*domain.FilePage, error) {
params.ViewerID, params.ViewerIsAdmin, _ = domain.UserFromContext(ctx) params.ViewerID, params.ViewerIsAdmin, _ = domain.UserFromContext(ctx)
return s.files.List(ctx, params)
page, err := s.files.List(ctx, params)
if err != nil {
return nil, err
}
// Log tag usage when a filter is first applied — not on pagination (cursor)
// or an anchored return, so a single browse counts once. Best-effort
// analytics; a failed write never breaks the listing.
if params.Filter != "" && params.Cursor == "" && params.Anchor == nil && params.ViewerID != 0 {
_ = s.files.RecordTagUses(ctx, params.ViewerID, params.Filter)
}
return page, nil
} }
// AuthorizeView ensures the caller may view the file. Returns ErrNotFound if the // AuthorizeView ensures the caller may view the file. Returns ErrNotFound if the