feat(backend): record tag usage in filters to activity.tag_uses
Listing files with a tag filter now logs each referenced tag to activity.tag_uses, flagging it included (positive) or excluded (negated under an odd number of NOTs); the untagged pseudo-token is skipped. The filter AST is reused to determine polarity, so grouped negations like !(A|B) mark both tags excluded. Recording happens only when a filter is first applied — not on cursor pagination or an anchored return — so one browse counts once. The write is best-effort and never fails the listing. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -811,3 +811,33 @@ func (r *FileRepo) RecordView(ctx context.Context, fileID uuid.UUID, userID int1
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RecordTagUses appends a row to activity.tag_uses for each tag referenced in a
|
||||||
|
// filter DSL, flagging it included (positive) or excluded (negated). Tags are
|
||||||
|
// deduplicated per call, so one statement_timestamp() never collides on the
|
||||||
|
// (tag_id, used_at, user_id) PK; ON CONFLICT DO NOTHING guards the rest. A
|
||||||
|
// filter with no tag terms is a no-op.
|
||||||
|
func (r *FileRepo) RecordTagUses(ctx context.Context, userID int16, filterDSL string) error {
|
||||||
|
uses := filterTagUses(filterDSL)
|
||||||
|
if len(uses) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var sb strings.Builder
|
||||||
|
sb.WriteString("INSERT INTO activity.tag_uses (tag_id, user_id, is_included) VALUES ")
|
||||||
|
args := make([]any, 0, len(uses)*3)
|
||||||
|
for i, u := range uses {
|
||||||
|
if i > 0 {
|
||||||
|
sb.WriteString(", ")
|
||||||
|
}
|
||||||
|
base := i * 3
|
||||||
|
fmt.Fprintf(&sb, "($%d, $%d, $%d)", base+1, base+2, base+3)
|
||||||
|
args = append(args, u.tagID, userID, u.included)
|
||||||
|
}
|
||||||
|
sb.WriteString(" ON CONFLICT DO NOTHING")
|
||||||
|
|
||||||
|
if _, err := connOrTx(ctx, r.pool).Exec(ctx, sb.String(), args...); err != nil {
|
||||||
|
return fmt.Errorf("FileRepo.RecordTagUses: %w", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -253,6 +253,31 @@ func (p *filterParser) parseAtom() (filterNode, error) {
|
|||||||
// Public entry point
|
// Public entry point
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// parseFilterAST lexes and parses a filter DSL into an AST. Returns (nil, nil)
|
||||||
|
// for an empty or trivial DSL.
|
||||||
|
func parseFilterAST(dsl string) (filterNode, error) {
|
||||||
|
dsl = strings.TrimSpace(dsl)
|
||||||
|
if dsl == "" || dsl == "{}" {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
toks, err := lexFilter(dsl)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if len(toks) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
p := &filterParser{tokens: toks}
|
||||||
|
node, err := p.parseExpr()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if p.pos != len(p.tokens) {
|
||||||
|
return nil, fmt.Errorf("filter: trailing tokens at position %d", p.pos)
|
||||||
|
}
|
||||||
|
return node, nil
|
||||||
|
}
|
||||||
|
|
||||||
// ParseFilter parses a filter DSL string into a parameterized SQL fragment.
|
// ParseFilter parses a filter DSL string into a parameterized SQL fragment.
|
||||||
//
|
//
|
||||||
// argStart is the 1-based index for the first $N placeholder; this lets the
|
// argStart is the 1-based index for the first $N placeholder; this lets the
|
||||||
@@ -262,25 +287,62 @@ func (p *filterParser) parseAtom() (filterNode, error) {
|
|||||||
// SQL injection is structurally impossible: every user-supplied value is
|
// SQL injection is structurally impossible: every user-supplied value is
|
||||||
// bound as a query parameter ($N), never interpolated into the SQL string.
|
// bound as a query parameter ($N), never interpolated into the SQL string.
|
||||||
func ParseFilter(dsl string, argStart int) (sql string, nextN int, args []any, err error) {
|
func ParseFilter(dsl string, argStart int) (sql string, nextN int, args []any, err error) {
|
||||||
dsl = strings.TrimSpace(dsl)
|
node, err := parseFilterAST(dsl)
|
||||||
if dsl == "" || dsl == "{}" {
|
|
||||||
return "", argStart, nil, nil
|
|
||||||
}
|
|
||||||
toks, err := lexFilter(dsl)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", argStart, nil, err
|
return "", argStart, nil, err
|
||||||
}
|
}
|
||||||
if len(toks) == 0 {
|
if node == nil {
|
||||||
return "", argStart, nil, nil
|
return "", argStart, nil, nil
|
||||||
}
|
}
|
||||||
p := &filterParser{tokens: toks}
|
|
||||||
node, err := p.parseExpr()
|
|
||||||
if err != nil {
|
|
||||||
return "", argStart, nil, err
|
|
||||||
}
|
|
||||||
if p.pos != len(p.tokens) {
|
|
||||||
return "", argStart, nil, fmt.Errorf("filter: trailing tokens at position %d", p.pos)
|
|
||||||
}
|
|
||||||
sql, nextN, args = node.toSQL(argStart, nil)
|
sql, nextN, args = node.toSQL(argStart, nil)
|
||||||
return sql, nextN, args, nil
|
return sql, nextN, args, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tagUse is a tag referenced by a filter, with whether it was included
|
||||||
|
// (positive) or excluded (negated under an odd number of NOTs).
|
||||||
|
type tagUse struct {
|
||||||
|
tagID uuid.UUID
|
||||||
|
included bool
|
||||||
|
}
|
||||||
|
|
||||||
|
// filterTagUses extracts the distinct tag references in a filter DSL, marking
|
||||||
|
// each as included or excluded. The "untagged" pseudo-token (zero UUID) is
|
||||||
|
// skipped. Returns nil for a filter with no tag terms; an unparseable filter
|
||||||
|
// also yields nil (extraction is best-effort analytics, not validation).
|
||||||
|
func filterTagUses(dsl string) []tagUse {
|
||||||
|
node, err := parseFilterAST(dsl)
|
||||||
|
if err != nil || node == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
seen := make(map[uuid.UUID]bool)
|
||||||
|
collectTagUses(node, true, seen)
|
||||||
|
if len(seen) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
uses := make([]tagUse, 0, len(seen))
|
||||||
|
for id, inc := range seen {
|
||||||
|
uses = append(uses, tagUse{tagID: id, included: inc})
|
||||||
|
}
|
||||||
|
return uses
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectTagUses walks the AST, recording each real tag leaf into out keyed by
|
||||||
|
// id. included flips under every NOT, so a tag is "excluded" only when nested
|
||||||
|
// under an odd number of NOTs. A tag appearing under both polarities keeps the
|
||||||
|
// last seen — pathological, but it avoids a duplicate-key insert.
|
||||||
|
func collectTagUses(node filterNode, included bool, out map[uuid.UUID]bool) {
|
||||||
|
switch nd := node.(type) {
|
||||||
|
case *andNode:
|
||||||
|
collectTagUses(nd.left, included, out)
|
||||||
|
collectTagUses(nd.right, included, out)
|
||||||
|
case *orNode:
|
||||||
|
collectTagUses(nd.left, included, out)
|
||||||
|
collectTagUses(nd.right, included, out)
|
||||||
|
case *notNode:
|
||||||
|
collectTagUses(nd.child, !included, out)
|
||||||
|
case *leafNode:
|
||||||
|
if nd.tok.kind == ftkTag && !nd.tok.untagged {
|
||||||
|
out[nd.tok.tagID] = included
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,53 @@
|
|||||||
|
package postgres
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestFilterTagUses(t *testing.T) {
|
||||||
|
a := uuid.MustParse("11111111-1111-1111-1111-111111111111")
|
||||||
|
b := uuid.MustParse("22222222-2222-2222-2222-222222222222")
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
dsl string
|
||||||
|
want map[uuid.UUID]bool // tag → included; absence means "not recorded"
|
||||||
|
}{
|
||||||
|
{"single included", "{t=" + a.String() + "}", map[uuid.UUID]bool{a: true}},
|
||||||
|
{"single excluded", "{!,t=" + a.String() + "}", map[uuid.UUID]bool{a: false}},
|
||||||
|
{"double negation is included", "{!,!,t=" + a.String() + "}", map[uuid.UUID]bool{a: true}},
|
||||||
|
{
|
||||||
|
"and of two included",
|
||||||
|
"{t=" + a.String() + ",&,t=" + b.String() + "}",
|
||||||
|
map[uuid.UUID]bool{a: true, b: true},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"not over a group excludes both",
|
||||||
|
"{!,(,t=" + a.String() + ",|,t=" + b.String() + ",)}",
|
||||||
|
map[uuid.UUID]bool{a: false, b: false},
|
||||||
|
},
|
||||||
|
{"untagged pseudo-token skipped", "{t=" + uuid.Nil.String() + "}", map[uuid.UUID]bool{}},
|
||||||
|
{"mime-only filter records nothing", "{m=3}", map[uuid.UUID]bool{}},
|
||||||
|
{"empty filter", "{}", map[uuid.UUID]bool{}},
|
||||||
|
{"unparseable filter is best-effort nil", "{t=not-a-uuid}", map[uuid.UUID]bool{}},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
got := make(map[uuid.UUID]bool)
|
||||||
|
for _, u := range filterTagUses(tc.dsl) {
|
||||||
|
got[u.tagID] = u.included
|
||||||
|
}
|
||||||
|
if len(got) != len(tc.want) {
|
||||||
|
t.Fatalf("got %d uses %v, want %d %v", len(got), got, len(tc.want), tc.want)
|
||||||
|
}
|
||||||
|
for id, inc := range tc.want {
|
||||||
|
if g, ok := got[id]; !ok || g != inc {
|
||||||
|
t.Errorf("tag %s: got (included=%v, present=%v), want included=%v", id, g, ok, inc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -54,6 +54,7 @@ type harness struct {
|
|||||||
server *httptest.Server
|
server *httptest.Server
|
||||||
client *http.Client
|
client *http.Client
|
||||||
importDir string
|
importDir string
|
||||||
|
pool *pgxpool.Pool
|
||||||
}
|
}
|
||||||
|
|
||||||
// setupSuite creates an ephemeral database, runs migrations, wires the full
|
// setupSuite creates an ephemeral database, runs migrations, wires the full
|
||||||
@@ -165,6 +166,7 @@ func setupSuite(t *testing.T) *harness {
|
|||||||
server: srv,
|
server: srv,
|
||||||
client: srv.Client(),
|
client: srv.Client(),
|
||||||
importDir: importDir,
|
importDir: importDir,
|
||||||
|
pool: pool,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -192,6 +194,32 @@ func (h *harness) url(path string) string {
|
|||||||
return h.server.URL + "/api/v1" + path
|
return h.server.URL + "/api/v1" + path
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tagUses returns all activity.tag_uses rows as tag_id (text) → is_included.
|
||||||
|
func (h *harness) tagUses(ctx context.Context) map[string]bool {
|
||||||
|
h.t.Helper()
|
||||||
|
rows, err := h.pool.Query(ctx, `SELECT tag_id::text, is_included FROM activity.tag_uses`)
|
||||||
|
require.NoError(h.t, err)
|
||||||
|
defer rows.Close()
|
||||||
|
|
||||||
|
out := make(map[string]bool)
|
||||||
|
for rows.Next() {
|
||||||
|
var id string
|
||||||
|
var included bool
|
||||||
|
require.NoError(h.t, rows.Scan(&id, &included))
|
||||||
|
out[id] = included
|
||||||
|
}
|
||||||
|
require.NoError(h.t, rows.Err())
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// countTagUses returns the number of rows in activity.tag_uses.
|
||||||
|
func (h *harness) countTagUses(ctx context.Context) int {
|
||||||
|
h.t.Helper()
|
||||||
|
var n int
|
||||||
|
require.NoError(h.t, h.pool.QueryRow(ctx, `SELECT count(*) FROM activity.tag_uses`).Scan(&n))
|
||||||
|
return n
|
||||||
|
}
|
||||||
|
|
||||||
func (h *harness) do(method, path string, body io.Reader, token string, contentType string) *testResponse {
|
func (h *harness) do(method, path string, body io.Reader, token string, contentType string) *testResponse {
|
||||||
h.t.Helper()
|
h.t.Helper()
|
||||||
req, err := http.NewRequest(method, h.url(path), body)
|
req, err := http.NewRequest(method, h.url(path), body)
|
||||||
@@ -718,6 +746,67 @@ func TestRecordFileView(t *testing.T) {
|
|||||||
require.Equal(t, http.StatusNotFound, resp.StatusCode, resp.String())
|
require.Equal(t, http.StatusNotFound, resp.StatusCode, resp.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestRecordTagUses verifies that filtering files by tags logs to
|
||||||
|
// activity.tag_uses — included tags as is_included=true, negated ones as
|
||||||
|
// false — while an unfiltered listing and follow-up pagination record nothing.
|
||||||
|
func TestRecordTagUses(t *testing.T) {
|
||||||
|
if testing.Short() {
|
||||||
|
t.Skip("skipping integration test in short mode")
|
||||||
|
}
|
||||||
|
|
||||||
|
h := setupSuite(t)
|
||||||
|
ctx := context.Background()
|
||||||
|
adminToken := h.login("admin", "admin")
|
||||||
|
|
||||||
|
resp := h.doJSON("POST", "/tags", map[string]any{"name": "sea"}, adminToken)
|
||||||
|
require.Equal(t, http.StatusCreated, resp.StatusCode, resp.String())
|
||||||
|
var sea map[string]any
|
||||||
|
resp.decode(t, &sea)
|
||||||
|
seaID := sea["id"].(string)
|
||||||
|
|
||||||
|
resp = h.doJSON("POST", "/tags", map[string]any{"name": "sky"}, adminToken)
|
||||||
|
require.Equal(t, http.StatusCreated, resp.StatusCode, resp.String())
|
||||||
|
var sky map[string]any
|
||||||
|
resp.decode(t, &sky)
|
||||||
|
skyID := sky["id"].(string)
|
||||||
|
|
||||||
|
// Two files both tagged "sea", so a paged {t=sea} listing has a second page.
|
||||||
|
for _, name := range []string{"a.jpg", "b.jpg"} {
|
||||||
|
f := h.uploadJPEG(adminToken, name)
|
||||||
|
resp = h.doJSON("PUT", "/files/"+f["id"].(string)+"/tags",
|
||||||
|
map[string]any{"tag_ids": []string{seaID}}, adminToken)
|
||||||
|
require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// An unfiltered listing must not touch tag_uses.
|
||||||
|
resp = h.doJSON("GET", "/files", nil, adminToken)
|
||||||
|
require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
|
||||||
|
require.Equal(t, 0, h.countTagUses(ctx), "unfiltered list should record nothing")
|
||||||
|
|
||||||
|
// Include "sea": {t=sea}, one item per page so a next_cursor comes back.
|
||||||
|
resp = h.doJSON("GET", "/files?limit=1&filter=%7Bt%3D"+seaID+"%7D", nil, adminToken)
|
||||||
|
require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
|
||||||
|
var page1 map[string]any
|
||||||
|
resp.decode(t, &page1)
|
||||||
|
nextCursor, _ := page1["next_cursor"].(string)
|
||||||
|
require.NotEmpty(t, nextCursor, "expected a next_cursor for page 2")
|
||||||
|
|
||||||
|
// Exclude "sky": {!,t=sky}
|
||||||
|
resp = h.doJSON("GET", "/files?filter=%7B%21%2Ct%3D"+skyID+"%7D", nil, adminToken)
|
||||||
|
require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
|
||||||
|
|
||||||
|
uses := h.tagUses(ctx)
|
||||||
|
require.Len(t, uses, 2, "expected one row per filtered tag")
|
||||||
|
assert.True(t, uses[seaID], "included tag should be is_included=true")
|
||||||
|
assert.False(t, uses[skyID], "negated tag should be is_included=false")
|
||||||
|
|
||||||
|
// Page 2 (cursor present) is pagination, not a fresh filter — no new row.
|
||||||
|
resp = h.doJSON("GET", "/files?limit=1&cursor="+nextCursor+"&filter=%7Bt%3D"+seaID+"%7D",
|
||||||
|
nil, adminToken)
|
||||||
|
require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
|
||||||
|
assert.Equal(t, 2, h.countTagUses(ctx), "pagination should not add tag_uses rows")
|
||||||
|
}
|
||||||
|
|
||||||
// TestBulkTagAutoRule verifies the bulk add path also applies then_tags.
|
// TestBulkTagAutoRule verifies the bulk add path also applies then_tags.
|
||||||
func TestBulkTagAutoRule(t *testing.T) {
|
func TestBulkTagAutoRule(t *testing.T) {
|
||||||
if testing.Short() {
|
if testing.Short() {
|
||||||
|
|||||||
@@ -62,6 +62,10 @@ type FileRepo interface {
|
|||||||
|
|
||||||
// RecordView appends a view-history row (activity.file_views) for the user.
|
// RecordView appends a view-history row (activity.file_views) for the user.
|
||||||
RecordView(ctx context.Context, fileID uuid.UUID, userID int16) error
|
RecordView(ctx context.Context, fileID uuid.UUID, userID int16) error
|
||||||
|
// RecordTagUses logs the tags referenced in a filter DSL to
|
||||||
|
// activity.tag_uses, flagging each included or excluded. Best-effort
|
||||||
|
// analytics — callers may ignore the error.
|
||||||
|
RecordTagUses(ctx context.Context, userID int16, filterDSL string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
// TagRepo is the persistence interface for tags.
|
// TagRepo is the persistence interface for tags.
|
||||||
|
|||||||
@@ -461,7 +461,20 @@ func (s *FileService) Replace(ctx context.Context, id uuid.UUID, p UploadParams)
|
|||||||
// files the caller may see (unless they are an admin).
|
// files the caller may see (unless they are an admin).
|
||||||
func (s *FileService) List(ctx context.Context, params domain.FileListParams) (*domain.FilePage, error) {
|
func (s *FileService) List(ctx context.Context, params domain.FileListParams) (*domain.FilePage, error) {
|
||||||
params.ViewerID, params.ViewerIsAdmin, _ = domain.UserFromContext(ctx)
|
params.ViewerID, params.ViewerIsAdmin, _ = domain.UserFromContext(ctx)
|
||||||
return s.files.List(ctx, params)
|
|
||||||
|
page, err := s.files.List(ctx, params)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log tag usage when a filter is first applied — not on pagination (cursor)
|
||||||
|
// or an anchored return, so a single browse counts once. Best-effort
|
||||||
|
// analytics; a failed write never breaks the listing.
|
||||||
|
if params.Filter != "" && params.Cursor == "" && params.Anchor == nil && params.ViewerID != 0 {
|
||||||
|
_ = s.files.RecordTagUses(ctx, params.ViewerID, params.Filter)
|
||||||
|
}
|
||||||
|
|
||||||
|
return page, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// AuthorizeView ensures the caller may view the file. Returns ErrNotFound if the
|
// AuthorizeView ensures the caller may view the file. Returns ErrNotFound if the
|
||||||
|
|||||||
Reference in New Issue
Block a user