From 73ae8a046f6de21e0fba5057fd50874d47db2be0 Mon Sep 17 00:00:00 2001 From: Masahiko AMANO Date: Thu, 11 Jun 2026 21:40:13 +0300 Subject: [PATCH] feat(backend): record tag usage in filters to activity.tag_uses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Listing files with a tag filter now logs each referenced tag to activity.tag_uses, flagging it included (positive) or excluded (negated under an odd number of NOTs); the untagged pseudo-token is skipped. The filter AST is reused to determine polarity, so grouped negations like !(A|B) mark both tags excluded. Recording happens only when a filter is first applied — not on cursor pagination or an anchored return — so one browse counts once. The write is best-effort and never fails the listing. Co-Authored-By: Claude Opus 4.8 --- backend/internal/db/postgres/file_repo.go | 30 +++++++ backend/internal/db/postgres/filter_parser.go | 90 ++++++++++++++++--- .../db/postgres/filter_parser_test.go | 53 +++++++++++ backend/internal/integration/server_test.go | 89 ++++++++++++++++++ backend/internal/port/repository.go | 4 + backend/internal/service/file_service.go | 15 +++- 6 files changed, 266 insertions(+), 15 deletions(-) create mode 100644 backend/internal/db/postgres/filter_parser_test.go diff --git a/backend/internal/db/postgres/file_repo.go b/backend/internal/db/postgres/file_repo.go index ab90ce6..0dc0b44 100644 --- a/backend/internal/db/postgres/file_repo.go +++ b/backend/internal/db/postgres/file_repo.go @@ -811,3 +811,33 @@ func (r *FileRepo) RecordView(ctx context.Context, fileID uuid.UUID, userID int1 } return nil } + +// RecordTagUses appends a row to activity.tag_uses for each tag referenced in a +// filter DSL, flagging it included (positive) or excluded (negated). Tags are +// deduplicated per call, so one statement_timestamp() never collides on the +// (tag_id, used_at, user_id) PK; ON CONFLICT DO NOTHING guards the rest. A +// filter with no tag terms is a no-op. +func (r *FileRepo) RecordTagUses(ctx context.Context, userID int16, filterDSL string) error { + uses := filterTagUses(filterDSL) + if len(uses) == 0 { + return nil + } + + var sb strings.Builder + sb.WriteString("INSERT INTO activity.tag_uses (tag_id, user_id, is_included) VALUES ") + args := make([]any, 0, len(uses)*3) + for i, u := range uses { + if i > 0 { + sb.WriteString(", ") + } + base := i * 3 + fmt.Fprintf(&sb, "($%d, $%d, $%d)", base+1, base+2, base+3) + args = append(args, u.tagID, userID, u.included) + } + sb.WriteString(" ON CONFLICT DO NOTHING") + + if _, err := connOrTx(ctx, r.pool).Exec(ctx, sb.String(), args...); err != nil { + return fmt.Errorf("FileRepo.RecordTagUses: %w", err) + } + return nil +} diff --git a/backend/internal/db/postgres/filter_parser.go b/backend/internal/db/postgres/filter_parser.go index a3adf50..90110c9 100644 --- a/backend/internal/db/postgres/filter_parser.go +++ b/backend/internal/db/postgres/filter_parser.go @@ -253,6 +253,31 @@ func (p *filterParser) parseAtom() (filterNode, error) { // Public entry point // --------------------------------------------------------------------------- +// parseFilterAST lexes and parses a filter DSL into an AST. Returns (nil, nil) +// for an empty or trivial DSL. +func parseFilterAST(dsl string) (filterNode, error) { + dsl = strings.TrimSpace(dsl) + if dsl == "" || dsl == "{}" { + return nil, nil + } + toks, err := lexFilter(dsl) + if err != nil { + return nil, err + } + if len(toks) == 0 { + return nil, nil + } + p := &filterParser{tokens: toks} + node, err := p.parseExpr() + if err != nil { + return nil, err + } + if p.pos != len(p.tokens) { + return nil, fmt.Errorf("filter: trailing tokens at position %d", p.pos) + } + return node, nil +} + // ParseFilter parses a filter DSL string into a parameterized SQL fragment. // // argStart is the 1-based index for the first $N placeholder; this lets the @@ -262,25 +287,62 @@ func (p *filterParser) parseAtom() (filterNode, error) { // SQL injection is structurally impossible: every user-supplied value is // bound as a query parameter ($N), never interpolated into the SQL string. func ParseFilter(dsl string, argStart int) (sql string, nextN int, args []any, err error) { - dsl = strings.TrimSpace(dsl) - if dsl == "" || dsl == "{}" { - return "", argStart, nil, nil - } - toks, err := lexFilter(dsl) + node, err := parseFilterAST(dsl) if err != nil { return "", argStart, nil, err } - if len(toks) == 0 { + if node == nil { return "", argStart, nil, nil } - p := &filterParser{tokens: toks} - node, err := p.parseExpr() - if err != nil { - return "", argStart, nil, err - } - if p.pos != len(p.tokens) { - return "", argStart, nil, fmt.Errorf("filter: trailing tokens at position %d", p.pos) - } sql, nextN, args = node.toSQL(argStart, nil) return sql, nextN, args, nil } + +// tagUse is a tag referenced by a filter, with whether it was included +// (positive) or excluded (negated under an odd number of NOTs). +type tagUse struct { + tagID uuid.UUID + included bool +} + +// filterTagUses extracts the distinct tag references in a filter DSL, marking +// each as included or excluded. The "untagged" pseudo-token (zero UUID) is +// skipped. Returns nil for a filter with no tag terms; an unparseable filter +// also yields nil (extraction is best-effort analytics, not validation). +func filterTagUses(dsl string) []tagUse { + node, err := parseFilterAST(dsl) + if err != nil || node == nil { + return nil + } + seen := make(map[uuid.UUID]bool) + collectTagUses(node, true, seen) + if len(seen) == 0 { + return nil + } + uses := make([]tagUse, 0, len(seen)) + for id, inc := range seen { + uses = append(uses, tagUse{tagID: id, included: inc}) + } + return uses +} + +// collectTagUses walks the AST, recording each real tag leaf into out keyed by +// id. included flips under every NOT, so a tag is "excluded" only when nested +// under an odd number of NOTs. A tag appearing under both polarities keeps the +// last seen — pathological, but it avoids a duplicate-key insert. +func collectTagUses(node filterNode, included bool, out map[uuid.UUID]bool) { + switch nd := node.(type) { + case *andNode: + collectTagUses(nd.left, included, out) + collectTagUses(nd.right, included, out) + case *orNode: + collectTagUses(nd.left, included, out) + collectTagUses(nd.right, included, out) + case *notNode: + collectTagUses(nd.child, !included, out) + case *leafNode: + if nd.tok.kind == ftkTag && !nd.tok.untagged { + out[nd.tok.tagID] = included + } + } +} diff --git a/backend/internal/db/postgres/filter_parser_test.go b/backend/internal/db/postgres/filter_parser_test.go new file mode 100644 index 0000000..bfa7fd7 --- /dev/null +++ b/backend/internal/db/postgres/filter_parser_test.go @@ -0,0 +1,53 @@ +package postgres + +import ( + "testing" + + "github.com/google/uuid" +) + +func TestFilterTagUses(t *testing.T) { + a := uuid.MustParse("11111111-1111-1111-1111-111111111111") + b := uuid.MustParse("22222222-2222-2222-2222-222222222222") + + tests := []struct { + name string + dsl string + want map[uuid.UUID]bool // tag → included; absence means "not recorded" + }{ + {"single included", "{t=" + a.String() + "}", map[uuid.UUID]bool{a: true}}, + {"single excluded", "{!,t=" + a.String() + "}", map[uuid.UUID]bool{a: false}}, + {"double negation is included", "{!,!,t=" + a.String() + "}", map[uuid.UUID]bool{a: true}}, + { + "and of two included", + "{t=" + a.String() + ",&,t=" + b.String() + "}", + map[uuid.UUID]bool{a: true, b: true}, + }, + { + "not over a group excludes both", + "{!,(,t=" + a.String() + ",|,t=" + b.String() + ",)}", + map[uuid.UUID]bool{a: false, b: false}, + }, + {"untagged pseudo-token skipped", "{t=" + uuid.Nil.String() + "}", map[uuid.UUID]bool{}}, + {"mime-only filter records nothing", "{m=3}", map[uuid.UUID]bool{}}, + {"empty filter", "{}", map[uuid.UUID]bool{}}, + {"unparseable filter is best-effort nil", "{t=not-a-uuid}", map[uuid.UUID]bool{}}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := make(map[uuid.UUID]bool) + for _, u := range filterTagUses(tc.dsl) { + got[u.tagID] = u.included + } + if len(got) != len(tc.want) { + t.Fatalf("got %d uses %v, want %d %v", len(got), got, len(tc.want), tc.want) + } + for id, inc := range tc.want { + if g, ok := got[id]; !ok || g != inc { + t.Errorf("tag %s: got (included=%v, present=%v), want included=%v", id, g, ok, inc) + } + } + }) + } +} diff --git a/backend/internal/integration/server_test.go b/backend/internal/integration/server_test.go index 77a3812..be462c1 100644 --- a/backend/internal/integration/server_test.go +++ b/backend/internal/integration/server_test.go @@ -54,6 +54,7 @@ type harness struct { server *httptest.Server client *http.Client importDir string + pool *pgxpool.Pool } // setupSuite creates an ephemeral database, runs migrations, wires the full @@ -165,6 +166,7 @@ func setupSuite(t *testing.T) *harness { server: srv, client: srv.Client(), importDir: importDir, + pool: pool, } } @@ -192,6 +194,32 @@ func (h *harness) url(path string) string { return h.server.URL + "/api/v1" + path } +// tagUses returns all activity.tag_uses rows as tag_id (text) → is_included. +func (h *harness) tagUses(ctx context.Context) map[string]bool { + h.t.Helper() + rows, err := h.pool.Query(ctx, `SELECT tag_id::text, is_included FROM activity.tag_uses`) + require.NoError(h.t, err) + defer rows.Close() + + out := make(map[string]bool) + for rows.Next() { + var id string + var included bool + require.NoError(h.t, rows.Scan(&id, &included)) + out[id] = included + } + require.NoError(h.t, rows.Err()) + return out +} + +// countTagUses returns the number of rows in activity.tag_uses. +func (h *harness) countTagUses(ctx context.Context) int { + h.t.Helper() + var n int + require.NoError(h.t, h.pool.QueryRow(ctx, `SELECT count(*) FROM activity.tag_uses`).Scan(&n)) + return n +} + func (h *harness) do(method, path string, body io.Reader, token string, contentType string) *testResponse { h.t.Helper() req, err := http.NewRequest(method, h.url(path), body) @@ -718,6 +746,67 @@ func TestRecordFileView(t *testing.T) { require.Equal(t, http.StatusNotFound, resp.StatusCode, resp.String()) } +// TestRecordTagUses verifies that filtering files by tags logs to +// activity.tag_uses — included tags as is_included=true, negated ones as +// false — while an unfiltered listing and follow-up pagination record nothing. +func TestRecordTagUses(t *testing.T) { + if testing.Short() { + t.Skip("skipping integration test in short mode") + } + + h := setupSuite(t) + ctx := context.Background() + adminToken := h.login("admin", "admin") + + resp := h.doJSON("POST", "/tags", map[string]any{"name": "sea"}, adminToken) + require.Equal(t, http.StatusCreated, resp.StatusCode, resp.String()) + var sea map[string]any + resp.decode(t, &sea) + seaID := sea["id"].(string) + + resp = h.doJSON("POST", "/tags", map[string]any{"name": "sky"}, adminToken) + require.Equal(t, http.StatusCreated, resp.StatusCode, resp.String()) + var sky map[string]any + resp.decode(t, &sky) + skyID := sky["id"].(string) + + // Two files both tagged "sea", so a paged {t=sea} listing has a second page. + for _, name := range []string{"a.jpg", "b.jpg"} { + f := h.uploadJPEG(adminToken, name) + resp = h.doJSON("PUT", "/files/"+f["id"].(string)+"/tags", + map[string]any{"tag_ids": []string{seaID}}, adminToken) + require.Equal(t, http.StatusOK, resp.StatusCode, resp.String()) + } + + // An unfiltered listing must not touch tag_uses. + resp = h.doJSON("GET", "/files", nil, adminToken) + require.Equal(t, http.StatusOK, resp.StatusCode, resp.String()) + require.Equal(t, 0, h.countTagUses(ctx), "unfiltered list should record nothing") + + // Include "sea": {t=sea}, one item per page so a next_cursor comes back. + resp = h.doJSON("GET", "/files?limit=1&filter=%7Bt%3D"+seaID+"%7D", nil, adminToken) + require.Equal(t, http.StatusOK, resp.StatusCode, resp.String()) + var page1 map[string]any + resp.decode(t, &page1) + nextCursor, _ := page1["next_cursor"].(string) + require.NotEmpty(t, nextCursor, "expected a next_cursor for page 2") + + // Exclude "sky": {!,t=sky} + resp = h.doJSON("GET", "/files?filter=%7B%21%2Ct%3D"+skyID+"%7D", nil, adminToken) + require.Equal(t, http.StatusOK, resp.StatusCode, resp.String()) + + uses := h.tagUses(ctx) + require.Len(t, uses, 2, "expected one row per filtered tag") + assert.True(t, uses[seaID], "included tag should be is_included=true") + assert.False(t, uses[skyID], "negated tag should be is_included=false") + + // Page 2 (cursor present) is pagination, not a fresh filter — no new row. + resp = h.doJSON("GET", "/files?limit=1&cursor="+nextCursor+"&filter=%7Bt%3D"+seaID+"%7D", + nil, adminToken) + require.Equal(t, http.StatusOK, resp.StatusCode, resp.String()) + assert.Equal(t, 2, h.countTagUses(ctx), "pagination should not add tag_uses rows") +} + // TestBulkTagAutoRule verifies the bulk add path also applies then_tags. func TestBulkTagAutoRule(t *testing.T) { if testing.Short() { diff --git a/backend/internal/port/repository.go b/backend/internal/port/repository.go index 94f6d8d..0b991b8 100644 --- a/backend/internal/port/repository.go +++ b/backend/internal/port/repository.go @@ -62,6 +62,10 @@ type FileRepo interface { // RecordView appends a view-history row (activity.file_views) for the user. RecordView(ctx context.Context, fileID uuid.UUID, userID int16) error + // RecordTagUses logs the tags referenced in a filter DSL to + // activity.tag_uses, flagging each included or excluded. Best-effort + // analytics — callers may ignore the error. + RecordTagUses(ctx context.Context, userID int16, filterDSL string) error } // TagRepo is the persistence interface for tags. diff --git a/backend/internal/service/file_service.go b/backend/internal/service/file_service.go index b78d708..a98a3f8 100644 --- a/backend/internal/service/file_service.go +++ b/backend/internal/service/file_service.go @@ -461,7 +461,20 @@ func (s *FileService) Replace(ctx context.Context, id uuid.UUID, p UploadParams) // files the caller may see (unless they are an admin). func (s *FileService) List(ctx context.Context, params domain.FileListParams) (*domain.FilePage, error) { params.ViewerID, params.ViewerIsAdmin, _ = domain.UserFromContext(ctx) - return s.files.List(ctx, params) + + page, err := s.files.List(ctx, params) + if err != nil { + return nil, err + } + + // Log tag usage when a filter is first applied — not on pagination (cursor) + // or an anchored return, so a single browse counts once. Best-effort + // analytics; a failed write never breaks the listing. + if params.Filter != "" && params.Cursor == "" && params.Anchor == nil && params.ViewerID != 0 { + _ = s.files.RecordTagUses(ctx, params.ViewerID, params.Filter) + } + + return page, nil } // AuthorizeView ensures the caller may view the file. Returns ErrNotFound if the