9216a8687f
Adds the duplicate-detection backend on top of perceptual hashing:
- Two tables (edited into the original migrations): data.duplicate_pairs holds
precomputed near-duplicate candidates (rebuilt wholesale by the rescan), and
data.duplicate_dismissals is a global "not a duplicate" overlay that survives
rescans. New audit actions file_merge / duplicate_dismiss.
- DuplicateService:
- Rescan builds every pair within DUPLICATE_HASH_THRESHOLD via a BK-tree over
the perceptual hashes and replaces the pairs table. This is the only thing
that populates pairs, so GET never compares all-vs-all (scales to 110k+).
- Clusters reads the precomputed pairs (ACL-filtered, non-trashed, non-
dismissed), groups them into connected components via union-find, and
paginates whole clusters.
- Resolve merges a pair field-by-field: each scalar from keep or discard,
metadata keep/discard/shallow-merge, tags/pools keep or union; then trashes
the discarded file. Enforces edit ACL on both.
- Dismiss records a canonical pair (view ACL on both).
- Endpoints under /files: GET /files/duplicates, POST /files/duplicates/dismiss,
POST /files/duplicates/resolve (registered before /:id to avoid collision).
Plain delete reuses /files/bulk/delete.
- Repo support: ListMissingPHash, ListAllPHashes, CopyPoolMemberships, plus the
DuplicatePairRepo (ReplaceAll via COPY, ListVisible) and DismissalRepo.
Unit tests cover the BK-tree pairing, union-find clustering, metadata merge and
field validation; an integration test covers rescan -> list -> merge -> dismiss
(including that a dismissal survives a re-rescan).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
223 lines
11 KiB
Go
223 lines
11 KiB
Go
package port
|
||
|
||
import (
|
||
"context"
|
||
"time"
|
||
|
||
"github.com/google/uuid"
|
||
|
||
"tanabata/backend/internal/domain"
|
||
)
|
||
|
||
// Transactor executes fn inside a single database transaction.
|
||
// All repository calls made within fn receive the transaction via context.
|
||
type Transactor interface {
|
||
WithTx(ctx context.Context, fn func(ctx context.Context) error) error
|
||
}
|
||
|
||
// OffsetParams holds common offset-pagination and sort parameters.
|
||
type OffsetParams struct {
|
||
Sort string
|
||
Order string // "asc" | "desc"
|
||
Search string
|
||
Offset int
|
||
Limit int
|
||
|
||
// Visibility — populated by the service from the request context. When
|
||
// ViewerIsAdmin is false the repository restricts results to rows the viewer
|
||
// may see (public, owned, or explicitly granted). Ignored by user listing,
|
||
// which is admin-only.
|
||
ViewerID int16
|
||
ViewerIsAdmin bool
|
||
}
|
||
|
||
// PoolFileListParams holds parameters for listing files inside a pool.
|
||
type PoolFileListParams struct {
|
||
Cursor string
|
||
Limit int
|
||
Filter string // filter DSL expression
|
||
}
|
||
|
||
// FileRepo is the persistence interface for file records.
|
||
type FileRepo interface {
|
||
// List returns a cursor-based page of files.
|
||
List(ctx context.Context, params domain.FileListParams) (*domain.FilePage, error)
|
||
// GetByID returns the file with its tags loaded.
|
||
GetByID(ctx context.Context, id uuid.UUID) (*domain.File, error)
|
||
// Create inserts a new file record and returns it.
|
||
Create(ctx context.Context, f *domain.File) (*domain.File, error)
|
||
// Update applies partial metadata changes and returns the updated record.
|
||
Update(ctx context.Context, id uuid.UUID, f *domain.File) (*domain.File, error)
|
||
// SetNeedsReview sets the review status on the given (non-trashed) files.
|
||
SetNeedsReview(ctx context.Context, ids []uuid.UUID, value bool) error
|
||
// SetPHash sets (or clears, when nil) the perceptual hash of a file.
|
||
SetPHash(ctx context.Context, id uuid.UUID, phash *int64) error
|
||
// ListMissingPHash returns live image/video files that have no perceptual
|
||
// hash yet (the dedup backfill work list).
|
||
ListMissingPHash(ctx context.Context) ([]domain.File, error)
|
||
// ListAllPHashes returns the id and perceptual hash of every live, hashed
|
||
// file (the global input to the dedup rescan; not ACL-filtered).
|
||
ListAllPHashes(ctx context.Context) ([]domain.PHashEntry, error)
|
||
// CopyPoolMemberships adds targetID to every pool sourceID belongs to,
|
||
// skipping pools target is already in (used by the duplicate merge).
|
||
CopyPoolMemberships(ctx context.Context, targetID, sourceID uuid.UUID) error
|
||
// SoftDelete moves a file to trash (sets is_deleted = true).
|
||
SoftDelete(ctx context.Context, id uuid.UUID) error
|
||
// Restore moves a file out of trash (sets is_deleted = false).
|
||
Restore(ctx context.Context, id uuid.UUID) (*domain.File, error)
|
||
// DeletePermanent removes a file record. Only allowed when is_deleted = true.
|
||
DeletePermanent(ctx context.Context, id uuid.UUID) error
|
||
|
||
// ListTags returns all tags assigned to a file.
|
||
ListTags(ctx context.Context, fileID uuid.UUID) ([]domain.Tag, error)
|
||
// SetTags replaces all tags on a file (full replace semantics).
|
||
SetTags(ctx context.Context, fileID uuid.UUID, tagIDs []uuid.UUID) error
|
||
|
||
// RecordView appends a view-history row (activity.file_views) for the user.
|
||
RecordView(ctx context.Context, fileID uuid.UUID, userID int16) error
|
||
// RecordTagUses logs the tags referenced in a filter DSL to
|
||
// activity.tag_uses, flagging each included or excluded. Best-effort
|
||
// analytics — callers may ignore the error.
|
||
RecordTagUses(ctx context.Context, userID int16, filterDSL string) error
|
||
}
|
||
|
||
// DuplicatePairRepo persists the precomputed near-duplicate candidate pairs.
|
||
type DuplicatePairRepo interface {
|
||
// ReplaceAll atomically replaces the whole pairs table (used by the rescan).
|
||
ReplaceAll(ctx context.Context, pairs []domain.DuplicatePair) error
|
||
// ListVisible returns pairs whose both files are live, not dismissed, and
|
||
// (for non-admins) visible to the viewer.
|
||
ListVisible(ctx context.Context, viewerID int16, isAdmin bool) ([]domain.DuplicatePair, error)
|
||
}
|
||
|
||
// DismissalRepo persists "not a duplicate" decisions.
|
||
type DismissalRepo interface {
|
||
// Add records a pair as dismissed (canonical order, idempotent).
|
||
Add(ctx context.Context, a, b uuid.UUID, userID int16) error
|
||
}
|
||
|
||
// TagRepo is the persistence interface for tags.
|
||
type TagRepo interface {
|
||
List(ctx context.Context, params OffsetParams) (*domain.TagOffsetPage, error)
|
||
// ListByCategory returns tags belonging to a specific category.
|
||
ListByCategory(ctx context.Context, categoryID uuid.UUID, params OffsetParams) (*domain.TagOffsetPage, error)
|
||
GetByID(ctx context.Context, id uuid.UUID) (*domain.Tag, error)
|
||
Create(ctx context.Context, t *domain.Tag) (*domain.Tag, error)
|
||
Update(ctx context.Context, id uuid.UUID, t *domain.Tag) (*domain.Tag, error)
|
||
Delete(ctx context.Context, id uuid.UUID) error
|
||
|
||
// ListByFile returns all tags assigned to a specific file, ordered by name.
|
||
ListByFile(ctx context.Context, fileID uuid.UUID) ([]domain.Tag, error)
|
||
// AddFileTag inserts a single file→tag relation. No-op if already present.
|
||
AddFileTag(ctx context.Context, fileID, tagID uuid.UUID) error
|
||
// RemoveFileTag deletes a single file→tag relation.
|
||
RemoveFileTag(ctx context.Context, fileID, tagID uuid.UUID) error
|
||
// SetFileTags replaces all tags on a file (full replace semantics).
|
||
SetFileTags(ctx context.Context, fileID uuid.UUID, tagIDs []uuid.UUID) error
|
||
// CommonTagsForFiles returns tags present on every one of the given files.
|
||
CommonTagsForFiles(ctx context.Context, fileIDs []uuid.UUID) ([]domain.Tag, error)
|
||
// PartialTagsForFiles returns tags present on some but not all of the given files.
|
||
PartialTagsForFiles(ctx context.Context, fileIDs []uuid.UUID) ([]domain.Tag, error)
|
||
}
|
||
|
||
// TagRuleRepo is the persistence interface for auto-tag rules.
|
||
type TagRuleRepo interface {
|
||
// ListByTag returns all rules where WhenTagID == tagID.
|
||
ListByTag(ctx context.Context, tagID uuid.UUID) ([]domain.TagRule, error)
|
||
Create(ctx context.Context, r domain.TagRule) (*domain.TagRule, error)
|
||
// SetActive toggles a rule's is_active flag. When active and applyToExisting
|
||
// are both true, the full transitive expansion of thenTagID is retroactively
|
||
// applied to all files that already carry whenTagID.
|
||
SetActive(ctx context.Context, whenTagID, thenTagID uuid.UUID, active, applyToExisting bool) error
|
||
// ApplyToExisting retroactively applies the full transitive expansion of
|
||
// thenTagID (following active rules) to every file that already carries
|
||
// whenTagID. Used when a rule is created or activated with apply_to_existing.
|
||
ApplyToExisting(ctx context.Context, whenTagID, thenTagID uuid.UUID) error
|
||
Delete(ctx context.Context, whenTagID, thenTagID uuid.UUID) error
|
||
}
|
||
|
||
// CategoryRepo is the persistence interface for categories.
|
||
type CategoryRepo interface {
|
||
List(ctx context.Context, params OffsetParams) (*domain.CategoryOffsetPage, error)
|
||
GetByID(ctx context.Context, id uuid.UUID) (*domain.Category, error)
|
||
Create(ctx context.Context, c *domain.Category) (*domain.Category, error)
|
||
Update(ctx context.Context, id uuid.UUID, c *domain.Category) (*domain.Category, error)
|
||
Delete(ctx context.Context, id uuid.UUID) error
|
||
}
|
||
|
||
// PoolRepo is the persistence interface for pools and pool–file membership.
|
||
type PoolRepo interface {
|
||
List(ctx context.Context, params OffsetParams) (*domain.PoolOffsetPage, error)
|
||
GetByID(ctx context.Context, id uuid.UUID) (*domain.Pool, error)
|
||
Create(ctx context.Context, p *domain.Pool) (*domain.Pool, error)
|
||
Update(ctx context.Context, id uuid.UUID, p *domain.Pool) (*domain.Pool, error)
|
||
Delete(ctx context.Context, id uuid.UUID) error
|
||
|
||
// ListFiles returns pool files ordered by position (cursor-based).
|
||
ListFiles(ctx context.Context, poolID uuid.UUID, params PoolFileListParams) (*domain.PoolFilePage, error)
|
||
// AddFiles appends files starting at position; nil position means append at end.
|
||
AddFiles(ctx context.Context, poolID uuid.UUID, fileIDs []uuid.UUID, position *int) error
|
||
// RemoveFiles removes files from the pool.
|
||
RemoveFiles(ctx context.Context, poolID uuid.UUID, fileIDs []uuid.UUID) error
|
||
// Reorder sets the full ordered sequence of file IDs in the pool.
|
||
Reorder(ctx context.Context, poolID uuid.UUID, fileIDs []uuid.UUID) error
|
||
|
||
// RecordView appends a view-history row (activity.pool_views) for the user.
|
||
RecordView(ctx context.Context, poolID uuid.UUID, userID int16) error
|
||
}
|
||
|
||
// UserRepo is the persistence interface for users.
|
||
type UserRepo interface {
|
||
List(ctx context.Context, params OffsetParams) (*domain.UserPage, error)
|
||
GetByID(ctx context.Context, id int16) (*domain.User, error)
|
||
// GetByName is used during login to look up credentials.
|
||
GetByName(ctx context.Context, name string) (*domain.User, error)
|
||
Create(ctx context.Context, u *domain.User) (*domain.User, error)
|
||
Update(ctx context.Context, id int16, u *domain.User) (*domain.User, error)
|
||
Delete(ctx context.Context, id int16) error
|
||
}
|
||
|
||
// SessionRepo is the persistence interface for auth sessions.
|
||
type SessionRepo interface {
|
||
// ListByUser returns all active sessions for a user.
|
||
ListByUser(ctx context.Context, userID int16) (*domain.SessionList, error)
|
||
// GetByID returns an active session by its ID, or ErrNotFound if it does not
|
||
// exist or has been deactivated.
|
||
GetByID(ctx context.Context, id int) (*domain.Session, error)
|
||
// GetByTokenHash looks up a session by the hashed refresh token.
|
||
GetByTokenHash(ctx context.Context, hash string) (*domain.Session, error)
|
||
Create(ctx context.Context, s *domain.Session) (*domain.Session, error)
|
||
// UpdateLastActivity refreshes the last_activity timestamp.
|
||
UpdateLastActivity(ctx context.Context, id int, t time.Time) error
|
||
// Delete terminates a single session.
|
||
Delete(ctx context.Context, id int) error
|
||
// DeleteByUserID terminates all sessions for a user (logout everywhere).
|
||
DeleteByUserID(ctx context.Context, userID int16) error
|
||
}
|
||
|
||
// ACLRepo is the persistence interface for per-object permissions.
|
||
type ACLRepo interface {
|
||
// List returns all permission entries for a given object.
|
||
List(ctx context.Context, objectTypeID int16, objectID uuid.UUID) ([]domain.Permission, error)
|
||
// Get returns the permission entry for a specific user and object; returns
|
||
// ErrNotFound if no entry exists.
|
||
Get(ctx context.Context, userID int16, objectTypeID int16, objectID uuid.UUID) (*domain.Permission, error)
|
||
// Set replaces all permissions for an object (full replace semantics).
|
||
Set(ctx context.Context, objectTypeID int16, objectID uuid.UUID, perms []domain.Permission) error
|
||
}
|
||
|
||
// AuditRepo is the persistence interface for the audit log.
|
||
type AuditRepo interface {
|
||
Log(ctx context.Context, entry domain.AuditEntry) error
|
||
List(ctx context.Context, filter domain.AuditFilter) (*domain.AuditPage, error)
|
||
}
|
||
|
||
// MimeRepo is the persistence interface for the MIME type whitelist.
|
||
type MimeRepo interface {
|
||
// List returns all supported MIME types.
|
||
List(ctx context.Context) ([]domain.MIMEType, error)
|
||
// GetByName returns the MIME type record for a given MIME name (e.g. "image/jpeg").
|
||
// Returns ErrUnsupportedMIME if not in the whitelist.
|
||
GetByName(ctx context.Context, name string) (*domain.MIMEType, error)
|
||
}
|