diff --git a/backend/cmd/server/main.go b/backend/cmd/server/main.go
index 1db842a..0ca0195 100644
--- a/backend/cmd/server/main.go
+++ b/backend/cmd/server/main.go
@@ -70,6 +70,8 @@ func main() {
 	tagRuleRepo := postgres.NewTagRuleRepo(pool)
 	categoryRepo := postgres.NewCategoryRepo(pool)
 	poolRepo := postgres.NewPoolRepo(pool)
+	duplicatePairRepo := postgres.NewDuplicatePairRepo(pool)
+	dismissalRepo := postgres.NewDismissalRepo(pool)
 	transactor := postgres.NewTransactor(pool)
 
 	// Services
@@ -86,6 +88,9 @@ func main() {
 	tagSvc := service.NewTagService(tagRepo, tagRuleRepo, aclSvc, auditSvc, transactor)
 	categorySvc := service.NewCategoryService(categoryRepo, tagRepo, aclSvc, auditSvc)
 	poolSvc := service.NewPoolService(poolRepo, aclSvc, auditSvc)
+	duplicateSvc := service.NewDuplicateService(
+		fileRepo, duplicatePairRepo, dismissalRepo, aclSvc, auditSvc, transactor, cfg.DuplicateHashThreshold,
+	)
 	fileSvc := service.NewFileService(
 		fileRepo,
 		mimeRepo,
@@ -108,6 +113,7 @@ func main() {
 	authMiddleware := handler.NewAuthMiddleware(authSvc)
 	authHandler := handler.NewAuthHandler(authSvc)
 	fileHandler := handler.NewFileHandler(fileSvc, tagSvc, authSvc, cfg.MaxUploadBytes)
+	duplicateHandler := handler.NewDuplicateHandler(duplicateSvc)
 	tagHandler := handler.NewTagHandler(tagSvc, fileSvc)
 	categoryHandler := handler.NewCategoryHandler(categorySvc)
 	poolHandler := handler.NewPoolHandler(poolSvc)
@@ -117,7 +123,7 @@ func main() {
 
 	r, err := handler.NewRouter(
 		authMiddleware, authHandler,
-		fileHandler, tagHandler, categoryHandler, poolHandler,
+		fileHandler, duplicateHandler, tagHandler, categoryHandler, poolHandler,
 		userHandler, aclHandler, auditHandler,
 		cfg.StaticDir,
 		cfg.TrustedProxies,
diff --git a/backend/internal/db/postgres/duplicate_repo.go b/backend/internal/db/postgres/duplicate_repo.go
new file mode 100644
index 0000000..650292b
--- /dev/null
+++ b/backend/internal/db/postgres/duplicate_repo.go
@@ -0,0 +1,145 @@
+package postgres
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+
+	"github.com/google/uuid"
+	"github.com/jackc/pgx/v5"
+	"github.com/jackc/pgx/v5/pgxpool"
+
+	"tanabata/backend/internal/domain"
+	"tanabata/backend/internal/port"
+)
+
+// ---------------------------------------------------------------------------
+// DuplicatePairRepo
+// ---------------------------------------------------------------------------
+
+// DuplicatePairRepo implements port.DuplicatePairRepo using PostgreSQL.
+type DuplicatePairRepo struct {
+	pool *pgxpool.Pool
+}
+
+// NewDuplicatePairRepo creates a DuplicatePairRepo backed by pool.
+func NewDuplicatePairRepo(pool *pgxpool.Pool) *DuplicatePairRepo {
+	return &DuplicatePairRepo{pool: pool}
+}
+
+var _ port.DuplicatePairRepo = (*DuplicatePairRepo)(nil)
+
+// ReplaceAll atomically replaces the entire pairs table with the given set.
+// The rescan recomputes pairs from scratch, so a full DELETE + COPY is both
+// correct and the simplest way to drop pairs that no longer qualify.
+func (r *DuplicatePairRepo) ReplaceAll(ctx context.Context, pairs []domain.DuplicatePair) error {
+	tx, err := r.pool.Begin(ctx)
+	if err != nil {
+		return fmt.Errorf("DuplicatePairRepo.ReplaceAll begin: %w", err)
+	}
+	defer tx.Rollback(ctx) //nolint:errcheck // no-op after a successful commit
+
+	if _, err := tx.Exec(ctx, `DELETE FROM data.duplicate_pairs`); err != nil {
+		return fmt.Errorf("DuplicatePairRepo.ReplaceAll delete: %w", err)
+	}
+
+	if len(pairs) > 0 {
+		rows := make([][]any, len(pairs))
+		for i, p := range pairs {
+			rows[i] = []any{p.FileA, p.FileB, int16(p.Distance)}
+		}
+		if _, err := tx.CopyFrom(ctx,
+			pgx.Identifier{"data", "duplicate_pairs"},
+			[]string{"file_a", "file_b", "distance"},
+			pgx.CopyFromRows(rows),
+		); err != nil {
+			return fmt.Errorf("DuplicatePairRepo.ReplaceAll copy: %w", err)
+		}
+	}
+
+	if err := tx.Commit(ctx); err != nil {
+		return fmt.Errorf("DuplicatePairRepo.ReplaceAll commit: %w", err)
+	}
+	return nil
+}
+
+type pairRow struct {
+	FileA    uuid.UUID `db:"file_a"`
+	FileB    uuid.UUID `db:"file_b"`
+	Distance int16     `db:"distance"`
+}
+
+// ListVisible returns every stored pair where both files are live (not trashed),
+// the pair is not dismissed, and — for non-admins — both files are visible to the
+// viewer under the private-by-default model. This is the input to clustering.
+func (r *DuplicatePairRepo) ListVisible(ctx context.Context, viewerID int16, isAdmin bool) ([]domain.DuplicatePair, error) {
+	args := make([]any, 0, 4)
+	n := 1
+	aclWhere := ""
+	if !isAdmin {
+		var ca, cb string
+		ca, n, args = aclVisibilityCond("fa", objTypeFile, viewerID, n, args)
+		cb, n, args = aclVisibilityCond("fb", objTypeFile, viewerID, n, args)
+		aclWhere = "AND " + ca + " AND " + cb
+	}
+
+	sqlStr := fmt.Sprintf(`
+        SELECT p.file_a, p.file_b, p.distance
+        FROM data.duplicate_pairs p
+        JOIN data.files fa ON fa.id = p.file_a AND fa.is_deleted = false
+        JOIN data.files fb ON fb.id = p.file_b AND fb.is_deleted = false
+        WHERE NOT EXISTS (
+            SELECT 1 FROM data.duplicate_dismissals d
+            WHERE d.file_a = p.file_a AND d.file_b = p.file_b
+        )
+        %s
+        ORDER BY p.file_a, p.file_b`, aclWhere)
+
+	rows, err := r.pool.Query(ctx, sqlStr, args...)
+	if err != nil {
+		return nil, fmt.Errorf("DuplicatePairRepo.ListVisible: %w", err)
+	}
+	collected, err := pgx.CollectRows(rows, pgx.RowToStructByName[pairRow])
+	if err != nil {
+		return nil, fmt.Errorf("DuplicatePairRepo.ListVisible scan: %w", err)
+	}
+	out := make([]domain.DuplicatePair, len(collected))
+	for i, row := range collected {
+		out[i] = domain.DuplicatePair{FileA: row.FileA, FileB: row.FileB, Distance: int(row.Distance)}
+	}
+	return out, nil
+}
+
+// ---------------------------------------------------------------------------
+// DismissalRepo
+// ---------------------------------------------------------------------------
+
+// DismissalRepo implements port.DismissalRepo using PostgreSQL.
+type DismissalRepo struct {
+	pool *pgxpool.Pool
+}
+
+// NewDismissalRepo creates a DismissalRepo backed by pool.
+func NewDismissalRepo(pool *pgxpool.Pool) *DismissalRepo {
+	return &DismissalRepo{pool: pool}
+}
+
+var _ port.DismissalRepo = (*DismissalRepo)(nil)
+
+// Add records a pair as "not a duplicate". The two ids are stored in canonical
+// (file_a < file_b) order to match the table's CHECK and avoid (a,b)/(b,a)
+// duplicates; a repeated dismissal is a no-op.
+func (r *DismissalRepo) Add(ctx context.Context, a, b uuid.UUID, userID int16) error {
+	if bytes.Compare(a[:], b[:]) > 0 {
+		a, b = b, a
+	}
+	const sqlStr = `
+        INSERT INTO data.duplicate_dismissals (file_a, file_b, dismissed_by)
+        VALUES ($1, $2, $3)
+        ON CONFLICT (file_a, file_b) DO NOTHING`
+	q := connOrTx(ctx, r.pool)
+	if _, err := q.Exec(ctx, sqlStr, a, b, userID); err != nil {
+		return fmt.Errorf("DismissalRepo.Add: %w", err)
+	}
+	return nil
+}
diff --git a/backend/internal/db/postgres/file_repo.go b/backend/internal/db/postgres/file_repo.go
index 22d9990..acff0d7 100644
--- a/backend/internal/db/postgres/file_repo.go
+++ b/backend/internal/db/postgres/file_repo.go
@@ -446,6 +446,89 @@ func (r *FileRepo) SetPHash(ctx context.Context, id uuid.UUID, phash *int64) err
 	return nil
 }
 
+// ---------------------------------------------------------------------------
+// Perceptual-hash / duplicate support
+// ---------------------------------------------------------------------------
+
+// ListMissingPHash returns live image/video files that have no perceptual hash
+// yet — the work list for the dedup backfill. Tags are not loaded (the backfill
+// only needs the id and MIME type to choose image vs video hashing).
+func (r *FileRepo) ListMissingPHash(ctx context.Context) ([]domain.File, error) {
+	const sqlStr = `
+        SELECT f.id, f.original_name,
+               mt.name AS mime_type, mt.extension AS mime_extension,
+               f.content_datetime, f.notes, f.metadata, f.exif, f.phash,
+               f.creator_id, u.name AS creator_name,
+               f.is_public, f.is_deleted, f.needs_review
+        FROM data.files f
+        JOIN core.mime_types mt ON mt.id = f.mime_id
+        JOIN core.users u       ON u.id  = f.creator_id
+        WHERE f.phash IS NULL AND f.is_deleted = false
+          AND (mt.name LIKE 'image/%' OR mt.name LIKE 'video/%')
+        ORDER BY f.id`
+
+	q := connOrTx(ctx, r.pool)
+	rows, err := q.Query(ctx, sqlStr)
+	if err != nil {
+		return nil, fmt.Errorf("FileRepo.ListMissingPHash: %w", err)
+	}
+	collected, err := pgx.CollectRows(rows, pgx.RowToStructByName[fileRow])
+	if err != nil {
+		return nil, fmt.Errorf("FileRepo.ListMissingPHash scan: %w", err)
+	}
+	files := make([]domain.File, len(collected))
+	for i, row := range collected {
+		files[i] = toFile(row)
+	}
+	return files, nil
+}
+
+// phashRow is the minimal projection used to build duplicate clusters.
+type phashRow struct {
+	ID    uuid.UUID `db:"id"`
+	PHash int64     `db:"phash"`
+}
+
+// ListAllPHashes returns the id and perceptual hash of every live, hashed file.
+// It is the global input to the dedup rescan, so it deliberately ignores ACL —
+// the rescan builds the shared pairs table; visibility is enforced on read.
+func (r *FileRepo) ListAllPHashes(ctx context.Context) ([]domain.PHashEntry, error) {
+	const sqlStr = `SELECT id, phash FROM data.files WHERE is_deleted = false AND phash IS NOT NULL`
+	q := connOrTx(ctx, r.pool)
+	rows, err := q.Query(ctx, sqlStr)
+	if err != nil {
+		return nil, fmt.Errorf("FileRepo.ListAllPHashes: %w", err)
+	}
+	collected, err := pgx.CollectRows(rows, pgx.RowToStructByName[phashRow])
+	if err != nil {
+		return nil, fmt.Errorf("FileRepo.ListAllPHashes scan: %w", err)
+	}
+	out := make([]domain.PHashEntry, len(collected))
+	for i, row := range collected {
+		out[i] = domain.PHashEntry{ID: row.ID, PHash: row.PHash}
+	}
+	return out, nil
+}
+
+// CopyPoolMemberships adds targetID to every pool sourceID belongs to (copying
+// the source's position), skipping pools the target is already in. Used by the
+// duplicate merge to preserve the discarded file's pool memberships on the
+// survivor. The merge is authorised at the file level, so pool ACL is not
+// re-checked here.
+func (r *FileRepo) CopyPoolMemberships(ctx context.Context, targetID, sourceID uuid.UUID) error {
+	const sqlStr = `
+        INSERT INTO data.file_pool (file_id, pool_id, position)
+        SELECT $1, fp.pool_id, fp.position
+        FROM data.file_pool fp
+        WHERE fp.file_id = $2
+        ON CONFLICT (file_id, pool_id) DO NOTHING`
+	q := connOrTx(ctx, r.pool)
+	if _, err := q.Exec(ctx, sqlStr, targetID, sourceID); err != nil {
+		return fmt.Errorf("FileRepo.CopyPoolMemberships: %w", err)
+	}
+	return nil
+}
+
 // ---------------------------------------------------------------------------
 // SoftDelete / Restore / DeletePermanent
 // ---------------------------------------------------------------------------
diff --git a/backend/internal/domain/duplicate.go b/backend/internal/domain/duplicate.go
new file mode 100644
index 0000000..8853096
--- /dev/null
+++ b/backend/internal/domain/duplicate.go
@@ -0,0 +1,18 @@
+package domain
+
+import "github.com/google/uuid"
+
+// PHashEntry is a file's perceptual hash, the input to duplicate clustering.
+type PHashEntry struct {
+	ID    uuid.UUID
+	PHash int64
+}
+
+// DuplicatePair is an unordered pair of files whose perceptual hashes are within
+// the configured Hamming threshold. FileA < FileB by UUID byte order (canonical),
+// so a pair is represented exactly once.
+type DuplicatePair struct {
+	FileA    uuid.UUID
+	FileB    uuid.UUID
+	Distance int
+}
diff --git a/backend/internal/handler/duplicate_handler.go b/backend/internal/handler/duplicate_handler.go
new file mode 100644
index 0000000..7018e59
--- /dev/null
+++ b/backend/internal/handler/duplicate_handler.go
@@ -0,0 +1,122 @@
+package handler
+
+import (
+	"net/http"
+	"strconv"
+
+	"github.com/gin-gonic/gin"
+
+	"tanabata/backend/internal/domain"
+	"tanabata/backend/internal/service"
+)
+
+// DuplicateHandler handles the /files/duplicates endpoints.
+type DuplicateHandler struct {
+	dupSvc *service.DuplicateService
+}
+
+// NewDuplicateHandler creates a DuplicateHandler.
+func NewDuplicateHandler(dupSvc *service.DuplicateService) *DuplicateHandler {
+	return &DuplicateHandler{dupSvc: dupSvc}
+}
+
+// List handles GET /files/duplicates — an offset-paginated list of duplicate
+// clusters, each a group of files within the perceptual-hash threshold.
+func (h *DuplicateHandler) List(c *gin.Context) {
+	limit, offset := 20, 0
+	if n, err := strconv.Atoi(c.Query("limit")); err == nil {
+		limit = n
+	}
+	if n, err := strconv.Atoi(c.Query("offset")); err == nil {
+		offset = n
+	}
+	if limit < 1 {
+		limit = 1
+	}
+	if limit > 50 {
+		limit = 50
+	}
+	if offset < 0 {
+		offset = 0
+	}
+
+	clusters, total, err := h.dupSvc.Clusters(c.Request.Context(), limit, offset)
+	if err != nil {
+		respondError(c, err)
+		return
+	}
+
+	items := make([]gin.H, len(clusters))
+	for i, files := range clusters {
+		fs := make([]fileJSON, len(files))
+		for j, f := range files {
+			fs[j] = toFileJSON(f)
+		}
+		items[i] = gin.H{"files": fs}
+	}
+	respondJSON(c, http.StatusOK, gin.H{
+		"items":  items,
+		"total":  total,
+		"limit":  limit,
+		"offset": offset,
+	})
+}
+
+// Dismiss handles POST /files/duplicates/dismiss — mark a pair "not a duplicate".
+func (h *DuplicateHandler) Dismiss(c *gin.Context) {
+	var body struct {
+		FileIDA string `json:"file_id_a" binding:"required"`
+		FileIDB string `json:"file_id_b" binding:"required"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		respondError(c, domain.ErrValidation)
+		return
+	}
+	ids, err := parseUUIDs([]string{body.FileIDA, body.FileIDB})
+	if err != nil {
+		respondError(c, domain.ErrValidation)
+		return
+	}
+	if err := h.dupSvc.Dismiss(c.Request.Context(), ids[0], ids[1]); err != nil {
+		respondError(c, err)
+		return
+	}
+	c.Status(http.StatusNoContent)
+}
+
+// Resolve handles POST /files/duplicates/resolve — merge a duplicate pair,
+// keeping one file and folding the chosen fields in from the other. Returns the
+// updated survivor. delete_discarded defaults to true.
+func (h *DuplicateHandler) Resolve(c *gin.Context) {
+	var body struct {
+		Keep            string              `json:"keep"    binding:"required"`
+		Discard         string              `json:"discard" binding:"required"`
+		Fields          service.MergeFields `json:"fields"`
+		DeleteDiscarded *bool               `json:"delete_discarded"`
+	}
+	if err := c.ShouldBindJSON(&body); err != nil {
+		respondError(c, domain.ErrValidation)
+		return
+	}
+	ids, err := parseUUIDs([]string{body.Keep, body.Discard})
+	if err != nil {
+		respondError(c, domain.ErrValidation)
+		return
+	}
+
+	del := true
+	if body.DeleteDiscarded != nil {
+		del = *body.DeleteDiscarded
+	}
+	f, err := h.dupSvc.Resolve(c.Request.Context(), service.MergeSpec{
+		Keep:            ids[0],
+		Discard:         ids[1],
+		Fields:          body.Fields,
+		DeleteDiscarded: del,
+	})
+	if err != nil {
+		respondError(c, err)
+		return
+	}
+	respondJSON(c, http.StatusOK, toFileJSON(*f))
+}
diff --git a/backend/internal/handler/router.go b/backend/internal/handler/router.go
index 85b5a5b..041cae7 100644
--- a/backend/internal/handler/router.go
+++ b/backend/internal/handler/router.go
@@ -26,6 +26,7 @@ func NewRouter(
 	auth *AuthMiddleware,
 	authHandler *AuthHandler,
 	fileHandler *FileHandler,
+	duplicateHandler *DuplicateHandler,
 	tagHandler *TagHandler,
 	categoryHandler *CategoryHandler,
 	poolHandler *PoolHandler,
@@ -80,7 +81,11 @@ func NewRouter(
 		files.GET("", fileHandler.List)
 		files.POST("", fileHandler.Upload)
 
-		// Bulk + import routes registered before /:id to prevent param collision.
+		// Bulk + import + duplicates routes registered before /:id to prevent
+		// param collision (e.g. "duplicates" being captured as :id).
+		files.GET("/duplicates", duplicateHandler.List)
+		files.POST("/duplicates/dismiss", duplicateHandler.Dismiss)
+		files.POST("/duplicates/resolve", duplicateHandler.Resolve)
 		files.POST("/bulk/tags", fileHandler.BulkSetTags)
 		files.POST("/bulk/delete", fileHandler.BulkDelete)
 		files.POST("/bulk/review", fileHandler.BulkReview)
diff --git a/backend/internal/handler/router_test.go b/backend/internal/handler/router_test.go
index aaba7e3..8a4e042 100644
--- a/backend/internal/handler/router_test.go
+++ b/backend/internal/handler/router_test.go
@@ -10,7 +10,7 @@ import "testing"
 func TestNewRouterRegisters(t *testing.T) {
 	r, err := NewRouter(
 		(*AuthMiddleware)(nil), (*AuthHandler)(nil),
-		(*FileHandler)(nil), (*TagHandler)(nil), (*CategoryHandler)(nil), (*PoolHandler)(nil),
+		(*FileHandler)(nil), (*DuplicateHandler)(nil), (*TagHandler)(nil), (*CategoryHandler)(nil), (*PoolHandler)(nil),
 		(*UserHandler)(nil), (*ACLHandler)(nil), (*AuditHandler)(nil),
 		"", nil,
 	)
diff --git a/backend/internal/integration/server_test.go b/backend/internal/integration/server_test.go
index 549b676..ae65a0c 100644
--- a/backend/internal/integration/server_test.go
+++ b/backend/internal/integration/server_test.go
@@ -56,6 +56,9 @@ type harness struct {
 	client    *http.Client
 	importDir string
 	pool      *pgxpool.Pool
+	// dupSvc lets duplicate tests trigger a pairs rescan directly: rebuilding the
+	// pairs table is a CLI/maintenance action with no HTTP endpoint.
+	dupSvc *service.DuplicateService
 }
 
 // setupSuite creates an ephemeral database, runs migrations, wires the full
@@ -125,6 +128,8 @@ func setupSuite(t *testing.T) *harness {
 	tagRuleRepo := postgres.NewTagRuleRepo(pool)
 	categoryRepo := postgres.NewCategoryRepo(pool)
 	poolRepo := postgres.NewPoolRepo(pool)
+	duplicatePairRepo := postgres.NewDuplicatePairRepo(pool)
+	dismissalRepo := postgres.NewDismissalRepo(pool)
 	transactor := postgres.NewTransactor(pool)
 
 	// --- Services ------------------------------------------------------------
@@ -134,6 +139,7 @@ func setupSuite(t *testing.T) *harness {
 	tagSvc := service.NewTagService(tagRepo, tagRuleRepo, aclSvc, auditSvc, transactor)
 	categorySvc := service.NewCategoryService(categoryRepo, tagRepo, aclSvc, auditSvc)
 	poolSvc := service.NewPoolService(poolRepo, aclSvc, auditSvc)
+	duplicateSvc := service.NewDuplicateService(fileRepo, duplicatePairRepo, dismissalRepo, aclSvc, auditSvc, transactor, 10)
 	fileSvc := service.NewFileService(fileRepo, mimeRepo, diskStorage, aclSvc, auditSvc, tagSvc, transactor, importDir)
 	userSvc := service.NewUserService(userRepo, sessionRepo, auditSvc)
 
@@ -145,6 +151,7 @@ func setupSuite(t *testing.T) *harness {
 	authMiddleware := handler.NewAuthMiddleware(authSvc)
 	authHandler := handler.NewAuthHandler(authSvc)
 	fileHandler := handler.NewFileHandler(fileSvc, tagSvc, authSvc, 500<<20)
+	duplicateHandler := handler.NewDuplicateHandler(duplicateSvc)
 	tagHandler := handler.NewTagHandler(tagSvc, fileSvc)
 	categoryHandler := handler.NewCategoryHandler(categorySvc)
 	poolHandler := handler.NewPoolHandler(poolSvc)
@@ -154,7 +161,7 @@ func setupSuite(t *testing.T) *harness {
 
 	r, err := handler.NewRouter(
 		authMiddleware, authHandler,
-		fileHandler, tagHandler, categoryHandler, poolHandler,
+		fileHandler, duplicateHandler, tagHandler, categoryHandler, poolHandler,
 		userHandler, aclHandler, auditHandler,
 		"",
 		nil,
@@ -170,6 +177,7 @@ func setupSuite(t *testing.T) *harness {
 		client:    srv.Client(),
 		importDir: importDir,
 		pool:      pool,
+		dupSvc:    duplicateSvc,
 	}
 }
 
@@ -1643,3 +1651,103 @@ var (
 	_ = freePort
 	_ = writeFile
 )
+
+// dupListResponse decodes GET /files/duplicates.
+type dupListResponse struct {
+	Items []struct {
+		Files []struct {
+			ID   string `json:"id"`
+			Tags []struct {
+				ID string `json:"id"`
+			} `json:"tags"`
+		} `json:"files"`
+	} `json:"items"`
+	Total int `json:"total"`
+}
+
+// TestDuplicateDetection exercises the full duplicate lifecycle: perceptual hashes
+// are computed on upload, a rescan builds the pairs table, the cluster surfaces,
+// a field-by-field merge unions tags and trashes the discarded file, and a
+// dismissal hides a pair permanently (surviving a re-rescan).
+//
+// minimalJPEG() is a 1×1 image, so every upload hashes identically — in a fresh
+// database any two uploads form one duplicate pair, which keeps this deterministic.
+func TestDuplicateDetection(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping integration test in short mode")
+	}
+
+	h := setupSuite(t)
+	ctx := context.Background()
+	admin := h.login("admin", "admin")
+
+	// --- two uploads => one duplicate pair after a rescan ---------------------
+	f1 := h.uploadJPEG(admin, "a.jpg")["id"].(string)
+	f2 := h.uploadJPEG(admin, "b.jpg")["id"].(string)
+
+	// Tag f2 so the merge has something to union onto the survivor.
+	resp := h.doJSON("POST", "/tags", map[string]any{"name": "kept", "is_public": true}, admin)
+	require.Equal(t, http.StatusCreated, resp.StatusCode, resp.String())
+	var tag map[string]any
+	resp.decode(t, &tag)
+	tagID := tag["id"].(string)
+	resp = h.doJSON("PUT", "/files/"+f2+"/tags", map[string]any{"tag_ids": []string{tagID}}, admin)
+	require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
+
+	require.NoError(t, h.dupSvc.Rescan(ctx, nil))
+
+	resp = h.doJSON("GET", "/files/duplicates", nil, admin)
+	require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
+	var list dupListResponse
+	resp.decode(t, &list)
+	require.Equal(t, 1, list.Total, "expected one duplicate cluster: %s", resp)
+	require.Len(t, list.Items, 1)
+	require.Len(t, list.Items[0].Files, 2)
+
+	// --- resolve: keep f1, union tags from f2, trash f2 ----------------------
+	resp = h.doJSON("POST", "/files/duplicates/resolve", map[string]any{
+		"keep":             f1,
+		"discard":          f2,
+		"fields":           map[string]any{"tags": "both"},
+		"delete_discarded": true,
+	}, admin)
+	require.Equal(t, http.StatusOK, resp.StatusCode, resp.String())
+	var survivor struct {
+		ID   string `json:"id"`
+		Tags []struct {
+			ID string `json:"id"`
+		} `json:"tags"`
+	}
+	resp.decode(t, &survivor)
+	assert.Equal(t, f1, survivor.ID)
+	require.Len(t, survivor.Tags, 1, "survivor should have inherited the discarded file's tag")
+	assert.Equal(t, tagID, survivor.Tags[0].ID)
+
+	// f2 is now trashed, so the pair drops out of the duplicates view.
+	resp = h.doJSON("GET", "/files/duplicates", nil, admin)
+	resp.decode(t, &list)
+	assert.Equal(t, 0, list.Total, "resolved pair should no longer surface: %s", resp)
+
+	// --- dismiss: a new pair, hidden and staying hidden across a rescan -------
+	f3 := h.uploadJPEG(admin, "c.jpg")["id"].(string)
+	require.NoError(t, h.dupSvc.Rescan(ctx, nil))
+
+	resp = h.doJSON("GET", "/files/duplicates", nil, admin)
+	resp.decode(t, &list)
+	require.Equal(t, 1, list.Total, "f1 and f3 should now form a cluster: %s", resp)
+
+	resp = h.doJSON("POST", "/files/duplicates/dismiss", map[string]any{
+		"file_id_a": f1, "file_id_b": f3,
+	}, admin)
+	require.Equal(t, http.StatusNoContent, resp.StatusCode, resp.String())
+
+	resp = h.doJSON("GET", "/files/duplicates", nil, admin)
+	resp.decode(t, &list)
+	assert.Equal(t, 0, list.Total, "dismissed pair should be hidden")
+
+	// A rescan re-finds the pair but the dismissal still hides it.
+	require.NoError(t, h.dupSvc.Rescan(ctx, nil))
+	resp = h.doJSON("GET", "/files/duplicates", nil, admin)
+	resp.decode(t, &list)
+	assert.Equal(t, 0, list.Total, "dismissal must survive a rescan")
+}
diff --git a/backend/internal/port/repository.go b/backend/internal/port/repository.go
index 48d7cc9..524b739 100644
--- a/backend/internal/port/repository.go
+++ b/backend/internal/port/repository.go
@@ -52,6 +52,15 @@ type FileRepo interface {
 	SetNeedsReview(ctx context.Context, ids []uuid.UUID, value bool) error
 	// SetPHash sets (or clears, when nil) the perceptual hash of a file.
 	SetPHash(ctx context.Context, id uuid.UUID, phash *int64) error
+	// ListMissingPHash returns live image/video files that have no perceptual
+	// hash yet (the dedup backfill work list).
+	ListMissingPHash(ctx context.Context) ([]domain.File, error)
+	// ListAllPHashes returns the id and perceptual hash of every live, hashed
+	// file (the global input to the dedup rescan; not ACL-filtered).
+	ListAllPHashes(ctx context.Context) ([]domain.PHashEntry, error)
+	// CopyPoolMemberships adds targetID to every pool sourceID belongs to,
+	// skipping pools target is already in (used by the duplicate merge).
+	CopyPoolMemberships(ctx context.Context, targetID, sourceID uuid.UUID) error
 	// SoftDelete moves a file to trash (sets is_deleted = true).
 	SoftDelete(ctx context.Context, id uuid.UUID) error
 	// Restore moves a file out of trash (sets is_deleted = false).
@@ -72,6 +81,21 @@ type FileRepo interface {
 	RecordTagUses(ctx context.Context, userID int16, filterDSL string) error
 }
 
+// DuplicatePairRepo persists the precomputed near-duplicate candidate pairs.
+type DuplicatePairRepo interface {
+	// ReplaceAll atomically replaces the whole pairs table (used by the rescan).
+	ReplaceAll(ctx context.Context, pairs []domain.DuplicatePair) error
+	// ListVisible returns pairs whose both files are live, not dismissed, and
+	// (for non-admins) visible to the viewer.
+	ListVisible(ctx context.Context, viewerID int16, isAdmin bool) ([]domain.DuplicatePair, error)
+}
+
+// DismissalRepo persists "not a duplicate" decisions.
+type DismissalRepo interface {
+	// Add records a pair as dismissed (canonical order, idempotent).
+	Add(ctx context.Context, a, b uuid.UUID, userID int16) error
+}
+
 // TagRepo is the persistence interface for tags.
 type TagRepo interface {
 	List(ctx context.Context, params OffsetParams) (*domain.TagOffsetPage, error)
diff --git a/backend/internal/service/duplicate_index.go b/backend/internal/service/duplicate_index.go
new file mode 100644
index 0000000..475e088
--- /dev/null
+++ b/backend/internal/service/duplicate_index.go
@@ -0,0 +1,148 @@
+package service
+
+import (
+	"bytes"
+	"math/bits"
+	"sort"
+
+	"github.com/google/uuid"
+
+	"tanabata/backend/internal/domain"
+)
+
+// hamming returns the number of differing bits between two perceptual hashes.
+func hamming(a, b uint64) int { return bits.OnesCount64(a ^ b) }
+
+// bkNode is a node in a BK-tree over Hamming distance. Files that share the exact
+// same hash are collected in ids (a distance-0 collision), so identical images
+// don't degenerate the tree into a chain.
+type bkNode struct {
+	hash     uint64
+	ids      []uuid.UUID
+	children map[int]*bkNode
+}
+
+// bkTree indexes perceptual hashes for sublinear radius queries. Building one and
+// querying every element with a small radius is far cheaper than the O(N²) all-
+// pairs comparison at 100k+ files.
+type bkTree struct{ root *bkNode }
+
+func (t *bkTree) insert(hash uint64, id uuid.UUID) {
+	if t.root == nil {
+		t.root = &bkNode{hash: hash, ids: []uuid.UUID{id}, children: map[int]*bkNode{}}
+		return
+	}
+	node := t.root
+	for {
+		d := hamming(hash, node.hash)
+		if d == 0 {
+			node.ids = append(node.ids, id)
+			return
+		}
+		child, ok := node.children[d]
+		if !ok {
+			node.children[d] = &bkNode{hash: hash, ids: []uuid.UUID{id}, children: map[int]*bkNode{}}
+			return
+		}
+		node = child
+	}
+}
+
+// query visits every node whose hash is within radius of target. The triangle
+// inequality bounds which children can hold a match to [d-radius, d+radius].
+func (t *bkTree) query(target uint64, radius int, visit func(node *bkNode, dist int)) {
+	if t.root == nil {
+		return
+	}
+	stack := []*bkNode{t.root}
+	for len(stack) > 0 {
+		node := stack[len(stack)-1]
+		stack = stack[:len(stack)-1]
+
+		d := hamming(target, node.hash)
+		if d <= radius {
+			visit(node, d)
+		}
+		lo, hi := d-radius, d+radius
+		for cd, child := range node.children {
+			if cd >= lo && cd <= hi {
+				stack = append(stack, child)
+			}
+		}
+	}
+}
+
+// buildPairs returns every unordered pair of files whose hashes are within
+// threshold, each emitted exactly once with FileA < FileB (UUID byte order).
+// onProgress, if set, is called periodically with (processed, total).
+func buildPairs(entries []domain.PHashEntry, threshold int, onProgress func(done, total int)) []domain.DuplicatePair {
+	tree := &bkTree{}
+	for _, e := range entries {
+		tree.insert(uint64(e.PHash), e.ID)
+	}
+
+	var pairs []domain.DuplicatePair
+	total := len(entries)
+	for i := range entries {
+		e := entries[i]
+		tree.query(uint64(e.PHash), threshold, func(node *bkNode, dist int) {
+			for _, other := range node.ids {
+				// Emit each pair once, from the smaller id, which also skips self.
+				if bytes.Compare(e.ID[:], other[:]) < 0 {
+					pairs = append(pairs, domain.DuplicatePair{FileA: e.ID, FileB: other, Distance: dist})
+				}
+			}
+		})
+		if onProgress != nil && (i+1)%1000 == 0 {
+			onProgress(i+1, total)
+		}
+	}
+	if onProgress != nil {
+		onProgress(total, total)
+	}
+	return pairs
+}
+
+// clusterPairs groups pairs into connected components (transitive closure) via
+// union-find. Every returned cluster has at least two files; clusters and the ids
+// within them are sorted by UUID for stable pagination.
+func clusterPairs(pairs []domain.DuplicatePair) [][]uuid.UUID {
+	parent := map[uuid.UUID]uuid.UUID{}
+	var find func(uuid.UUID) uuid.UUID
+	find = func(x uuid.UUID) uuid.UUID {
+		p, ok := parent[x]
+		if !ok {
+			parent[x] = x
+			return x
+		}
+		if p != x {
+			parent[x] = find(p)
+		}
+		return parent[x]
+	}
+	union := func(a, b uuid.UUID) {
+		ra, rb := find(a), find(b)
+		if ra != rb {
+			parent[ra] = rb
+		}
+	}
+	for _, p := range pairs {
+		union(p.FileA, p.FileB)
+	}
+
+	groups := map[uuid.UUID][]uuid.UUID{}
+	for node := range parent {
+		root := find(node)
+		groups[root] = append(groups[root], node)
+	}
+
+	clusters := make([][]uuid.UUID, 0, len(groups))
+	for _, ids := range groups {
+		sort.Slice(ids, func(i, j int) bool { return bytes.Compare(ids[i][:], ids[j][:]) < 0 })
+		clusters = append(clusters, ids)
+	}
+	sort.Slice(clusters, func(i, j int) bool {
+		return bytes.Compare(clusters[i][0][:], clusters[j][0][:]) < 0
+	})
+	return clusters
+}
diff --git a/backend/internal/service/duplicate_service.go b/backend/internal/service/duplicate_service.go
new file mode 100644
index 0000000..b076ef2
--- /dev/null
+++ b/backend/internal/service/duplicate_service.go
@@ -0,0 +1,361 @@
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"time"
+
+	"github.com/google/uuid"
+
+	"tanabata/backend/internal/domain"
+	"tanabata/backend/internal/port"
+)
+
+// Merge field source values.
+const (
+	mergeKeep    = "keep"
+	mergeDiscard = "discard"
+	mergeBoth    = "both"
+	mergeMerge   = "merge"
+)
+
+// MergeFields chooses, per field, which file supplies the survivor's value when
+// resolving a duplicate. Scalars accept "keep"/"discard"; metadata also accepts
+// "merge" (shallow object merge, survivor wins on key conflicts); relations
+// (tags, pools) accept "keep"/"both" (union) — there is deliberately no option
+// to drop the survivor's own tags/pools. An empty value defaults to "keep".
+type MergeFields struct {
+	OriginalName    string `json:"original_name"`
+	Notes           string `json:"notes"`
+	ContentDatetime string `json:"content_datetime"`
+	IsPublic        string `json:"is_public"`
+	Metadata        string `json:"metadata"`
+	Tags            string `json:"tags"`
+	Pools           string `json:"pools"`
+}
+
+// MergeSpec is the input to a duplicate resolution: keep one file, fold chosen
+// fields in from the other, and (usually) trash the other.
+type MergeSpec struct {
+	Keep            uuid.UUID
+	Discard         uuid.UUID
+	Fields          MergeFields
+	DeleteDiscarded bool
+}
+
+// normalize fills empty choices with "keep" and rejects unknown values.
+func (m *MergeSpec) normalize() error {
+	scalar := func(v *string) error {
+		if *v == "" {
+			*v = mergeKeep
+		}
+		if *v != mergeKeep && *v != mergeDiscard {
+			return domain.ErrValidation
+		}
+		return nil
+	}
+	relation := func(v *string) error {
+		if *v == "" {
+			*v = mergeKeep
+		}
+		if *v != mergeKeep && *v != mergeBoth {
+			return domain.ErrValidation
+		}
+		return nil
+	}
+	f := &m.Fields
+	if err := scalar(&f.OriginalName); err != nil {
+		return err
+	}
+	if err := scalar(&f.Notes); err != nil {
+		return err
+	}
+	if err := scalar(&f.ContentDatetime); err != nil {
+		return err
+	}
+	if err := scalar(&f.IsPublic); err != nil {
+		return err
+	}
+	if f.Metadata == "" {
+		f.Metadata = mergeKeep
+	}
+	if f.Metadata != mergeKeep && f.Metadata != mergeDiscard && f.Metadata != mergeMerge {
+		return domain.ErrValidation
+	}
+	if err := relation(&f.Tags); err != nil {
+		return err
+	}
+	if err := relation(&f.Pools); err != nil {
+		return err
+	}
+	return nil
+}
+
+// DuplicateService finds near-duplicate clusters and resolves them.
+type DuplicateService struct {
+	files      port.FileRepo
+	pairs      port.DuplicatePairRepo
+	dismissals port.DismissalRepo
+	acl        *ACLService
+	audit      *AuditService
+	tx         port.Transactor
+	threshold  int
+}
+
+// NewDuplicateService creates a DuplicateService. threshold is the maximum
+// Hamming distance for two files to be treated as duplicate candidates.
+func NewDuplicateService(
+	files port.FileRepo,
+	pairs port.DuplicatePairRepo,
+	dismissals port.DismissalRepo,
+	acl *ACLService,
+	audit *AuditService,
+	tx port.Transactor,
+	threshold int,
+) *DuplicateService {
+	return &DuplicateService{
+		files:      files,
+		pairs:      pairs,
+		dismissals: dismissals,
+		acl:        acl,
+		audit:      audit,
+		tx:         tx,
+		threshold:  threshold,
+	}
+}
+
+// Clusters returns a page of duplicate clusters visible to the caller. Pairs are
+// read from the precomputed table (no all-pairs scan here) and grouped into
+// connected components; pagination is over whole clusters.
+func (s *DuplicateService) Clusters(ctx context.Context, limit, offset int) (clusters [][]domain.File, total int, err error) {
+	userID, isAdmin, _ := domain.UserFromContext(ctx)
+
+	pairs, err := s.pairs.ListVisible(ctx, userID, isAdmin)
+	if err != nil {
+		return nil, 0, err
+	}
+	groups := clusterPairs(pairs)
+	total = len(groups)
+
+	if offset < 0 {
+		offset = 0
+	}
+	if offset >= len(groups) {
+		return [][]domain.File{}, total, nil
+	}
+	end := offset + limit
+	if end > len(groups) || limit <= 0 {
+		end = len(groups)
+	}
+
+	out := make([][]domain.File, 0, end-offset)
+	for _, ids := range groups[offset:end] {
+		files := make([]domain.File, 0, len(ids))
+		for _, id := range ids {
+			f, err := s.files.GetByID(ctx, id)
+			if err != nil {
+				// A file deleted between the pair read and now just drops out.
+				if errors.Is(err, domain.ErrNotFound) {
+					continue
+				}
+				return nil, 0, err
+			}
+			files = append(files, *f)
+		}
+		if len(files) >= 2 {
+			out = append(out, files)
+		}
+	}
+	return out, total, nil
+}
+
+// Rescan recomputes the entire duplicate_pairs table from the current set of
+// perceptual hashes. It is the only thing that populates the table, so the
+// duplicates view reflects state as of the last rescan. Called by the dedup CLI.
+func (s *DuplicateService) Rescan(ctx context.Context, onProgress func(done, total int)) error {
+	entries, err := s.files.ListAllPHashes(ctx)
+	if err != nil {
+		return err
+	}
+	pairs := buildPairs(entries, s.threshold, onProgress)
+	return s.pairs.ReplaceAll(ctx, pairs)
+}
+
+// Dismiss records two files as "not a duplicate" so the pair stops surfacing.
+// The caller must be able to view both files.
+func (s *DuplicateService) Dismiss(ctx context.Context, a, b uuid.UUID) error {
+	if a == b {
+		return domain.ErrValidation
+	}
+	userID, isAdmin, _ := domain.UserFromContext(ctx)
+	for _, id := range []uuid.UUID{a, b} {
+		f, err := s.files.GetByID(ctx, id)
+		if err != nil {
+			return err
+		}
+		ok, err := s.acl.CanView(ctx, userID, isAdmin, f.CreatorID, f.IsPublic, fileObjectTypeID, id)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			return domain.ErrForbidden
+		}
+	}
+	if err := s.dismissals.Add(ctx, a, b, userID); err != nil {
+		return err
+	}
+	objType := fileObjectType
+	_ = s.audit.Log(ctx, "duplicate_dismiss", &objType, &a, map[string]any{"other": b.String()})
+	return nil
+}
+
+// Resolve merges a duplicate pair: the survivor (keep) takes the chosen fields
+// from the other (discard), and the other is trashed when DeleteDiscarded is set.
+// The caller must be able to edit both files. Returns the updated survivor.
+func (s *DuplicateService) Resolve(ctx context.Context, spec MergeSpec) (*domain.File, error) {
+	if spec.Keep == spec.Discard {
+		return nil, domain.ErrValidation
+	}
+	if err := spec.normalize(); err != nil {
+		return nil, err
+	}
+
+	keep, err := s.files.GetByID(ctx, spec.Keep)
+	if err != nil {
+		return nil, err
+	}
+	discard, err := s.files.GetByID(ctx, spec.Discard)
+	if err != nil {
+		return nil, err
+	}
+
+	userID, isAdmin, _ := domain.UserFromContext(ctx)
+	for _, f := range []*domain.File{keep, discard} {
+		ok, err := s.acl.CanEdit(ctx, userID, isAdmin, f.CreatorID, fileObjectTypeID, f.ID)
+		if err != nil {
+			return nil, err
+		}
+		if !ok {
+			return nil, domain.ErrForbidden
+		}
+	}
+
+	// FileRepo.Update rewrites all editable scalar columns, so build the complete
+	// resolved set (each field from keep or discard) rather than a sparse patch.
+	patch := &domain.File{
+		OriginalName:    pickPtr(spec.Fields.OriginalName, keep.OriginalName, discard.OriginalName),
+		Notes:           pickPtr(spec.Fields.Notes, keep.Notes, discard.Notes),
+		ContentDatetime: pickTime(spec.Fields.ContentDatetime, keep.ContentDatetime, discard.ContentDatetime),
+		IsPublic:        pickBool(spec.Fields.IsPublic, keep.IsPublic, discard.IsPublic),
+		Metadata:        pickMetadata(spec.Fields.Metadata, keep.Metadata, discard.Metadata),
+	}
+
+	var result *domain.File
+	txErr := s.tx.WithTx(ctx, func(ctx context.Context) error {
+		updated, err := s.files.Update(ctx, keep.ID, patch)
+		if err != nil {
+			return err
+		}
+
+		if spec.Fields.Tags == mergeBoth {
+			if err := s.files.SetTags(ctx, keep.ID, unionTagIDs(keep.Tags, discard.Tags)); err != nil {
+				return err
+			}
+			tags, err := s.files.ListTags(ctx, keep.ID)
+			if err != nil {
+				return err
+			}
+			updated.Tags = tags
+		}
+		if spec.Fields.Pools == mergeBoth {
+			if err := s.files.CopyPoolMemberships(ctx, keep.ID, discard.ID); err != nil {
+				return err
+			}
+		}
+		if spec.DeleteDiscarded {
+			if err := s.files.SoftDelete(ctx, discard.ID); err != nil {
+				return err
+			}
+		}
+		result = updated
+		return nil
+	})
+	if txErr != nil {
+		return nil, txErr
+	}
+
+	objType := fileObjectType
+	_ = s.audit.Log(ctx, "file_merge", &objType, &keep.ID, map[string]any{
+		"discard":           spec.Discard.String(),
+		"fields":            spec.Fields,
+		"deleted_discarded": spec.DeleteDiscarded,
+	})
+	return result, nil
+}
+
+// --- field pickers ---------------------------------------------------------
+
+func pickPtr(choice string, keep, discard *string) *string {
+	if choice == mergeDiscard {
+		return discard
+	}
+	return keep
+}
+
+func pickBool(choice string, keep, discard bool) bool {
+	if choice == mergeDiscard {
+		return discard
+	}
+	return keep
+}
+
+func pickTime(choice string, keep, discard time.Time) time.Time {
+	if choice == mergeDiscard {
+		return discard
+	}
+	return keep
+}
+
+func unionTagIDs(a, b []domain.Tag) []uuid.UUID {
+	seen := make(map[uuid.UUID]bool, len(a)+len(b))
+	ids := make([]uuid.UUID, 0, len(a)+len(b))
+	for _, t := range append(append([]domain.Tag{}, a...), b...) {
+		if !seen[t.ID] {
+			seen[t.ID] = true
+			ids = append(ids, t.ID)
+		}
+	}
+	return ids
+}
+
+// pickMetadata returns keep's metadata, discard's, or a shallow merge in which
+// the survivor's keys win on conflict.
+func pickMetadata(choice string, keep, discard json.RawMessage) json.RawMessage {
+	switch choice {
+	case mergeDiscard:
+		return discard
+	case mergeMerge:
+		km := map[string]json.RawMessage{}
+		dm := map[string]json.RawMessage{}
+		_ = json.Unmarshal(keep, &km)
+		_ = json.Unmarshal(discard, &dm)
+		out := make(map[string]json.RawMessage, len(km)+len(dm))
+		for k, v := range dm {
+			out[k] = v
+		}
+		for k, v := range km { // survivor wins
+			out[k] = v
+		}
+		if len(out) == 0 {
+			return keep
+		}
+		b, err := json.Marshal(out)
+		if err != nil {
+			return keep
+		}
+		return b
+	default:
+		return keep
+	}
+}
diff --git a/backend/internal/service/duplicate_test.go b/backend/internal/service/duplicate_test.go
new file mode 100644
index 0000000..af8b0e2
--- /dev/null
+++ b/backend/internal/service/duplicate_test.go
@@ -0,0 +1,152 @@
+package service
+
+import (
+	"bytes"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"sort"
+	"testing"
+
+	"github.com/google/uuid"
+
+	"tanabata/backend/internal/domain"
+)
+
+// id builds a deterministic UUID whose byte order matches n, so tests can reason
+// about the canonical (FileA < FileB) ordering buildPairs produces.
+func id(n int) uuid.UUID {
+	return uuid.MustParse(fmt.Sprintf("00000000-0000-0000-0000-%012d", n))
+}
+
+func entry(n int, hash uint64) domain.PHashEntry {
+	return domain.PHashEntry{ID: id(n), PHash: int64(hash)}
+}
+
+// pairKey canonicalises a pair for set comparison regardless of emission order.
+func pairKey(p domain.DuplicatePair) string {
+	a, b := p.FileA, p.FileB
+	if bytes.Compare(a[:], b[:]) > 0 {
+		a, b = b, a
+	}
+	return fmt.Sprintf("%s|%s|%d", a, b, p.Distance)
+}
+
+func TestBuildPairs_ThresholdAndCanonicalOrder(t *testing.T) {
+	entries := []domain.PHashEntry{
+		entry(1, 0x0000000000000000),
+		entry(2, 0x0000000000000001), // distance 1 from #1
+		entry(3, 0x00000000000000FF), // distance 8 from #1, 7 from #2
+		entry(4, 0xFFFFFFFFFFFFFFFF), // distance 64 from #1
+	}
+
+	// Tight threshold: only the distance-1 pair qualifies.
+	got := buildPairs(entries, 2, nil)
+	if len(got) != 1 {
+		t.Fatalf("threshold 2: got %d pairs, want 1: %+v", len(got), got)
+	}
+	if got[0].FileA != id(1) || got[0].FileB != id(2) || got[0].Distance != 1 {
+		t.Errorf("threshold 2: unexpected pair %+v", got[0])
+	}
+	// Canonical order always FileA < FileB.
+	if bytes.Compare(got[0].FileA[:], got[0].FileB[:]) >= 0 {
+		t.Error("pair not in canonical FileA < FileB order")
+	}
+
+	// Looser threshold pulls in #3's pairs but never #4.
+	got8 := buildPairs(entries, 8, nil)
+	want := map[string]bool{
+		pairKey(domain.DuplicatePair{FileA: id(1), FileB: id(2), Distance: 1}): true,
+		pairKey(domain.DuplicatePair{FileA: id(1), FileB: id(3), Distance: 8}): true,
+		pairKey(domain.DuplicatePair{FileA: id(2), FileB: id(3), Distance: 7}): true,
+	}
+	if len(got8) != len(want) {
+		t.Fatalf("threshold 8: got %d pairs, want %d: %+v", len(got8), len(want), got8)
+	}
+	for _, p := range got8 {
+		if !want[pairKey(p)] {
+			t.Errorf("threshold 8: unexpected pair %+v", p)
+		}
+	}
+}
+
+func TestBuildPairs_IdenticalHashesPairAtDistanceZero(t *testing.T) {
+	entries := []domain.PHashEntry{
+		entry(1, 0xABCDABCDABCDABCD),
+		entry(2, 0xABCDABCDABCDABCD),
+	}
+	got := buildPairs(entries, 0, nil)
+	if len(got) != 1 || got[0].Distance != 0 || got[0].FileA != id(1) || got[0].FileB != id(2) {
+		t.Fatalf("identical hashes: got %+v, want one distance-0 pair (1,2)", got)
+	}
+}
+
+func TestClusterPairs_ConnectedComponents(t *testing.T) {
+	pairs := []domain.DuplicatePair{
+		{FileA: id(1), FileB: id(2)},
+		{FileA: id(2), FileB: id(3)}, // transitively joins 1-2-3
+		{FileA: id(5), FileB: id(6)},
+	}
+	clusters := clusterPairs(pairs)
+	if len(clusters) != 2 {
+		t.Fatalf("got %d clusters, want 2: %+v", len(clusters), clusters)
+	}
+	// Sorted by smallest id: {1,2,3} then {5,6}.
+	if len(clusters[0]) != 3 || clusters[0][0] != id(1) || clusters[0][2] != id(3) {
+		t.Errorf("cluster 0 = %v, want [1 2 3]", clusters[0])
+	}
+	if len(clusters[1]) != 2 || clusters[1][0] != id(5) {
+		t.Errorf("cluster 1 = %v, want [5 6]", clusters[1])
+	}
+	// Each cluster's ids are sorted.
+	for _, c := range clusters {
+		if !sort.SliceIsSorted(c, func(i, j int) bool { return bytes.Compare(c[i][:], c[j][:]) < 0 }) {
+			t.Errorf("cluster not sorted: %v", c)
+		}
+	}
+}
+
+func TestPickMetadata_Merge(t *testing.T) {
+	keep := json.RawMessage(`{"a":1,"b":2}`)
+	discard := json.RawMessage(`{"b":9,"c":3}`)
+
+	out := pickMetadata(mergeMerge, keep, discard)
+	var m map[string]int
+	if err := json.Unmarshal(out, &m); err != nil {
+		t.Fatalf("merge result not valid JSON: %v (%s)", err, out)
+	}
+	want := map[string]int{"a": 1, "b": 2, "c": 3} // survivor wins on "b"
+	if fmt.Sprint(m) != fmt.Sprint(want) {
+		t.Errorf("merge = %v, want %v", m, want)
+	}
+
+	if string(pickMetadata(mergeKeep, keep, discard)) != string(keep) {
+		t.Error("keep choice should return survivor metadata unchanged")
+	}
+	if string(pickMetadata(mergeDiscard, keep, discard)) != string(discard) {
+		t.Error("discard choice should return the other file's metadata")
+	}
+}
+
+func TestMergeSpec_Normalize(t *testing.T) {
+	// Empty fields default to "keep".
+	spec := MergeSpec{Keep: id(1), Discard: id(2)}
+	if err := spec.normalize(); err != nil {
+		t.Fatalf("normalize empty: %v", err)
+	}
+	if spec.Fields.OriginalName != mergeKeep || spec.Fields.Tags != mergeKeep || spec.Fields.Metadata != mergeKeep {
+		t.Errorf("empty fields not defaulted to keep: %+v", spec.Fields)
+	}
+
+	// "both" is invalid for a scalar field.
+	bad := MergeSpec{Keep: id(1), Discard: id(2), Fields: MergeFields{Notes: mergeBoth}}
+	if err := bad.normalize(); !errors.Is(err, domain.ErrValidation) {
+		t.Errorf("scalar=both: got %v, want ErrValidation", err)
+	}
+
+	// "discard" is invalid for a relation field.
+	badRel := MergeSpec{Keep: id(1), Discard: id(2), Fields: MergeFields{Tags: mergeDiscard}}
+	if err := badRel.normalize(); !errors.Is(err, domain.ErrValidation) {
+		t.Errorf("relation=discard: got %v, want ErrValidation", err)
+	}
+}
diff --git a/backend/migrations/003_data_tables.sql b/backend/migrations/003_data_tables.sql
index 0fc9f1b..288d509 100644
--- a/backend/migrations/003_data_tables.sql
+++ b/backend/migrations/003_data_tables.sql
@@ -92,6 +92,31 @@ CREATE TABLE data.file_pool (
     PRIMARY KEY (file_id, pool_id)
 );
 
+-- Precomputed near-duplicate candidates (phash Hamming distance <= threshold),
+-- (re)built in full by the dedup rescan. Stored once per unordered pair with a
+-- canonical file_a < file_b ordering so a pair is never duplicated as (a,b)/(b,a).
+CREATE TABLE data.duplicate_pairs (
+    file_a   uuid     NOT NULL REFERENCES data.files(id) ON UPDATE CASCADE ON DELETE CASCADE,
+    file_b   uuid     NOT NULL REFERENCES data.files(id) ON UPDATE CASCADE ON DELETE CASCADE,
+    distance smallint NOT NULL,
+
+    CONSTRAINT chk__duplicate_pairs__order CHECK (file_a < file_b),
+    PRIMARY KEY (file_a, file_b)
+);
+
+-- "Not a duplicate" decisions: a global overlay that hides a candidate pair from
+-- the duplicates view. Survives rescans (the pair may be re-found but stays
+-- hidden). Same canonical file_a < file_b ordering as data.duplicate_pairs.
+CREATE TABLE data.duplicate_dismissals (
+    file_a       uuid        NOT NULL REFERENCES data.files(id) ON UPDATE CASCADE ON DELETE CASCADE,
+    file_b       uuid        NOT NULL REFERENCES data.files(id) ON UPDATE CASCADE ON DELETE CASCADE,
+    dismissed_by smallint    NOT NULL REFERENCES core.users(id) ON UPDATE CASCADE ON DELETE RESTRICT,
+    dismissed_at timestamptz NOT NULL DEFAULT clock_timestamp(),
+
+    CONSTRAINT chk__duplicate_dismissals__order CHECK (file_a < file_b),
+    PRIMARY KEY (file_a, file_b)
+);
+
 COMMENT ON TABLE  data.categories IS 'Logical grouping of tags';
 COMMENT ON TABLE  data.tags       IS 'File labels/tags';
 COMMENT ON TABLE  data.tag_rules  IS 'Auto-tagging rules: when when_tag is assigned, then_tag follows';
@@ -99,6 +124,8 @@ COMMENT ON TABLE  data.files      IS 'Managed files; actual content stored on di
 COMMENT ON TABLE  data.file_tag   IS 'Many-to-many: files <-> tags';
 COMMENT ON TABLE  data.pools      IS 'Ordered collections of files';
 COMMENT ON TABLE  data.file_pool  IS 'Many-to-many: files <-> pools, with ordering';
+COMMENT ON TABLE  data.duplicate_pairs       IS 'Precomputed near-duplicate candidate pairs (perceptual-hash distance)';
+COMMENT ON TABLE  data.duplicate_dismissals  IS 'Pairs marked "not a duplicate"; hidden from the duplicates view';
 
 COMMENT ON COLUMN data.files.original_name    IS 'Original filename at upload time';
 COMMENT ON COLUMN data.files.content_datetime IS 'Content datetime (e.g. when photo was taken); falls back to EXIF DateTimeOriginal';
@@ -110,6 +137,8 @@ COMMENT ON COLUMN data.file_pool.position     IS 'Manual ordering within pool; u
 
 -- +goose Down
 
+DROP TABLE IF EXISTS data.duplicate_dismissals;
+DROP TABLE IF EXISTS data.duplicate_pairs;
 DROP TABLE IF EXISTS data.file_pool;
 DROP TABLE IF EXISTS data.pools;
 DROP TABLE IF EXISTS data.file_tag;
diff --git a/backend/migrations/006_indexes.sql b/backend/migrations/006_indexes.sql
index 8f09758..638e98c 100644
--- a/backend/migrations/006_indexes.sql
+++ b/backend/migrations/006_indexes.sql
@@ -26,6 +26,12 @@ CREATE INDEX idx__files__needs_review      ON data.files USING btree (id) WHERE
 CREATE INDEX idx__file_tag__tag_id  ON data.file_tag USING hash (tag_id);
 CREATE INDEX idx__file_tag__file_id ON data.file_tag USING hash (file_id);
 
+-- data.duplicate_pairs / data.duplicate_dismissals
+-- The composite primary keys cover lookups on file_a; these add the file_b side
+-- (used by the ON DELETE CASCADE and by the visibility join on the second file).
+CREATE INDEX idx__duplicate_pairs__file_b      ON data.duplicate_pairs      USING hash (file_b);
+CREATE INDEX idx__duplicate_dismissals__file_b ON data.duplicate_dismissals USING hash (file_b);
+
 -- data.pools
 CREATE INDEX idx__pools__creator_id ON data.pools USING hash (creator_id);
 
@@ -70,6 +76,8 @@ DROP INDEX IF EXISTS activity.idx__sessions__token_hash;
 DROP INDEX IF EXISTS activity.idx__sessions__user_id;
 DROP INDEX IF EXISTS acl.idx__acl__user;
 DROP INDEX IF EXISTS acl.idx__acl__object;
+DROP INDEX IF EXISTS data.idx__duplicate_dismissals__file_b;
+DROP INDEX IF EXISTS data.idx__duplicate_pairs__file_b;
 DROP INDEX IF EXISTS data.idx__file_pool__file_id;
 DROP INDEX IF EXISTS data.idx__file_pool__pool_id;
 DROP INDEX IF EXISTS data.idx__pools__creator_id;
diff --git a/backend/migrations/007_seed_data.sql b/backend/migrations/007_seed_data.sql
index 2de972d..2486029 100644
--- a/backend/migrations/007_seed_data.sql
+++ b/backend/migrations/007_seed_data.sql
@@ -21,6 +21,7 @@ INSERT INTO activity.action_types (name) VALUES
     -- Files
     ('file_create'), ('file_edit'), ('file_delete'), ('file_restore'),
     ('file_permanent_delete'), ('file_replace'), ('file_review'),
+    ('file_merge'), ('duplicate_dismiss'),
     -- Tags
     ('tag_create'), ('tag_edit'), ('tag_delete'),
     -- Categories