feat(backend): perceptual hashing for images and video
Adds a 64-bit dHash perceptual hash (internal/imagehash, built on the existing disintegration/imaging — no new dependency) and starts populating the long-unused data.files.phash column:

- Upload sets phash inline for images (cheap, from the in-memory bytes).
- Replace recomputes it from new content for images and clears it for anything else, so a stale hash never survives a content swap.
- FileRepo.SetPHash sets/clears the hash (used by Replace and, later, the dedup backfill).
- DiskStorage.VideoFrameMiddle extracts a frame from the middle of a clip (ffprobe duration -> ffmpeg -ss duration/2), avoiding the shared-intro collision a fixed early offset causes. It is a concrete method, not part of the storage port: only the dedup CLI needs it, keeping ffmpeg off the upload path. Video phashes are therefore computed by that CLI, not at upload time.
- DUPLICATE_HASH_THRESHOLD config (default 10/64) for the later pair rescan.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -434,6 +434,18 @@ func (r *FileRepo) SetNeedsReview(ctx context.Context, ids []uuid.UUID, value bo
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetPHash sets (or clears, when phash is nil) the perceptual hash of a file.
|
||||
// Used by the dedup backfill and on content replacement; phash is non-critical,
|
||||
// recomputable metadata, so callers may treat failures as best-effort.
|
||||
func (r *FileRepo) SetPHash(ctx context.Context, id uuid.UUID, phash *int64) error {
|
||||
const sqlStr = `UPDATE data.files SET phash = $2 WHERE id = $1`
|
||||
q := connOrTx(ctx, r.pool)
|
||||
if _, err := q.Exec(ctx, sqlStr, id, phash); err != nil {
|
||||
return fmt.Errorf("FileRepo.SetPHash: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// SoftDelete / Restore / DeletePermanent
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user