feat(backend): perceptual hashing for images and video
Adds a 64-bit dHash perceptual hash (internal/imagehash, built on the existing disintegration/imaging — no new dependency) and starts populating the long-unused data.files.phash column:

- Upload sets phash inline for images (cheap, from the in-memory bytes).
- Replace recomputes it from new content for images and clears it for anything else, so a stale hash never survives a content swap.
- FileRepo.SetPHash sets/clears the hash (used by Replace and, later, the dedup backfill).
- DiskStorage.VideoFrameMiddle extracts a frame from the middle of a clip (ffprobe duration -> ffmpeg -ss duration/2), avoiding the shared-intro collision a fixed early offset causes. It is a concrete method, not part of the storage port: only the dedup CLI needs it, keeping ffmpeg off the upload path. Video phashes are therefore computed by that CLI, not at upload time.
- DUPLICATE_HASH_THRESHOLD config (default 10/64) for the later pair rescan.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
// Package imagehash computes a 64-bit perceptual hash (dHash) of an image and
|
||||
// compares two hashes by Hamming distance. It is used for near-duplicate
|
||||
// detection: visually similar images (re-encoded, resized, recompressed) produce
|
||||
// hashes a small distance apart, while unrelated images are far apart.
|
||||
//
|
||||
// dHash is chosen for its robustness and simplicity: the image is reduced to a
|
||||
// 9×8 grayscale and each pixel is compared to its right-hand neighbour, yielding
|
||||
// 64 gradient-direction bits. It tolerates scaling and brightness/contrast
|
||||
// changes well, which is exactly what re-encoded duplicates exhibit.
|
||||
package imagehash
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"image"
|
||||
_ "image/gif" // register GIF decoder
|
||||
_ "image/jpeg" // register JPEG decoder
|
||||
_ "image/png" // register PNG decoder
|
||||
"math/bits"
|
||||
|
||||
"github.com/disintegration/imaging"
|
||||
_ "golang.org/x/image/webp" // register WebP decoder
|
||||
)
|
||||
|
||||
// Dimensions of the reduced grayscale image that dHash is computed from. Every
// row yields hashHeight horizontal comparisons, which requires one column more
// than the number of comparisons, hence hashWidth = hashHeight + 1.
const (
	// hashHeight is the number of rows in the reduced image (8 rows × 8
	// comparisons per row = 64 hash bits).
	hashHeight = 8
	// hashWidth is the number of columns; the additional column supplies the
	// right-hand neighbour for the last comparison of each row.
	hashWidth = hashHeight + 1
)
|
||||
|
||||
// FromImage reduces img to a 9×8 grayscale and returns its 64-bit dHash. The
|
||||
// uint64 of gradient bits is returned as int64 (a plain bit reinterpretation) so
|
||||
// it fits PostgreSQL's bigint; equality and Distance are bitwise, so the signed
|
||||
// interpretation never matters.
|
||||
func FromImage(img image.Image) int64 {
|
||||
small := imaging.Grayscale(imaging.Resize(img, hashWidth, hashHeight, imaging.Lanczos))
|
||||
|
||||
var hash uint64
|
||||
bit := 0
|
||||
for y := 0; y < hashHeight; y++ {
|
||||
for x := 0; x < hashHeight; x++ {
|
||||
// After Grayscale, R == G == B, so the red channel is the luminance.
|
||||
left := small.Pix[small.PixOffset(x, y)]
|
||||
right := small.Pix[small.PixOffset(x+1, y)]
|
||||
if left < right {
|
||||
hash |= 1 << uint(63-bit)
|
||||
}
|
||||
bit++
|
||||
}
|
||||
}
|
||||
return int64(hash)
|
||||
}
|
||||
|
||||
// FromBytes decodes data (JPEG/PNG/GIF/WebP) and returns its dHash. ok is false
|
||||
// when the bytes are not a decodable image, so callers can simply skip hashing
|
||||
// (e.g. leave phash NULL) rather than fail.
|
||||
func FromBytes(data []byte) (hash int64, ok bool) {
|
||||
img, _, err := image.Decode(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
return 0, false
|
||||
}
|
||||
return FromImage(img), true
|
||||
}
|
||||
|
||||
// Distance reports how many of the 64 bit positions differ between hashes a and
// b, i.e. their Hamming distance. The result lies in 0–64: 0 means the hashes
// are identical and small values indicate near-duplicates.
func Distance(a, b int64) int {
	// XOR leaves a 1 exactly where the two hashes disagree; the conversion to
	// uint64 is a pure bit reinterpretation.
	differing := uint64(a ^ b)
	return bits.OnesCount64(differing)
}
|
||||
Reference in New Issue
Block a user