diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index 5aea09f..eaf94a7 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -63,6 +63,12 @@ type Config struct { // Import ImportPath string + // DuplicateHashThreshold is the maximum Hamming distance (out of 64) between + // two perceptual hashes for the files to be treated as duplicate candidates. + // Lower = stricter (fewer, more confident matches); higher = looser. Used only + // by the dedup rescan that (re)builds data.duplicate_pairs. + DuplicateHashThreshold int + // Static SPA. When set, the server serves the built frontend (and falls // back to index.html for client routes) on the same port as the API. Empty // in local development, where the Vite dev server serves the UI separately. @@ -176,6 +182,8 @@ func Load() (*Config, error) { ImportPath: requireStr("IMPORT_PATH"), + DuplicateHashThreshold: parseInt("DUPLICATE_HASH_THRESHOLD", 10), + StaticDir: defaultStr("STATIC_DIR", ""), } diff --git a/backend/internal/db/postgres/file_repo.go b/backend/internal/db/postgres/file_repo.go index 2c7ec63..22d9990 100644 --- a/backend/internal/db/postgres/file_repo.go +++ b/backend/internal/db/postgres/file_repo.go @@ -434,6 +434,18 @@ func (r *FileRepo) SetNeedsReview(ctx context.Context, ids []uuid.UUID, value bo return nil } +// SetPHash sets (or clears, when phash is nil) the perceptual hash of a file. +// Used by the dedup backfill and on content replacement; phash is non-critical, +// recomputable metadata, so callers may treat failures as best-effort. 
+func (r *FileRepo) SetPHash(ctx context.Context, id uuid.UUID, phash *int64) error { + const sqlStr = `UPDATE data.files SET phash = $2 WHERE id = $1` + q := connOrTx(ctx, r.pool) + if _, err := q.Exec(ctx, sqlStr, id, phash); err != nil { + return fmt.Errorf("FileRepo.SetPHash: %w", err) + } + return nil +} + // --------------------------------------------------------------------------- // SoftDelete / Restore / DeletePermanent // --------------------------------------------------------------------------- diff --git a/backend/internal/imagehash/imagehash.go b/backend/internal/imagehash/imagehash.go new file mode 100644 index 0000000..20e07eb --- /dev/null +++ b/backend/internal/imagehash/imagehash.go @@ -0,0 +1,70 @@ +// Package imagehash computes a 64-bit perceptual hash (dHash) of an image and +// compares two hashes by Hamming distance. It is used for near-duplicate +// detection: visually similar images (re-encoded, resized, recompressed) produce +// hashes a small distance apart, while unrelated images are far apart. +// +// dHash is chosen for its robustness and simplicity: the image is reduced to a +// 9×8 grayscale and each pixel is compared to its right-hand neighbour, yielding +// 64 gradient-direction bits. It tolerates scaling and brightness/contrast +// changes well, which is exactly what re-encoded duplicates exhibit. +package imagehash + +import ( + "bytes" + "image" + _ "image/gif" // register GIF decoder + _ "image/jpeg" // register JPEG decoder + _ "image/png" // register PNG decoder + "math/bits" + + "github.com/disintegration/imaging" + _ "golang.org/x/image/webp" // register WebP decoder +) + +// hashWidth/hashHeight define the reduced grayscale used for dHash. The extra +// column (width = height+1) provides the right-hand neighbour for the 64 +// horizontal comparisons that make up the hash. +const ( + hashHeight = 8 + hashWidth = hashHeight + 1 +) + +// FromImage reduces img to a 9×8 grayscale and returns its 64-bit dHash. 
The +// uint64 of gradient bits is returned as int64 (a plain bit reinterpretation) so +// it fits PostgreSQL's bigint; equality and Distance are bitwise, so the signed +// interpretation never matters. +func FromImage(img image.Image) int64 { + small := imaging.Grayscale(imaging.Resize(img, hashWidth, hashHeight, imaging.Lanczos)) + + var hash uint64 + bit := 0 + for y := 0; y < hashHeight; y++ { + for x := 0; x < hashHeight; x++ { + // After Grayscale, R == G == B, so the red channel is the luminance. + left := small.Pix[small.PixOffset(x, y)] + right := small.Pix[small.PixOffset(x+1, y)] + if left < right { + hash |= 1 << uint(63-bit) + } + bit++ + } + } + return int64(hash) +} + +// FromBytes decodes data (JPEG/PNG/GIF/WebP) and returns its dHash. ok is false +// when the bytes are not a decodable image, so callers can simply skip hashing +// (e.g. leave phash NULL) rather than fail. +func FromBytes(data []byte) (hash int64, ok bool) { + img, _, err := image.Decode(bytes.NewReader(data)) + if err != nil { + return 0, false + } + return FromImage(img), true +} + +// Distance returns the Hamming distance (0–64) between two hashes: the number of +// differing bits. 0 means identical; small values mean near-duplicate. +func Distance(a, b int64) int { + return bits.OnesCount64(uint64(a) ^ uint64(b)) +} diff --git a/backend/internal/imagehash/imagehash_test.go b/backend/internal/imagehash/imagehash_test.go new file mode 100644 index 0000000..6e47a3a --- /dev/null +++ b/backend/internal/imagehash/imagehash_test.go @@ -0,0 +1,99 @@ +package imagehash + +import ( + "bytes" + "image" + "image/color" + "image/jpeg" + "image/png" + "math" + "testing" +) + +// radial renders a smooth grayscale image whose brightness falls off with +// distance from (cx, cy). Smooth gradients are the realistic case for perceptual +// hashing and survive JPEG re-encoding well, so they make stable test fixtures. 
+func radial(w, h int, cx, cy float64) image.Image { + img := image.NewRGBA(image.Rect(0, 0, w, h)) + maxD := math.Hypot(float64(w), float64(h)) + for y := 0; y < h; y++ { + for x := 0; x < w; x++ { + d := math.Hypot(float64(x)-cx, float64(y)-cy) + v := uint8(255 * (1 - d/maxD)) + img.Set(x, y, color.RGBA{v, v, v, 255}) + } + } + return img +} + +func encodePNG(t *testing.T, img image.Image) []byte { + t.Helper() + var buf bytes.Buffer + if err := png.Encode(&buf, img); err != nil { + t.Fatalf("png encode: %v", err) + } + return buf.Bytes() +} + +func encodeJPEG(t *testing.T, img image.Image, quality int) []byte { + t.Helper() + var buf bytes.Buffer + if err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: quality}); err != nil { + t.Fatalf("jpeg encode: %v", err) + } + return buf.Bytes() +} + +// The same image re-encoded as PNG (lossless) and JPEG (lossy) must hash to a +// small Hamming distance — that is the whole point of a perceptual hash. +func TestFromBytes_SameImageAcrossEncodings(t *testing.T) { + img := radial(64, 64, 32, 32) + + pngHash, ok := FromBytes(encodePNG(t, img)) + if !ok { + t.Fatal("FromBytes(PNG): ok=false") + } + jpgHash, ok := FromBytes(encodeJPEG(t, img, 90)) + if !ok { + t.Fatal("FromBytes(JPEG): ok=false") + } + + if d := Distance(pngHash, jpgHash); d > 8 { + t.Errorf("same image, different encodings: distance = %d, want <= 8", d) + } +} + +// Visually different images must be far apart, and clearly farther than the same +// image across encodings. 
+func TestDistance_DifferentImagesAreFarApart(t *testing.T) { + a := FromImage(radial(64, 64, 32, 32)) // centred + b := FromImage(radial(64, 64, 0, 0)) // corner + + same, _ := FromBytes(encodeJPEG(t, radial(64, 64, 32, 32), 90)) + + d := Distance(a, b) + if d < 12 { + t.Errorf("different images: distance = %d, want >= 12", d) + } + if d <= Distance(a, same) { + t.Errorf("different images (%d) not farther than re-encoded same image (%d)", d, Distance(a, same)) + } +} + +func TestDistance_SymmetricAndZeroForEqual(t *testing.T) { + a := FromImage(radial(64, 64, 20, 40)) + b := FromImage(radial(64, 64, 40, 20)) + + if Distance(a, a) != 0 { + t.Errorf("Distance(a, a) = %d, want 0", Distance(a, a)) + } + if Distance(a, b) != Distance(b, a) { + t.Errorf("Distance not symmetric: %d vs %d", Distance(a, b), Distance(b, a)) + } +} + +func TestFromBytes_RejectsNonImage(t *testing.T) { + if _, ok := FromBytes([]byte("definitely not an image")); ok { + t.Error("FromBytes on garbage: ok=true, want false") + } +} diff --git a/backend/internal/port/repository.go b/backend/internal/port/repository.go index da60e57..48d7cc9 100644 --- a/backend/internal/port/repository.go +++ b/backend/internal/port/repository.go @@ -50,6 +50,8 @@ type FileRepo interface { Update(ctx context.Context, id uuid.UUID, f *domain.File) (*domain.File, error) // SetNeedsReview sets the review status on the given (non-trashed) files. SetNeedsReview(ctx context.Context, ids []uuid.UUID, value bool) error + // SetPHash sets (or clears, when nil) the perceptual hash of a file. + SetPHash(ctx context.Context, id uuid.UUID, phash *int64) error // SoftDelete moves a file to trash (sets is_deleted = true). SoftDelete(ctx context.Context, id uuid.UUID) error // Restore moves a file out of trash (sets is_deleted = false). 
diff --git a/backend/internal/service/file_service.go b/backend/internal/service/file_service.go index 59de2bc..55baf33 100644 --- a/backend/internal/service/file_service.go +++ b/backend/internal/service/file_service.go @@ -16,6 +16,7 @@ import ( "github.com/google/uuid" "tanabata/backend/internal/domain" + "tanabata/backend/internal/imagehash" "tanabata/backend/internal/port" ) @@ -154,6 +155,17 @@ func (s *FileService) Upload(ctx context.Context, p UploadParams) (*domain.File, } exifData, exifDatetime := extractMetadata(data, origName, p.ContentDatetimeFallback) + // Compute a perceptual hash for images so duplicate detection can later match + // near-identical files. Best-effort: a decode failure just leaves phash unset + // (the dedup CLI backfills it). Video is hashed by that CLI, not inline, to keep + // ffmpeg off the upload path. + var phash *int64 + if strings.HasPrefix(mime.Name, "image/") { + if h, ok := imagehash.FromBytes(data); ok { + phash = &h + } + } + // Resolve content datetime: explicit > metadata date > fallback (e.g. import mtime) > zero. var contentDatetime time.Time if p.ContentDatetime != nil { @@ -187,6 +199,7 @@ func (s *FileService) Upload(ctx context.Context, p UploadParams) (*domain.File, Notes: p.Notes, Metadata: p.Metadata, EXIF: exifData, + PHash: phash, CreatorID: userID, IsPublic: p.IsPublic, } @@ -453,6 +466,18 @@ func (s *FileService) Replace(ctx context.Context, id uuid.UUID, p UploadParams) return nil, err } + // Recompute the perceptual hash from the new content: images inline, anything + // else cleared to NULL so the old content's hash never lingers (the dedup CLI + // recomputes video). Best-effort, like on upload — phash is recomputable. 
+ var phash *int64 + if strings.HasPrefix(mime.Name, "image/") { + if h, ok := imagehash.FromBytes(data); ok { + phash = &h + } + } + _ = s.files.SetPHash(ctx, id, phash) + updated.PHash = phash + objType := fileObjectType _ = s.audit.Log(ctx, "file_replace", &objType, &id, nil) return updated, nil diff --git a/backend/internal/storage/disk.go b/backend/internal/storage/disk.go index 365e3fb..e389f40 100644 --- a/backend/internal/storage/disk.go +++ b/backend/internal/storage/disk.go @@ -16,6 +16,8 @@ import ( "os/exec" "path/filepath" "runtime" + "strconv" + "strings" "time" "github.com/disintegration/imaging" @@ -155,6 +157,30 @@ func (s *DiskStorage) Preview(ctx context.Context, id uuid.UUID) (io.ReadCloser, return s.serveGenerated(ctx, id, s.previewCachePath(id), s.previewWidth, s.previewHeight) } +// VideoFrameMiddle decodes a representative frame from the middle of a video +// (duration/2). The midpoint avoids the shared intros, title cards and black +// lead-in frames that make a fixed early offset collide across unrelated clips, +// so it is the right source for the video's perceptual (duplicate-detection) +// hash. The file must already exist in storage; ffmpeg/ffprobe must be installed. +// This is not part of port.FileStorage — only the dedup CLI needs it, with a +// concrete *DiskStorage — so the interface stays lean and ffmpeg stays out of the +// upload path. +func (s *DiskStorage) VideoFrameMiddle(ctx context.Context, id uuid.UUID) (image.Image, error) { + srcPath := s.originalPath(id) + if _, err := os.Stat(srcPath); err != nil { + if os.IsNotExist(err) { + return nil, domain.ErrNotFound + } + return nil, fmt.Errorf("storage: stat %q: %w", srcPath, err) + } + // Fall back to a 1s offset if duration can't be probed — better a frame than none. 
+ at := 1.0 + if d, err := videoDurationSeconds(ctx, srcPath); err == nil && d > 0 { + at = d / 2 + } + return extractVideoFrameAt(ctx, srcPath, at) +} + // --------------------------------------------------------------------------- // Internal helpers // --------------------------------------------------------------------------- @@ -342,19 +368,25 @@ func (s *DiskStorage) vipsThumbnail(ctx context.Context, srcPath, cachePath stri return f, nil } -// extractVideoFrame uses ffmpeg to extract a single frame from a video file. -// It seeks 1 second in (keyframe-accurate fast seek) and pipes the frame out -// as PNG. If the video is shorter than 1 s the seek is silently ignored by -// ffmpeg and the first available frame is returned instead. -// Returns an error if ffmpeg is not installed or produces no output. The run is -// bounded by a timeout so a malformed file cannot hang the request indefinitely. +// extractVideoFrame extracts a single frame ~1 second into the video — a safe +// default for thumbnails. See extractVideoFrameAt for the mechanics. func extractVideoFrame(ctx context.Context, srcPath string) (image.Image, error) { + return extractVideoFrameAt(ctx, srcPath, 1) +} + +// extractVideoFrameAt uses ffmpeg to extract a single frame at atSec seconds into +// the video, piped out as PNG. The fast input seek (-ss before -i) is keyframe- +// accurate and cheap; if atSec is past the end the seek is silently ignored and +// the first available frame is returned instead. Returns an error if ffmpeg is +// not installed or produces no output. The run is bounded by a timeout so a +// malformed file cannot hang the caller indefinitely. 
// videoDurationSeconds returns the container duration of srcPath in seconds, as
// printed by ffprobe for the format=duration entry. It is used to seek to the
// middle of a clip for perceptual hashing.
//
// The ffprobe run is bounded by a 30 second timeout on top of ctx. An error is
// returned when ffprobe cannot be started or exits non-zero (its stderr text is
// appended to the error when there is any), and when the output is not a
// finite, non-negative number — for example "N/A". The returned duration is 0
// on every error.
func videoDurationSeconds(ctx context.Context, srcPath string) (float64, error) {
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	cmd := exec.CommandContext(ctx, "ffprobe",
		"-v", "error",
		"-show_entries", "format=duration",
		"-of", "default=noprint_wrappers=1:nokey=1",
		srcPath,
	)
	out, err := cmd.Output()
	if err != nil {
		// Output stores the child's stderr in ExitError when cmd.Stderr is
		// unset; with "-v error" that is ffprobe's own account of the failure,
		// far more useful than a bare "exit status 1".
		if exitErr, ok := err.(*exec.ExitError); ok {
			if msg := strings.TrimSpace(string(exitErr.Stderr)); msg != "" {
				return 0, fmt.Errorf("ffprobe duration: %w: %s", err, msg)
			}
		}
		return 0, fmt.Errorf("ffprobe duration: %w", err)
	}
	return parseDurationSeconds(out)
}

// parseDurationSeconds converts ffprobe's raw duration output into seconds,
// rejecting anything that is not a finite, non-negative number.
func parseDurationSeconds(raw []byte) (float64, error) {
	text := strings.TrimSpace(string(raw))
	d, err := strconv.ParseFloat(text, 64)
	if err != nil {
		return 0, fmt.Errorf("ffprobe duration parse %q: %w", text, err)
	}
	// ParseFloat happily accepts "NaN" and "Inf". Multiplying by zero gives 0
	// for every finite value and NaN otherwise, so this catches both without
	// needing another import.
	if d*0 != 0 || d < 0 {
		return 0, fmt.Errorf("ffprobe duration parse %q: not a finite non-negative number", text)
	}
	return d, nil
}