fix(backend): bound thumbnail generation and decode larger images

Thumbnails/previews are generated lazily per request with no concurrency
limit, and the imaging resize already fans out across every core — so
scrolling to a handful of large images spawned that many all-core,
hundreds-of-MB decodes at once and pegged the server. Add a generation
semaphore (THUMB_CONCURRENCY, default = half the CPUs) so only a bounded
number run at a time; queued requests wait and re-check the cache.

Also raise the decode cap from 64 Mpx to a configurable ~300 Mpx default
(THUMB_MAX_PIXELS) so genuinely large photos (e.g. 13000×17000 ≈ 221
Mpx) get a real thumbnail instead of falling back to a placeholder.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-12 01:07:30 +03:00
parent a371045b41
commit 2d2a42d523
5 changed files with 157 additions and 13 deletions
+1
View File
@@ -52,6 +52,7 @@ func main() {
cfg.ThumbsCachePath,
cfg.ThumbWidth, cfg.ThumbHeight,
cfg.PreviewWidth, cfg.PreviewHeight,
cfg.ThumbMaxPixels, cfg.ThumbConcurrency,
)
if err != nil {
slog.Error("failed to initialise storage", "err", err)
+10
View File
@@ -35,6 +35,14 @@ type Config struct {
ThumbHeight int
PreviewWidth int
PreviewHeight int
// ThumbMaxPixels caps the pixel count of a source image we will decode into
// memory to generate a thumbnail/preview (a decode bombs guard, and a memory
// bound). Larger images fall back to a placeholder.
ThumbMaxPixels int
// ThumbConcurrency bounds how many thumbnails/previews are generated at once,
// so a burst of large images can't saturate every core or exhaust RAM. 0 =
// auto (half the available CPUs).
ThumbConcurrency int
// Import
ImportPath string
@@ -123,6 +131,8 @@ func Load() (*Config, error) {
ThumbHeight: parseInt("THUMB_HEIGHT", 160),
PreviewWidth: parseInt("PREVIEW_WIDTH", 1920),
PreviewHeight: parseInt("PREVIEW_HEIGHT", 1080),
ThumbMaxPixels: parseInt("THUMB_MAX_PIXELS", 300_000_000), // ~300 Mpx (e.g. 13000×17000)
ThumbConcurrency: parseInt("THUMB_CONCURRENCY", 0), // 0 = auto
ImportPath: requireStr("IMPORT_PATH"),
+1 -1
View File
@@ -110,7 +110,7 @@ func setupSuite(t *testing.T) *harness {
thumbsDir := t.TempDir()
importDir := t.TempDir()
diskStorage, err := storage.NewDiskStorage(filesDir, thumbsDir, 160, 160, 1920, 1080)
diskStorage, err := storage.NewDiskStorage(filesDir, thumbsDir, 160, 160, 1920, 1080, 0, 0)
require.NoError(t, err)
// --- Repositories --------------------------------------------------------
+42 -8
View File
@@ -15,6 +15,7 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"time"
"github.com/disintegration/imaging"
@@ -38,20 +39,36 @@ type DiskStorage struct {
thumbHeight int
previewWidth int
previewHeight int
maxPixels int
// genSem bounds concurrent thumbnail/preview generation. Each resize already
// fans out across every core (imaging uses GOMAXPROCS), and large sources cost
// hundreds of MB to decode, so unbounded parallelism on a burst of big images
// pegs the CPU and can exhaust RAM. A buffered channel caps how many run at once.
genSem chan struct{}
}
var _ port.FileStorage = (*DiskStorage)(nil)
// NewDiskStorage creates a DiskStorage and ensures both directories exist.
//
// maxPixels caps the source pixel count we will decode in-process (0 → a sane
// default). concurrency bounds simultaneous generation (≤0 → half the CPUs).
func NewDiskStorage(
filesPath, thumbsPath string,
thumbW, thumbH, prevW, prevH int,
maxPixels, concurrency int,
) (*DiskStorage, error) {
for _, p := range []string{filesPath, thumbsPath} {
if err := os.MkdirAll(p, 0o755); err != nil {
return nil, fmt.Errorf("storage: create directory %q: %w", p, err)
}
}
if maxPixels <= 0 {
maxPixels = defaultMaxDecodePixels
}
if concurrency <= 0 {
concurrency = max(1, runtime.GOMAXPROCS(0)/2)
}
return &DiskStorage{
filesPath: filesPath,
thumbsPath: thumbsPath,
@@ -59,6 +76,8 @@ func NewDiskStorage(
thumbHeight: thumbH,
previewWidth: prevW,
previewHeight: prevH,
maxPixels: maxPixels,
genSem: make(chan struct{}, concurrency),
}, nil
}
@@ -164,11 +183,26 @@ func (s *DiskStorage) serveGenerated(ctx context.Context, id uuid.UUID, cachePat
return nil, fmt.Errorf("storage: stat %q: %w", srcPath, err)
}
// Bound concurrent generation so a burst of large images can't peg every core
// or exhaust RAM. Queue here (respecting cancellation) rather than starting
// the heavy decode immediately.
select {
case s.genSem <- struct{}{}:
defer func() { <-s.genSem }()
case <-ctx.Done():
return nil, ctx.Err()
}
// Another request may have generated this while we waited on the semaphore.
if f, err := os.Open(cachePath); err == nil {
return f, nil
}
// 1. Try still-image decode (JPEG/PNG/GIF), rejecting decompression bombs.
// 2. Try video frame extraction via ffmpeg.
// 3. Fall back to placeholder.
var img image.Image
if decoded, err := decodeImageLimited(srcPath); err == nil {
if decoded, err := decodeImageLimited(srcPath, s.maxPixels); err == nil {
img = imaging.Fit(decoded, maxW, maxH, imaging.Lanczos)
} else if frame, err := extractVideoFrame(ctx, srcPath); err == nil {
img = imaging.Fit(frame, maxW, maxH, imaging.Lanczos)
@@ -221,15 +255,15 @@ func writeCache(cachePath string, img image.Image) (io.ReadCloser, error) {
return f, nil
}
// maxDecodePixels caps the pixel count of an image we are willing to decode
// into memory, bounding the cost of a decompression bomb (a tiny file that
// expands to an enormous raster). 64 Mpx is ~ an 8192×8192 image.
const maxDecodePixels = 64 << 20
// defaultMaxDecodePixels is the fallback cap when none is configured. It bounds
// the cost of a decompression bomb (a tiny file that expands to an enormous
// raster) and the per-image memory; ~300 Mpx covers e.g. a 13000×17000 photo.
const defaultMaxDecodePixels = 300_000_000
// decodeImageLimited decodes the image at path after first inspecting its header
// dimensions via image.DecodeConfig (which does not allocate the raster), and
// refuses images whose pixel count exceeds maxDecodePixels.
func decodeImageLimited(path string) (image.Image, error) {
// refuses images whose pixel count exceeds maxPixels.
func decodeImageLimited(path string, maxPixels int) (image.Image, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
@@ -240,7 +274,7 @@ func decodeImageLimited(path string) (image.Image, error) {
if err != nil {
return nil, err
}
if int64(cfg.Width)*int64(cfg.Height) > maxDecodePixels {
if int64(cfg.Width)*int64(cfg.Height) > int64(maxPixels) {
return nil, fmt.Errorf("image too large to decode: %dx%d", cfg.Width, cfg.Height)
}
if _, err := f.Seek(0, io.SeekStart); err != nil {
+99
View File
@@ -0,0 +1,99 @@
package storage
import (
"bytes"
"context"
"image"
"image/color"
"image/png"
"io"
"os"
"path/filepath"
"testing"
"github.com/disintegration/imaging"
"github.com/google/uuid"
)
// writeTestImage writes a w×h PNG filled with a distinct (non-placeholder)
// colour so a generated thumbnail can be told apart from the grey placeholder.
func writeTestImage(t *testing.T, path string, w, h int) {
t.Helper()
img := image.NewNRGBA(image.Rect(0, 0, w, h))
for y := 0; y < h; y++ {
for x := 0; x < w; x++ {
img.Set(x, y, color.NRGBA{R: 0xC0, G: 0x10, B: 0x20, A: 0xFF})
}
}
f, err := os.Create(path)
if err != nil {
t.Fatal(err)
}
defer f.Close()
if err := png.Encode(f, img); err != nil {
t.Fatal(err)
}
}
func TestDecodeImageLimited(t *testing.T) {
dir := t.TempDir()
p := filepath.Join(dir, "img.png")
writeTestImage(t, p, 100, 80) // 8000 px
if _, err := decodeImageLimited(p, 4000); err == nil {
t.Fatal("expected rejection for an image over the pixel cap")
}
img, err := decodeImageLimited(p, 100000)
if err != nil {
t.Fatalf("expected decode within the cap, got %v", err)
}
if b := img.Bounds(); b.Dx() != 100 || b.Dy() != 80 {
t.Fatalf("unexpected decoded size %v", b.Size())
}
}
// TestThumbnailGeneratesAndCaches exercises the full generation path (semaphore
// acquire → decode → fit → encode → cache) and the cache fast path on re-request.
func TestThumbnailGeneratesAndCaches(t *testing.T) {
files := t.TempDir()
thumbs := t.TempDir()
id := uuid.New()
writeTestImage(t, filepath.Join(files, id.String()), 100, 80)
s, err := NewDiskStorage(files, thumbs, 160, 160, 1920, 1080, 0, 1)
if err != nil {
t.Fatal(err)
}
rc, err := s.Thumbnail(context.Background(), id)
if err != nil {
t.Fatalf("Thumbnail: %v", err)
}
data, _ := io.ReadAll(rc)
rc.Close()
out, err := imaging.Decode(bytes.NewReader(data))
if err != nil {
t.Fatalf("decode thumbnail: %v", err)
}
// The source fits within 160×160, so it is not upscaled.
if b := out.Bounds(); b.Dx() != 100 || b.Dy() != 80 {
t.Fatalf("unexpected thumbnail size %v", b.Size())
}
// Centre pixel should be the source's red, not the grey placeholder.
r, g, b, _ := out.At(50, 40).RGBA()
if !(r>>8 > g>>8+40 && r>>8 > b>>8+40) {
t.Fatalf("thumbnail is not the source image (got r=%d g=%d b=%d) — fell back to placeholder?", r>>8, g>>8, b>>8)
}
// The cache file must now exist, and a second request must serve it.
if _, err := os.Stat(s.thumbCachePath(id)); err != nil {
t.Fatalf("cache file not written: %v", err)
}
rc2, err := s.Thumbnail(context.Background(), id)
if err != nil {
t.Fatalf("Thumbnail (cached): %v", err)
}
rc2.Close()
}