From 2d2a42d523a4219761baa94a19ed207b16894612 Mon Sep 17 00:00:00 2001 From: Masahiko AMANO Date: Fri, 12 Jun 2026 01:07:30 +0300 Subject: [PATCH] fix(backend): bound thumbnail generation and decode larger images MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thumbnails/previews are generated lazily per request with no concurrency limit, and the imaging resize already fans out across every core — so scrolling to a handful of large images spawned that many all-core, hundreds-of-MB decodes at once and pegged the server. Add a generation semaphore (THUMB_CONCURRENCY, default = half the CPUs) so only a bounded number run at a time; queued requests wait and re-check the cache. Also raise the decode cap from 64 Mpx to a configurable ~300 Mpx default (THUMB_MAX_PIXELS) so genuinely large photos (e.g. 13000×17000 ≈ 221 Mpx) get a real thumbnail instead of falling back to a placeholder. Co-Authored-By: Claude Opus 4.8 --- backend/cmd/server/main.go | 1 + backend/internal/config/config.go | 18 +++- backend/internal/integration/server_test.go | 2 +- backend/internal/storage/disk.go | 50 +++++++++-- backend/internal/storage/disk_test.go | 99 +++++++++++++++++++++ 5 files changed, 157 insertions(+), 13 deletions(-) create mode 100644 backend/internal/storage/disk_test.go diff --git a/backend/cmd/server/main.go b/backend/cmd/server/main.go index a6df5d5..1c0068d 100644 --- a/backend/cmd/server/main.go +++ b/backend/cmd/server/main.go @@ -52,6 +52,7 @@ func main() { cfg.ThumbsCachePath, cfg.ThumbWidth, cfg.ThumbHeight, cfg.PreviewWidth, cfg.PreviewHeight, + cfg.ThumbMaxPixels, cfg.ThumbConcurrency, ) if err != nil { slog.Error("failed to initialise storage", "err", err) diff --git a/backend/internal/config/config.go b/backend/internal/config/config.go index cfc0049..91c15c6 100644 --- a/backend/internal/config/config.go +++ b/backend/internal/config/config.go @@ -35,6 +35,14 @@ type Config struct { ThumbHeight int PreviewWidth int PreviewHeight int + // ThumbMaxPixels caps the pixel count of a source image we will decode into + // memory to generate a thumbnail/preview (a decode bombs guard, and a memory + // bound). Larger images fall back to a placeholder. + ThumbMaxPixels int + // ThumbConcurrency bounds how many thumbnails/previews are generated at once, + // so a burst of large images can't saturate every core or exhaust RAM. 0 = + // auto (half the available CPUs). + ThumbConcurrency int // Import ImportPath string @@ -119,10 +127,12 @@ func Load() (*Config, error) { ThumbsCachePath: requireStr("THUMBS_CACHE_PATH"), MaxUploadBytes: parseInt64("MAX_UPLOAD_BYTES", 500<<20), // 500 MiB - ThumbWidth: parseInt("THUMB_WIDTH", 160), - ThumbHeight: parseInt("THUMB_HEIGHT", 160), - PreviewWidth: parseInt("PREVIEW_WIDTH", 1920), - PreviewHeight: parseInt("PREVIEW_HEIGHT", 1080), + ThumbWidth: parseInt("THUMB_WIDTH", 160), + ThumbHeight: parseInt("THUMB_HEIGHT", 160), + PreviewWidth: parseInt("PREVIEW_WIDTH", 1920), + PreviewHeight: parseInt("PREVIEW_HEIGHT", 1080), + ThumbMaxPixels: parseInt("THUMB_MAX_PIXELS", 300_000_000), // ~300 Mpx (e.g. 13000×17000) + ThumbConcurrency: parseInt("THUMB_CONCURRENCY", 0), // 0 = auto ImportPath: requireStr("IMPORT_PATH"), diff --git a/backend/internal/integration/server_test.go b/backend/internal/integration/server_test.go index fded83f..dfd7dea 100644 --- a/backend/internal/integration/server_test.go +++ b/backend/internal/integration/server_test.go @@ -110,7 +110,7 @@ func setupSuite(t *testing.T) *harness { thumbsDir := t.TempDir() importDir := t.TempDir() - diskStorage, err := storage.NewDiskStorage(filesDir, thumbsDir, 160, 160, 1920, 1080) + diskStorage, err := storage.NewDiskStorage(filesDir, thumbsDir, 160, 160, 1920, 1080, 0, 0) require.NoError(t, err) // --- Repositories -------------------------------------------------------- diff --git a/backend/internal/storage/disk.go b/backend/internal/storage/disk.go index fddd832..b57ad6e 100644 --- a/backend/internal/storage/disk.go +++ b/backend/internal/storage/disk.go @@ -15,6 +15,7 @@ import ( "os" "os/exec" "path/filepath" + "runtime" "time" "github.com/disintegration/imaging" @@ -38,20 +39,36 @@ type DiskStorage struct { thumbHeight int previewWidth int previewHeight int + maxPixels int + // genSem bounds concurrent thumbnail/preview generation. Each resize already + // fans out across every core (imaging uses GOMAXPROCS), and large sources cost + // hundreds of MB to decode, so unbounded parallelism on a burst of big images + // pegs the CPU and can exhaust RAM. A buffered channel caps how many run at once. + genSem chan struct{} } var _ port.FileStorage = (*DiskStorage)(nil) // NewDiskStorage creates a DiskStorage and ensures both directories exist. +// +// maxPixels caps the source pixel count we will decode in-process (0 → a sane +// default). concurrency bounds simultaneous generation (≤0 → half the CPUs). func NewDiskStorage( filesPath, thumbsPath string, thumbW, thumbH, prevW, prevH int, + maxPixels, concurrency int, ) (*DiskStorage, error) { for _, p := range []string{filesPath, thumbsPath} { if err := os.MkdirAll(p, 0o755); err != nil { return nil, fmt.Errorf("storage: create directory %q: %w", p, err) } } + if maxPixels <= 0 { + maxPixels = defaultMaxDecodePixels + } + if concurrency <= 0 { + concurrency = max(1, runtime.GOMAXPROCS(0)/2) + } return &DiskStorage{ filesPath: filesPath, thumbsPath: thumbsPath, @@ -59,6 +76,8 @@ func NewDiskStorage( thumbHeight: thumbH, previewWidth: prevW, previewHeight: prevH, + maxPixels: maxPixels, + genSem: make(chan struct{}, concurrency), }, nil } @@ -164,11 +183,26 @@ func (s *DiskStorage) serveGenerated(ctx context.Context, id uuid.UUID, cachePat return nil, fmt.Errorf("storage: stat %q: %w", srcPath, err) } + // Bound concurrent generation so a burst of large images can't peg every core + // or exhaust RAM. Queue here (respecting cancellation) rather than starting + // the heavy decode immediately. + select { + case s.genSem <- struct{}{}: + defer func() { <-s.genSem }() + case <-ctx.Done(): + return nil, ctx.Err() + } + + // Another request may have generated this while we waited on the semaphore. + if f, err := os.Open(cachePath); err == nil { + return f, nil + } + // 1. Try still-image decode (JPEG/PNG/GIF), rejecting decompression bombs. // 2. Try video frame extraction via ffmpeg. // 3. Fall back to placeholder. var img image.Image - if decoded, err := decodeImageLimited(srcPath); err == nil { + if decoded, err := decodeImageLimited(srcPath, s.maxPixels); err == nil { img = imaging.Fit(decoded, maxW, maxH, imaging.Lanczos) } else if frame, err := extractVideoFrame(ctx, srcPath); err == nil { img = imaging.Fit(frame, maxW, maxH, imaging.Lanczos) @@ -221,15 +255,15 @@ func writeCache(cachePath string, img image.Image) (io.ReadCloser, error) { return f, nil } -// maxDecodePixels caps the pixel count of an image we are willing to decode -// into memory, bounding the cost of a decompression bomb (a tiny file that -// expands to an enormous raster). 64 Mpx is ~ an 8192×8192 image. -const maxDecodePixels = 64 << 20 +// defaultMaxDecodePixels is the fallback cap when none is configured. It bounds +// the cost of a decompression bomb (a tiny file that expands to an enormous +// raster) and the per-image memory; ~300 Mpx covers e.g. a 13000×17000 photo. +const defaultMaxDecodePixels = 300_000_000 // decodeImageLimited decodes the image at path after first inspecting its header // dimensions via image.DecodeConfig (which does not allocate the raster), and -// refuses images whose pixel count exceeds maxDecodePixels. -func decodeImageLimited(path string) (image.Image, error) { +// refuses images whose pixel count exceeds maxPixels. +func decodeImageLimited(path string, maxPixels int) (image.Image, error) { f, err := os.Open(path) if err != nil { return nil, err @@ -240,7 +274,7 @@ func decodeImageLimited(path string) (image.Image, error) { if err != nil { return nil, err } - if int64(cfg.Width)*int64(cfg.Height) > maxDecodePixels { + if int64(cfg.Width)*int64(cfg.Height) > int64(maxPixels) { return nil, fmt.Errorf("image too large to decode: %dx%d", cfg.Width, cfg.Height) } if _, err := f.Seek(0, io.SeekStart); err != nil { diff --git a/backend/internal/storage/disk_test.go b/backend/internal/storage/disk_test.go new file mode 100644 index 0000000..8ff02c4 --- /dev/null +++ b/backend/internal/storage/disk_test.go @@ -0,0 +1,99 @@ +package storage + +import ( + "bytes" + "context" + "image" + "image/color" + "image/png" + "io" + "os" + "path/filepath" + "testing" + + "github.com/disintegration/imaging" + "github.com/google/uuid" +) + +// writeTestImage writes a w×h PNG filled with a distinct (non-placeholder) +// colour so a generated thumbnail can be told apart from the grey placeholder. +func writeTestImage(t *testing.T, path string, w, h int) { + t.Helper() + img := image.NewNRGBA(image.Rect(0, 0, w, h)) + for y := 0; y < h; y++ { + for x := 0; x < w; x++ { + img.Set(x, y, color.NRGBA{R: 0xC0, G: 0x10, B: 0x20, A: 0xFF}) + } + } + f, err := os.Create(path) + if err != nil { + t.Fatal(err) + } + defer f.Close() + if err := png.Encode(f, img); err != nil { + t.Fatal(err) + } +} + +func TestDecodeImageLimited(t *testing.T) { + dir := t.TempDir() + p := filepath.Join(dir, "img.png") + writeTestImage(t, p, 100, 80) // 8000 px + + if _, err := decodeImageLimited(p, 4000); err == nil { + t.Fatal("expected rejection for an image over the pixel cap") + } + + img, err := decodeImageLimited(p, 100000) + if err != nil { + t.Fatalf("expected decode within the cap, got %v", err) + } + if b := img.Bounds(); b.Dx() != 100 || b.Dy() != 80 { + t.Fatalf("unexpected decoded size %v", b.Size()) + } +} + +// TestThumbnailGeneratesAndCaches exercises the full generation path (semaphore +// acquire → decode → fit → encode → cache) and the cache fast path on re-request. +func TestThumbnailGeneratesAndCaches(t *testing.T) { + files := t.TempDir() + thumbs := t.TempDir() + id := uuid.New() + writeTestImage(t, filepath.Join(files, id.String()), 100, 80) + + s, err := NewDiskStorage(files, thumbs, 160, 160, 1920, 1080, 0, 1) + if err != nil { + t.Fatal(err) + } + + rc, err := s.Thumbnail(context.Background(), id) + if err != nil { + t.Fatalf("Thumbnail: %v", err) + } + data, _ := io.ReadAll(rc) + rc.Close() + + out, err := imaging.Decode(bytes.NewReader(data)) + if err != nil { + t.Fatalf("decode thumbnail: %v", err) + } + // The source fits within 160×160, so it is not upscaled. + if b := out.Bounds(); b.Dx() != 100 || b.Dy() != 80 { + t.Fatalf("unexpected thumbnail size %v", b.Size()) + } + // Centre pixel should be the source's red, not the grey placeholder. + r, g, b, _ := out.At(50, 40).RGBA() + if !(r>>8 > g>>8+40 && r>>8 > b>>8+40) { + t.Fatalf("thumbnail is not the source image (got r=%d g=%d b=%d) — fell back to placeholder?", r>>8, g>>8, b>>8) + } + + // The cache file must now exist, and a second request must serve it. + if _, err := os.Stat(s.thumbCachePath(id)); err != nil { + t.Fatalf("cache file not written: %v", err) + } + rc2, err := s.Thumbnail(context.Background(), id) + if err != nil { + t.Fatalf("Thumbnail (cached): %v", err) + } + rc2.Close() +}