fix(backend): bound thumbnail generation and decode larger images
Thumbnails/previews are generated lazily per request with no concurrency limit, and the imaging resize already fans out across every core — so scrolling to a handful of large images spawned that many all-core, hundreds-of-MB decodes at once and pegged the server. Add a generation semaphore (THUMB_CONCURRENCY, default = half the CPUs) so only a bounded number run at a time; queued requests wait and re-check the cache. Also raise the decode cap from 64 Mpx to a configurable ~300 Mpx default (THUMB_MAX_PIXELS) so genuinely large photos (e.g. 13000×17000 ≈ 221 Mpx) get a real thumbnail instead of falling back to a placeholder. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -52,6 +52,7 @@ func main() {
|
||||
cfg.ThumbsCachePath,
|
||||
cfg.ThumbWidth, cfg.ThumbHeight,
|
||||
cfg.PreviewWidth, cfg.PreviewHeight,
|
||||
cfg.ThumbMaxPixels, cfg.ThumbConcurrency,
|
||||
)
|
||||
if err != nil {
|
||||
slog.Error("failed to initialise storage", "err", err)
|
||||
|
||||
@@ -35,6 +35,14 @@ type Config struct {
|
||||
ThumbHeight int
|
||||
PreviewWidth int
|
||||
PreviewHeight int
|
||||
// ThumbMaxPixels caps the pixel count of a source image we will decode into
|
||||
// memory to generate a thumbnail/preview (a decode bombs guard, and a memory
|
||||
// bound). Larger images fall back to a placeholder.
|
||||
ThumbMaxPixels int
|
||||
// ThumbConcurrency bounds how many thumbnails/previews are generated at once,
|
||||
// so a burst of large images can't saturate every core or exhaust RAM. 0 =
|
||||
// auto (half the available CPUs).
|
||||
ThumbConcurrency int
|
||||
|
||||
// Import
|
||||
ImportPath string
|
||||
@@ -119,10 +127,12 @@ func Load() (*Config, error) {
|
||||
ThumbsCachePath: requireStr("THUMBS_CACHE_PATH"),
|
||||
MaxUploadBytes: parseInt64("MAX_UPLOAD_BYTES", 500<<20), // 500 MiB
|
||||
|
||||
ThumbWidth: parseInt("THUMB_WIDTH", 160),
|
||||
ThumbHeight: parseInt("THUMB_HEIGHT", 160),
|
||||
PreviewWidth: parseInt("PREVIEW_WIDTH", 1920),
|
||||
PreviewHeight: parseInt("PREVIEW_HEIGHT", 1080),
|
||||
ThumbWidth: parseInt("THUMB_WIDTH", 160),
|
||||
ThumbHeight: parseInt("THUMB_HEIGHT", 160),
|
||||
PreviewWidth: parseInt("PREVIEW_WIDTH", 1920),
|
||||
PreviewHeight: parseInt("PREVIEW_HEIGHT", 1080),
|
||||
ThumbMaxPixels: parseInt("THUMB_MAX_PIXELS", 300_000_000), // ~300 Mpx (e.g. 13000×17000)
|
||||
ThumbConcurrency: parseInt("THUMB_CONCURRENCY", 0), // 0 = auto
|
||||
|
||||
ImportPath: requireStr("IMPORT_PATH"),
|
||||
|
||||
|
||||
@@ -110,7 +110,7 @@ func setupSuite(t *testing.T) *harness {
|
||||
thumbsDir := t.TempDir()
|
||||
importDir := t.TempDir()
|
||||
|
||||
diskStorage, err := storage.NewDiskStorage(filesDir, thumbsDir, 160, 160, 1920, 1080)
|
||||
diskStorage, err := storage.NewDiskStorage(filesDir, thumbsDir, 160, 160, 1920, 1080, 0, 0)
|
||||
require.NoError(t, err)
|
||||
|
||||
// --- Repositories --------------------------------------------------------
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/disintegration/imaging"
|
||||
@@ -38,20 +39,36 @@ type DiskStorage struct {
|
||||
thumbHeight int
|
||||
previewWidth int
|
||||
previewHeight int
|
||||
maxPixels int
|
||||
// genSem bounds concurrent thumbnail/preview generation. Each resize already
|
||||
// fans out across every core (imaging uses GOMAXPROCS), and large sources cost
|
||||
// hundreds of MB to decode, so unbounded parallelism on a burst of big images
|
||||
// pegs the CPU and can exhaust RAM. A buffered channel caps how many run at once.
|
||||
genSem chan struct{}
|
||||
}
|
||||
|
||||
var _ port.FileStorage = (*DiskStorage)(nil)
|
||||
|
||||
// NewDiskStorage creates a DiskStorage and ensures both directories exist.
|
||||
//
|
||||
// maxPixels caps the source pixel count we will decode in-process (0 → a sane
|
||||
// default). concurrency bounds simultaneous generation (≤0 → half the CPUs).
|
||||
func NewDiskStorage(
|
||||
filesPath, thumbsPath string,
|
||||
thumbW, thumbH, prevW, prevH int,
|
||||
maxPixels, concurrency int,
|
||||
) (*DiskStorage, error) {
|
||||
for _, p := range []string{filesPath, thumbsPath} {
|
||||
if err := os.MkdirAll(p, 0o755); err != nil {
|
||||
return nil, fmt.Errorf("storage: create directory %q: %w", p, err)
|
||||
}
|
||||
}
|
||||
if maxPixels <= 0 {
|
||||
maxPixels = defaultMaxDecodePixels
|
||||
}
|
||||
if concurrency <= 0 {
|
||||
concurrency = max(1, runtime.GOMAXPROCS(0)/2)
|
||||
}
|
||||
return &DiskStorage{
|
||||
filesPath: filesPath,
|
||||
thumbsPath: thumbsPath,
|
||||
@@ -59,6 +76,8 @@ func NewDiskStorage(
|
||||
thumbHeight: thumbH,
|
||||
previewWidth: prevW,
|
||||
previewHeight: prevH,
|
||||
maxPixels: maxPixels,
|
||||
genSem: make(chan struct{}, concurrency),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -164,11 +183,26 @@ func (s *DiskStorage) serveGenerated(ctx context.Context, id uuid.UUID, cachePat
|
||||
return nil, fmt.Errorf("storage: stat %q: %w", srcPath, err)
|
||||
}
|
||||
|
||||
// Bound concurrent generation so a burst of large images can't peg every core
|
||||
// or exhaust RAM. Queue here (respecting cancellation) rather than starting
|
||||
// the heavy decode immediately.
|
||||
select {
|
||||
case s.genSem <- struct{}{}:
|
||||
defer func() { <-s.genSem }()
|
||||
case <-ctx.Done():
|
||||
return nil, ctx.Err()
|
||||
}
|
||||
|
||||
// Another request may have generated this while we waited on the semaphore.
|
||||
if f, err := os.Open(cachePath); err == nil {
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// 1. Try still-image decode (JPEG/PNG/GIF), rejecting decompression bombs.
|
||||
// 2. Try video frame extraction via ffmpeg.
|
||||
// 3. Fall back to placeholder.
|
||||
var img image.Image
|
||||
if decoded, err := decodeImageLimited(srcPath); err == nil {
|
||||
if decoded, err := decodeImageLimited(srcPath, s.maxPixels); err == nil {
|
||||
img = imaging.Fit(decoded, maxW, maxH, imaging.Lanczos)
|
||||
} else if frame, err := extractVideoFrame(ctx, srcPath); err == nil {
|
||||
img = imaging.Fit(frame, maxW, maxH, imaging.Lanczos)
|
||||
@@ -221,15 +255,15 @@ func writeCache(cachePath string, img image.Image) (io.ReadCloser, error) {
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// maxDecodePixels caps the pixel count of an image we are willing to decode
|
||||
// into memory, bounding the cost of a decompression bomb (a tiny file that
|
||||
// expands to an enormous raster). 64 Mpx is ~ an 8192×8192 image.
|
||||
const maxDecodePixels = 64 << 20
|
||||
// defaultMaxDecodePixels is the fallback cap when none is configured. It bounds
|
||||
// the cost of a decompression bomb (a tiny file that expands to an enormous
|
||||
// raster) and the per-image memory; ~300 Mpx covers e.g. a 13000×17000 photo.
|
||||
const defaultMaxDecodePixels = 300_000_000
|
||||
|
||||
// decodeImageLimited decodes the image at path after first inspecting its header
|
||||
// dimensions via image.DecodeConfig (which does not allocate the raster), and
|
||||
// refuses images whose pixel count exceeds maxDecodePixels.
|
||||
func decodeImageLimited(path string) (image.Image, error) {
|
||||
// refuses images whose pixel count exceeds maxPixels.
|
||||
func decodeImageLimited(path string, maxPixels int) (image.Image, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -240,7 +274,7 @@ func decodeImageLimited(path string) (image.Image, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if int64(cfg.Width)*int64(cfg.Height) > maxDecodePixels {
|
||||
if int64(cfg.Width)*int64(cfg.Height) > int64(maxPixels) {
|
||||
return nil, fmt.Errorf("image too large to decode: %dx%d", cfg.Width, cfg.Height)
|
||||
}
|
||||
if _, err := f.Seek(0, io.SeekStart); err != nil {
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/png"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/disintegration/imaging"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// writeTestImage writes a w×h PNG filled with a distinct (non-placeholder)
|
||||
// colour so a generated thumbnail can be told apart from the grey placeholder.
|
||||
func writeTestImage(t *testing.T, path string, w, h int) {
|
||||
t.Helper()
|
||||
img := image.NewNRGBA(image.Rect(0, 0, w, h))
|
||||
for y := 0; y < h; y++ {
|
||||
for x := 0; x < w; x++ {
|
||||
img.Set(x, y, color.NRGBA{R: 0xC0, G: 0x10, B: 0x20, A: 0xFF})
|
||||
}
|
||||
}
|
||||
f, err := os.Create(path)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
if err := png.Encode(f, img); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecodeImageLimited(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
p := filepath.Join(dir, "img.png")
|
||||
writeTestImage(t, p, 100, 80) // 8000 px
|
||||
|
||||
if _, err := decodeImageLimited(p, 4000); err == nil {
|
||||
t.Fatal("expected rejection for an image over the pixel cap")
|
||||
}
|
||||
|
||||
img, err := decodeImageLimited(p, 100000)
|
||||
if err != nil {
|
||||
t.Fatalf("expected decode within the cap, got %v", err)
|
||||
}
|
||||
if b := img.Bounds(); b.Dx() != 100 || b.Dy() != 80 {
|
||||
t.Fatalf("unexpected decoded size %v", b.Size())
|
||||
}
|
||||
}
|
||||
|
||||
// TestThumbnailGeneratesAndCaches exercises the full generation path (semaphore
|
||||
// acquire → decode → fit → encode → cache) and the cache fast path on re-request.
|
||||
func TestThumbnailGeneratesAndCaches(t *testing.T) {
|
||||
files := t.TempDir()
|
||||
thumbs := t.TempDir()
|
||||
id := uuid.New()
|
||||
writeTestImage(t, filepath.Join(files, id.String()), 100, 80)
|
||||
|
||||
s, err := NewDiskStorage(files, thumbs, 160, 160, 1920, 1080, 0, 1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
rc, err := s.Thumbnail(context.Background(), id)
|
||||
if err != nil {
|
||||
t.Fatalf("Thumbnail: %v", err)
|
||||
}
|
||||
data, _ := io.ReadAll(rc)
|
||||
rc.Close()
|
||||
|
||||
out, err := imaging.Decode(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
t.Fatalf("decode thumbnail: %v", err)
|
||||
}
|
||||
// The source fits within 160×160, so it is not upscaled.
|
||||
if b := out.Bounds(); b.Dx() != 100 || b.Dy() != 80 {
|
||||
t.Fatalf("unexpected thumbnail size %v", b.Size())
|
||||
}
|
||||
// Centre pixel should be the source's red, not the grey placeholder.
|
||||
r, g, b, _ := out.At(50, 40).RGBA()
|
||||
if !(r>>8 > g>>8+40 && r>>8 > b>>8+40) {
|
||||
t.Fatalf("thumbnail is not the source image (got r=%d g=%d b=%d) — fell back to placeholder?", r>>8, g>>8, b>>8)
|
||||
}
|
||||
|
||||
// The cache file must now exist, and a second request must serve it.
|
||||
if _, err := os.Stat(s.thumbCachePath(id)); err != nil {
|
||||
t.Fatalf("cache file not written: %v", err)
|
||||
}
|
||||
rc2, err := s.Thumbnail(context.Background(), id)
|
||||
if err != nil {
|
||||
t.Fatalf("Thumbnail (cached): %v", err)
|
||||
}
|
||||
rc2.Close()
|
||||
}
|
||||
Reference in New Issue
Block a user