Files
tanabata/backend/internal/config/config.go
T
H1K0 88849cc16b feat(backend): perceptual hashing for images and video
Adds a 64-bit dHash perceptual hash (internal/imagehash, built on the existing
disintegration/imaging — no new dependency) and starts populating the long-unused
data.files.phash column:

- Upload sets phash inline for images (cheap, from the in-memory bytes).
- Replace recomputes it from new content for images and clears it for anything
  else, so a stale hash never survives a content swap.
- FileRepo.SetPHash sets/clears the hash (used by Replace and, later, the dedup
  backfill).
- DiskStorage.VideoFrameMiddle extracts a frame from the middle of a clip
  (ffprobe duration -> ffmpeg -ss duration/2), avoiding the shared-intro collision
  a fixed early offset causes. It is a concrete method, not part of the storage
  port: only the dedup CLI needs it, keeping ffmpeg off the upload path. Video
  phashes are therefore computed by that CLI, not at upload time.
- DUPLICATE_HASH_THRESHOLD config (default 10/64) for the later pair rescan.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-16 12:20:52 +03:00

195 lines
6.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package config
import (
"errors"
"fmt"
"os"
"strconv"
"strings"
"time"
"github.com/joho/godotenv"
)
// Config is the application's complete runtime configuration. Every value in
// it is loaded from environment variables.
type Config struct {
	// ---- Server ----

	ListenAddr, JWTSecret string

	JWTAccessTTL, JWTRefreshTTL time.Duration

	// ContentTokenTTL is the validity window of a content token. Such a token
	// is a capability for exactly one file, used to open or stream an original
	// by URL (for instance a long video in a new tab). It intentionally
	// outlives the access token and is not tied to the session, so playback
	// keeps working across access-token expiry and refresh rotation. Because
	// it acts as a bearer credential for that file until it expires, keep it
	// no longer than a viewing session plausibly lasts.
	ContentTokenTTL time.Duration

	// TrustedProxies is the set of reverse-proxy hops (CIDRs or IPs) whose
	// X-Forwarded-For header is believed. The auth rate limiter is keyed on
	// the client IP, so this has to match the proxy in front of the app: if it
	// does not, either all requests look like they come from the proxy (a
	// single shared bucket) or a direct caller can forge the header. The
	// default covers loopback plus the Docker bridge ranges through which a
	// host reverse proxy reaches the container.
	TrustedProxies []string

	// ---- Initial admin bootstrap (applied on startup if the user does not exist) ----

	AdminUsername, AdminPassword string

	// ---- Database ----

	DatabaseURL string

	// ---- Storage ----

	FilesPath, ThumbsCachePath string

	// MaxUploadBytes is the size limit in bytes; uploads larger than this are
	// rejected.
	MaxUploadBytes int64

	// ---- Thumbnails ----

	ThumbWidth, ThumbHeight     int
	PreviewWidth, PreviewHeight int

	// ThumbMaxPixels limits the pixel count of a source image that the pure-Go
	// fallback decodes in-process. It serves as a decompression-bomb guard and
	// a memory bound; images above the limit get a placeholder instead. The
	// limit is not applied when vipsthumbnail is installed, because that tool
	// shrinks on load regardless of source size.
	ThumbMaxPixels int

	// ThumbConcurrency caps the number of thumbnails/previews generated
	// simultaneously, so that a burst of large images cannot saturate every
	// core or exhaust RAM. 0 selects auto (half the available CPUs).
	ThumbConcurrency int

	// ---- Import ----

	ImportPath string

	// DuplicateHashThreshold is the largest Hamming distance (out of 64)
	// between two perceptual hashes at which the files still count as
	// duplicate candidates. Smaller values are stricter (fewer, more confident
	// matches); larger values are looser. It is used only by the dedup rescan
	// that (re)builds data.duplicate_pairs.
	DuplicateHashThreshold int

	// StaticDir enables static SPA serving. When non-empty, the server serves
	// the built frontend (falling back to index.html for client routes) on the
	// same port as the API. It is left empty in local development, where the
	// Vite dev server serves the UI separately.
	StaticDir string
}
// Load reads a .env file (if present) and then builds the Config from
// environment variables.
//
// Problems are accumulated instead of being returned one at a time: on failure
// the result is nil together with a single error (errors.Join) that lists every
// missing or invalid variable, so an operator can fix them all in one pass. On
// success the error is nil.
func Load() (*Config, error) {
	// Non-fatal: .env may not exist in production.
	_ = godotenv.Load()

	// errs collects every problem found by the helpers below.
	var errs []error

	// requireStr returns the variable's value and records an error when it is
	// unset or empty.
	requireStr := func(key string) string {
		v := os.Getenv(key)
		if v == "" {
			errs = append(errs, fmt.Errorf("%s is required", key))
		}
		return v
	}

	// defaultStr returns the variable's value, or def when it is unset or empty.
	defaultStr := func(key, def string) string {
		if v := os.Getenv(key); v != "" {
			return v
		}
		return def
	}

	// parseDuration parses a duration env var. Every duration in this config is a
	// token TTL, which must be strictly positive — a zero/negative TTL would mint
	// already-expired tokens (no login, no media playback) — so reject those here
	// rather than fail mysteriously at runtime.
	parseDuration := func(key, def string) time.Duration {
		raw := defaultStr(key, def)
		d, err := time.ParseDuration(raw)
		if err != nil {
			errs = append(errs, fmt.Errorf("%s: invalid duration %q: %w", key, raw, err))
			return 0
		}
		if d <= 0 {
			errs = append(errs, fmt.Errorf("%s must be positive, got %q", key, raw))
			return 0
		}
		return d
	}

	// parseInt returns def when the variable is unset or empty; a value that is
	// not a valid integer is recorded as an error and also yields def.
	parseInt := func(key string, def int) int {
		raw := os.Getenv(key)
		if raw == "" {
			return def
		}
		n, err := strconv.Atoi(raw)
		if err != nil {
			errs = append(errs, fmt.Errorf("%s: invalid integer %q: %w", key, raw, err))
			return def
		}
		return n
	}

	// parseIntRange is parseInt plus an inclusive [lo, hi] bounds check. An
	// out-of-range value is recorded as an error and replaced by def. A value
	// that failed to parse has already been reported by parseInt and comes back
	// as def, so it is not reported twice (def is expected to lie in range).
	parseIntRange := func(key string, def, lo, hi int) int {
		n := parseInt(key, def)
		if n < lo || n > hi {
			errs = append(errs, fmt.Errorf("%s must be between %d and %d, got %d", key, lo, hi, n))
			return def
		}
		return n
	}

	// parseCSV splits a comma-separated variable (or def when unset/empty),
	// trims whitespace around each item and drops empty items.
	parseCSV := func(key, def string) []string {
		raw := defaultStr(key, def)
		parts := strings.Split(raw, ",")
		out := make([]string, 0, len(parts))
		for _, p := range parts {
			if p = strings.TrimSpace(p); p != "" {
				out = append(out, p)
			}
		}
		return out
	}

	// parseInt64 is the int64 counterpart of parseInt (base 10).
	parseInt64 := func(key string, def int64) int64 {
		raw := os.Getenv(key)
		if raw == "" {
			return def
		}
		n, err := strconv.ParseInt(raw, 10, 64)
		if err != nil {
			errs = append(errs, fmt.Errorf("%s: invalid integer %q: %w", key, raw, err))
			return def
		}
		return n
	}

	cfg := &Config{
		ListenAddr:      defaultStr("LISTEN_ADDR", ":42776"),
		JWTSecret:       requireStr("JWT_SECRET"),
		JWTAccessTTL:    parseDuration("JWT_ACCESS_TTL", "15m"),
		JWTRefreshTTL:   parseDuration("JWT_REFRESH_TTL", "720h"),
		ContentTokenTTL: parseDuration("CONTENT_TOKEN_TTL", "6h"),
		TrustedProxies:  parseCSV("TRUSTED_PROXIES", "127.0.0.1/32,::1/128,172.16.0.0/12"),

		AdminUsername: defaultStr("ADMIN_USERNAME", "admin"),
		AdminPassword: requireStr("ADMIN_PASSWORD"),

		DatabaseURL: requireStr("DATABASE_URL"),

		FilesPath:       requireStr("FILES_PATH"),
		ThumbsCachePath: requireStr("THUMBS_CACHE_PATH"),
		MaxUploadBytes:  parseInt64("MAX_UPLOAD_BYTES", 500<<20), // 500 MiB

		ThumbWidth:       parseInt("THUMB_WIDTH", 160),
		ThumbHeight:      parseInt("THUMB_HEIGHT", 160),
		PreviewWidth:     parseInt("PREVIEW_WIDTH", 1920),
		PreviewHeight:    parseInt("PREVIEW_HEIGHT", 1080),
		ThumbMaxPixels:   parseInt("THUMB_MAX_PIXELS", 300_000_000), // ~300 Mpx (e.g. 13000×17000)
		ThumbConcurrency: parseInt("THUMB_CONCURRENCY", 0),          // 0 = auto

		ImportPath: requireStr("IMPORT_PATH"),

		// The threshold is a Hamming distance between 64-bit hashes, so only
		// 0..64 is meaningful; anything else is a configuration mistake and is
		// rejected here instead of silently matching nothing or everything.
		DuplicateHashThreshold: parseIntRange("DUPLICATE_HASH_THRESHOLD", 10, 0, 64),

		StaticDir: defaultStr("STATIC_DIR", ""),
	}

	if len(errs) > 0 {
		return nil, errors.Join(errs...)
	}
	return cfg, nil
}