9216a8687f
Adds the duplicate-detection backend on top of perceptual hashing:
- Two tables (edited into the original migrations): data.duplicate_pairs holds
precomputed near-duplicate candidates (rebuilt wholesale by the rescan), and
data.duplicate_dismissals is a global "not a duplicate" overlay that survives
rescans. New audit actions file_merge / duplicate_dismiss.
- DuplicateService:
- Rescan builds every pair within DUPLICATE_HASH_THRESHOLD via a BK-tree over
the perceptual hashes and replaces the pairs table. This is the only thing
that populates pairs, so GET never compares all-vs-all (scales to 110k+).
- Clusters reads the precomputed pairs (ACL-filtered, non-trashed, non-
dismissed), groups them into connected components via union-find, and
paginates whole clusters.
- Resolve merges a pair field-by-field: each scalar from keep or discard,
metadata keep/discard/shallow-merge, tags/pools keep or union; then trashes
the discarded file. Enforces edit ACL on both.
- Dismiss records a canonical pair (view ACL on both).
- Endpoints under /files: GET /files/duplicates, POST /files/duplicates/dismiss,
POST /files/duplicates/resolve (registered before /:id to avoid collision).
Plain delete reuses /files/bulk/delete.
- Repo support: ListMissingPHash, ListAllPHashes, CopyPoolMemberships, plus the
DuplicatePairRepo (ReplaceAll via COPY, ListVisible) and DismissalRepo.
Unit tests cover the BK-tree pairing, union-find clustering, metadata merge and
field validation; an integration test covers rescan -> list -> merge -> dismiss
(including that a dismissal survives a re-rescan).
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
151 lines
4.3 KiB
Go
151 lines
4.3 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"net/http"
|
|
"os"
|
|
"time"
|
|
|
|
"github.com/jackc/pgx/v5/stdlib"
|
|
"github.com/pressly/goose/v3"
|
|
|
|
"tanabata/backend/internal/config"
|
|
"tanabata/backend/internal/db/postgres"
|
|
"tanabata/backend/internal/handler"
|
|
"tanabata/backend/internal/service"
|
|
"tanabata/backend/internal/storage"
|
|
"tanabata/backend/migrations"
|
|
)
|
|
|
|
func main() {
|
|
cfg, err := config.Load()
|
|
if err != nil {
|
|
slog.Error("failed to load config", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
pool, err := postgres.NewPool(context.Background(), cfg.DatabaseURL)
|
|
if err != nil {
|
|
slog.Error("failed to connect to database", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
defer pool.Close()
|
|
slog.Info("database connected")
|
|
|
|
migDB := stdlib.OpenDBFromPool(pool)
|
|
goose.SetBaseFS(migrations.FS)
|
|
if err := goose.SetDialect("postgres"); err != nil {
|
|
slog.Error("goose dialect error", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
if err := goose.Up(migDB, "."); err != nil {
|
|
slog.Error("migrations failed", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
migDB.Close()
|
|
slog.Info("migrations applied")
|
|
|
|
// Storage
|
|
diskStorage, err := storage.NewDiskStorage(
|
|
cfg.FilesPath,
|
|
cfg.ThumbsCachePath,
|
|
cfg.ThumbWidth, cfg.ThumbHeight,
|
|
cfg.PreviewWidth, cfg.PreviewHeight,
|
|
cfg.ThumbMaxPixels, cfg.ThumbConcurrency,
|
|
)
|
|
if err != nil {
|
|
slog.Error("failed to initialise storage", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Repositories
|
|
userRepo := postgres.NewUserRepo(pool)
|
|
sessionRepo := postgres.NewSessionRepo(pool)
|
|
fileRepo := postgres.NewFileRepo(pool)
|
|
mimeRepo := postgres.NewMimeRepo(pool)
|
|
aclRepo := postgres.NewACLRepo(pool)
|
|
auditRepo := postgres.NewAuditRepo(pool)
|
|
tagRepo := postgres.NewTagRepo(pool)
|
|
tagRuleRepo := postgres.NewTagRuleRepo(pool)
|
|
categoryRepo := postgres.NewCategoryRepo(pool)
|
|
poolRepo := postgres.NewPoolRepo(pool)
|
|
duplicatePairRepo := postgres.NewDuplicatePairRepo(pool)
|
|
dismissalRepo := postgres.NewDismissalRepo(pool)
|
|
transactor := postgres.NewTransactor(pool)
|
|
|
|
// Services
|
|
authSvc := service.NewAuthService(
|
|
userRepo,
|
|
sessionRepo,
|
|
cfg.JWTSecret,
|
|
cfg.JWTAccessTTL,
|
|
cfg.JWTRefreshTTL,
|
|
cfg.ContentTokenTTL,
|
|
)
|
|
aclSvc := service.NewACLService(aclRepo, fileRepo, tagRepo, categoryRepo, poolRepo, transactor)
|
|
auditSvc := service.NewAuditService(auditRepo)
|
|
tagSvc := service.NewTagService(tagRepo, tagRuleRepo, aclSvc, auditSvc, transactor)
|
|
categorySvc := service.NewCategoryService(categoryRepo, tagRepo, aclSvc, auditSvc)
|
|
poolSvc := service.NewPoolService(poolRepo, aclSvc, auditSvc)
|
|
duplicateSvc := service.NewDuplicateService(
|
|
fileRepo, duplicatePairRepo, dismissalRepo, aclSvc, auditSvc, transactor, cfg.DuplicateHashThreshold,
|
|
)
|
|
fileSvc := service.NewFileService(
|
|
fileRepo,
|
|
mimeRepo,
|
|
diskStorage,
|
|
aclSvc,
|
|
auditSvc,
|
|
tagSvc,
|
|
transactor,
|
|
cfg.ImportPath,
|
|
)
|
|
userSvc := service.NewUserService(userRepo, sessionRepo, auditSvc)
|
|
|
|
// Bootstrap the initial administrator (idempotent).
|
|
if err := userSvc.EnsureAdmin(context.Background(), cfg.AdminUsername, cfg.AdminPassword); err != nil {
|
|
slog.Error("failed to bootstrap admin user", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Handlers
|
|
authMiddleware := handler.NewAuthMiddleware(authSvc)
|
|
authHandler := handler.NewAuthHandler(authSvc)
|
|
fileHandler := handler.NewFileHandler(fileSvc, tagSvc, authSvc, cfg.MaxUploadBytes)
|
|
duplicateHandler := handler.NewDuplicateHandler(duplicateSvc)
|
|
tagHandler := handler.NewTagHandler(tagSvc, fileSvc)
|
|
categoryHandler := handler.NewCategoryHandler(categorySvc)
|
|
poolHandler := handler.NewPoolHandler(poolSvc)
|
|
userHandler := handler.NewUserHandler(userSvc)
|
|
aclHandler := handler.NewACLHandler(aclSvc)
|
|
auditHandler := handler.NewAuditHandler(auditSvc)
|
|
|
|
r, err := handler.NewRouter(
|
|
authMiddleware, authHandler,
|
|
fileHandler, duplicateHandler, tagHandler, categoryHandler, poolHandler,
|
|
userHandler, aclHandler, auditHandler,
|
|
cfg.StaticDir,
|
|
cfg.TrustedProxies,
|
|
)
|
|
if err != nil {
|
|
slog.Error("building router", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// ReadHeaderTimeout bounds slow-header (Slowloris) attacks; body read/write
|
|
// are left unbounded so large file uploads and downloads can stream.
|
|
srv := &http.Server{
|
|
Addr: cfg.ListenAddr,
|
|
Handler: r,
|
|
ReadHeaderTimeout: 10 * time.Second,
|
|
IdleTimeout: 120 * time.Second,
|
|
}
|
|
|
|
slog.Info("starting server", "addr", cfg.ListenAddr)
|
|
if err := srv.ListenAndServe(); err != nil {
|
|
slog.Error("server error", "err", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|