diff --git a/backend/internal/service/file_service.go b/backend/internal/service/file_service.go index 8a8aa2b..dcf2453 100644 --- a/backend/internal/service/file_service.go +++ b/backend/internal/service/file_service.go @@ -13,7 +13,6 @@ import ( "github.com/gabriel-vasile/mimetype" "github.com/google/uuid" - "github.com/rwcarlsen/goexif/exif" "tanabata/backend/internal/domain" "tanabata/backend/internal/port" @@ -112,7 +111,7 @@ func NewFileService( // Upload validates the MIME type, saves the file to storage, creates the DB // record, and applies any initial tags — all within a single transaction. -// If ContentDatetime is nil and EXIF DateTimeOriginal is present, it is used. +// If ContentDatetime is nil and the metadata carries a capture date, it is used. func (s *FileService) Upload(ctx context.Context, p UploadParams) (*domain.File, error) { userID, _, _ := domain.UserFromContext(ctx) @@ -129,10 +128,14 @@ func (s *FileService) Upload(ctx context.Context, p UploadParams) (*domain.File, } data := buf.Bytes() - // Extract EXIF metadata (best-effort; non-image files will error silently). - exifData, exifDatetime := extractEXIFWithDatetime(data) + // Extract rich metadata (best-effort; covers images, video and audio). + var origName string + if p.OriginalName != nil { + origName = *p.OriginalName + } + exifData, exifDatetime := extractMetadata(data, origName, p.ContentDatetimeFallback) - // Resolve content datetime: explicit > EXIF > fallback (e.g. import mtime) > zero. + // Resolve content datetime: explicit > metadata date > fallback (e.g. import mtime) > zero. 
var contentDatetime time.Time if p.ContentDatetime != nil { contentDatetime = *p.ContentDatetime @@ -405,7 +408,11 @@ func (s *FileService) Replace(ctx context.Context, id uuid.UUID, p UploadParams) return nil, fmt.Errorf("FileService.Replace: read body: %w", err) } data := buf.Bytes() - exifData, _ := extractEXIFWithDatetime(data) + var origName string + if p.OriginalName != nil { + origName = *p.OriginalName + } + exifData, _ := extractMetadata(data, origName, nil) if _, err := s.storage.Save(ctx, id, bytes.NewReader(data)); err != nil { return nil, fmt.Errorf("FileService.Replace: save to storage: %w", err) @@ -656,21 +663,3 @@ func confineToBase(base, target string) (string, error) { } return absTarget, nil } - -// extractEXIFWithDatetime parses EXIF from raw bytes, returning both the JSON -// representation and the DateTimeOriginal (if present). Both may be nil. -func extractEXIFWithDatetime(data []byte) (json.RawMessage, *time.Time) { - x, err := exif.Decode(bytes.NewReader(data)) - if err != nil { - return nil, nil - } - b, err := x.MarshalJSON() - if err != nil { - return nil, nil - } - var dt *time.Time - if t, err := x.DateTime(); err == nil { - dt = &t - } - return json.RawMessage(b), dt -} diff --git a/backend/internal/service/metadata.go b/backend/internal/service/metadata.go new file mode 100644 index 0000000..5ef28e0 --- /dev/null +++ b/backend/internal/service/metadata.go @@ -0,0 +1,183 @@ +package service + +import ( + "bytes" + "context" + "encoding/json" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/rwcarlsen/goexif/exif" +) + +// exiftoolPath is resolved once at startup. When exiftool isn't installed we +// skip the subprocess and fall back to the pure-Go EXIF reader, so the server +// still runs (with thinner metadata) on hosts without it. +var exiftoolPath, _ = exec.LookPath("exiftool") + +// metadataTimeout bounds a single exiftool invocation so a pathological file +// can't wedge an upload. 
+const metadataTimeout = 30 * time.Second
+
+// metaTempFileKeys are exiftool fields that describe the temporary file we feed
+// it rather than the content. Dropping them avoids leaking internal paths and
+// recording the temp file's permissions and filesystem timestamps.
+var metaTempFileKeys = []string{
+	"SourceFile",
+	"Directory",
+	"FileAccessDate",
+	"FileCreateDate", // temp file's creation time, on platforms where exiftool reports it
+	"FileInodeChangeDate",
+	"FilePermissions",
+}
+
+// metaDateKeys are the metadata fields, in priority order, holding the moment
+// the content was captured/created: photo tags, video atoms, then ModifyDate.
+var metaDateKeys = []string{
+	"DateTimeOriginal",
+	"CreateDate",
+	"MediaCreateDate",
+	"TrackCreateDate",
+	"ModifyDate",
+}
+
+// extractMetadata returns rich metadata as JSON plus the best content datetime
+// it can find. It prefers exiftool, which understands video, audio and image
+// formats and emits machine-readable numeric values (the basis for later
+// analytics); when exiftool is not installed, or its run fails for this input,
+// it falls back to the pure-Go EXIF reader, which only handles JPEG/TIFF.
+//
+// originalName supplies the extension exiftool uses for format detection and the
+// FileName reported back. mtime, when set (e.g. a server-side import), is stamped
+// onto the temp file so FileModifyDate reflects the real source.
+func extractMetadata(data []byte, originalName string, mtime *time.Time) (json.RawMessage, *time.Time) {
+	if exiftoolPath != "" {
+		if raw, dt, ok := exiftoolExtract(data, originalName, mtime); ok {
+			return raw, dt
+		}
+	}
+	return extractEXIFWithDatetime(data)
+}
+
+// exiftoolExtract stages the bytes in a temp file and shells out to exiftool,
+// returning the tag map as JSON; ok=false on any failure lets the caller fall back.
+func exiftoolExtract(data []byte, originalName string, mtime *time.Time) (json.RawMessage, *time.Time, bool) {
+	// exiftool reads a real file far more reliably than a pipe (it seeks freely,
+	// e.g. to a trailing MP4 moov atom), so stage the bytes in a temp file whose
+	// extension matches the original for accurate format detection.
+	tmp, err := os.CreateTemp("", "tfm-meta-*"+filepath.Ext(originalName))
+	if err != nil {
+		return nil, nil, false
+	}
+	tmpName := tmp.Name()
+	defer os.Remove(tmpName)
+
+	if _, err := tmp.Write(data); err != nil {
+		tmp.Close()
+		return nil, nil, false
+	}
+	if err := tmp.Close(); err != nil {
+		return nil, nil, false
+	}
+	if mtime != nil {
+		_ = os.Chtimes(tmpName, *mtime, *mtime)
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), metadataTimeout)
+	defer cancel()
+	// -n forces raw numeric/machine values for every tag (no "3.53 Mbps" strings)
+	// so the metadata is analytics-ready. -all extracts every tag. largefilesupport
+	// handles multi-GB videos. Output is a one-element JSON array.
+	out, err := exec.CommandContext(ctx, exiftoolPath,
+		"-n", "-all", "-json", "-api", "largefilesupport=1", tmpName,
+	).Output()
+	if err != nil {
+		return nil, nil, false
+	}
+
+	var arr []map[string]json.RawMessage
+	if err := json.Unmarshal(out, &arr); err != nil || len(arr) == 0 {
+		return nil, nil, false
+	}
+	m := arr[0]
+
+	dt := pickMetaDatetime(m)
+
+	// Strip temp-file artifacts and substitute the real name.
+	for _, k := range metaTempFileKeys {
+		delete(m, k)
+	}
+	if mtime == nil {
+		// Without a real source mtime this is just the temp file's write time.
+		delete(m, "FileModifyDate")
+	}
+	if originalName != "" {
+		if nb, err := json.Marshal(originalName); err == nil {
+			m["FileName"] = nb
+		}
+	} else {
+		delete(m, "FileName")
+	}
+
+	raw, err := json.Marshal(m)
+	if err != nil {
+		return nil, nil, false
+	}
+	return raw, dt, true
+}
+
+// pickMetaDatetime returns the first parseable content date among metaDateKeys.
+func pickMetaDatetime(m map[string]json.RawMessage) *time.Time {
+	for _, key := range metaDateKeys {
+		raw, ok := m[key]
+		if !ok {
+			continue
+		}
+		var s string
+		if err := json.Unmarshal(raw, &s); err != nil {
+			continue
+		}
+		if t, ok := parseExifDate(s); ok {
+			return &t
+		}
+	}
+	return nil
+}
+
+// parseExifDate parses exiftool's "YYYY:MM:DD HH:MM:SS" timestamps, with or
+// without a trailing zone ("Z" or "+hh:mm"; the Z07:00 layout accepts both).
+// Zeroed placeholders ("0000:00:00 ...") fail to parse and report ok=false.
+func parseExifDate(s string) (time.Time, bool) {
+	s = strings.TrimSpace(s)
+	for _, layout := range []string{
+		"2006:01:02 15:04:05Z07:00",
+		"2006:01:02 15:04:05",
+	} {
+		if t, err := time.Parse(layout, s); err == nil {
+			return t, true
+		}
+	}
+	return time.Time{}, false
+}
+
+// extractEXIFWithDatetime is the pure-Go fallback used when exiftool is absent.
+// It parses EXIF from raw bytes (JPEG/TIFF only), returning both the JSON
+// representation and the DateTimeOriginal (if present). Both may be nil.
+func extractEXIFWithDatetime(data []byte) (json.RawMessage, *time.Time) {
+	decoded, decodeErr := exif.Decode(bytes.NewReader(data))
+	if decodeErr != nil {
+		return nil, nil // undecodable input yields no metadata at all
+	}
+	encoded, encodeErr := decoded.MarshalJSON()
+	if encodeErr != nil {
+		return nil, nil
+	}
+	// The datetime is optional: when it cannot be read the JSON is still returned.
+	if taken, dateErr := decoded.DateTime(); dateErr == nil {
+		return json.RawMessage(encoded), &taken
+	}
+	return json.RawMessage(encoded), nil
+}
diff --git a/backend/internal/service/metadata_test.go b/backend/internal/service/metadata_test.go
new file mode 100644
index 0000000..a49bae5
--- /dev/null
+++ b/backend/internal/service/metadata_test.go
@@ -0,0 +1,110 @@
+package service
+
+import (
+	"bytes"
+	"encoding/json"
+	"image"
+	"image/color"
+	"image/png"
+	"testing"
+	"time"
+)
+
+func TestParseExifDate(t *testing.T) {
+	// Table of inputs: bare timestamps, an explicit offset, padding, and rejects.
+	utc, plus3 := time.UTC, time.FixedZone("", 3*3600)
+	table := []struct {
+		input    string
+		wantOK   bool
+		wantTime time.Time
+	}{
+		{"2026:03:24 16:57:58", true, time.Date(2026, 3, 24, 16, 57, 58, 0, utc)},
+		{"2026:05:08 23:07:55+03:00", true, time.Date(2026, 5, 8, 23, 7, 55, 0, plus3)},
+		{" 2026:01:02 03:04:05 ", true, time.Date(2026, 1, 2, 3, 4, 5, 0, utc)},
+		{"0000:00:00 00:00:00", false, time.Time{}},
+		{"not a date", false, time.Time{}},
+		{"", false, time.Time{}},
+	}
+	for _, tc := range table {
+		switch parsed, gotOK := parseExifDate(tc.input); {
+		case gotOK != tc.wantOK:
+			t.Errorf("parseExifDate(%q) ok=%v, want %v", tc.input, gotOK, tc.wantOK)
+		case gotOK && !parsed.Equal(tc.wantTime):
+			t.Errorf("parseExifDate(%q) = %v, want %v", tc.input, parsed, tc.wantTime)
+		}
+	}
+}
+
+// tinyPNG returns a valid 2x3 PNG with no embedded EXIF/date.
+func tinyPNG(t *testing.T) []byte {
+	t.Helper()
+	canvas := image.NewRGBA(image.Rect(0, 0, 2, 3))
+	canvas.Set(0, 0, color.RGBA{R: 10, G: 20, B: 30, A: 255})
+	encoded := new(bytes.Buffer)
+	if err := png.Encode(encoded, canvas); err != nil {
+		t.Fatalf("encode png: %v", err)
+	}
+	return encoded.Bytes()
+}
+
+func TestExtractMetadataExiftool(t *testing.T) {
+	// Exercises the exiftool path only; without the binary the test is skipped.
+	if exiftoolPath == "" {
+		t.Skip("exiftool not installed; metadata extraction falls back to goexif")
+	}
+
+	const uploadName = "snapshot.png"
+	meta, captured := extractMetadata(tinyPNG(t), uploadName, nil)
+	if meta == nil {
+		t.Fatal("expected non-nil metadata JSON")
+	}
+	if captured != nil {
+		t.Errorf("a PNG without a capture date should yield no content datetime, got %v", captured)
+	}
+
+	var fields map[string]json.RawMessage
+	if err := json.Unmarshal(meta, &fields); err != nil {
+		t.Fatalf("metadata is not valid JSON: %v", err)
+	}
+
+	// exiftool understood the format (goexif never would for PNG).
+	if fileType := jsonString(t, fields, "FileType"); fileType != "PNG" {
+		t.Errorf("FileType = %q, want PNG", fileType)
+	}
+
+	// Dimensions must decode as JSON numbers, not human-readable strings.
+	for _, dim := range []string{"ImageWidth", "ImageHeight"} {
+		var pixels float64
+		if value, present := fields[dim]; !present {
+			t.Errorf("missing %s", dim)
+		} else if err := json.Unmarshal(value, &pixels); err != nil {
+			t.Errorf("%s is not numeric: %s", dim, value)
+		}
+	}
+
+	// FileName is the original, not the temp file; temp-file artifacts are gone.
+	if reported := jsonString(t, fields, "FileName"); reported != uploadName {
+		t.Errorf("FileName = %q, want snapshot.png", reported)
+	}
+	for _, key := range []string{"SourceFile", "Directory", "FilePermissions", "FileModifyDate"} {
+		if _, present := fields[key]; present {
+			t.Errorf("temp-file field %q should have been stripped", key)
+		}
+	}
+}
+
+// jsonString returns m[key] decoded as a JSON string, or "" after reporting a
+// test error when the key is absent or does not hold a JSON string.
+func jsonString(t *testing.T, m map[string]json.RawMessage, key string) string {
+	t.Helper()
+	encoded, found := m[key]
+	var text string
+	switch {
+	case !found:
+		t.Errorf("missing key %q", key)
+	case json.Unmarshal(encoded, &text) != nil:
+		t.Errorf("key %q is not a string: %s", key, encoded)
+		return ""
+	}
+	return text
+}