docs(project): document duplicate detection endpoints

Adds GET /files/duplicates, POST /files/duplicates/dismiss and POST
/files/duplicates/resolve to the OpenAPI spec, plus the DuplicateCluster,
DuplicateClusterPage and DuplicateResolve (with MergeScalarChoice /
MergeRelationChoice) schemas describing the field-by-field merge contract.

Also fills a pre-existing gap in the File schema: it now documents the `tags`
array (always returned by the API) and marks the always-present fields as
required, so generated clients type them as non-optional.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-16 12:48:21 +03:00
parent 6e3e6a4194
commit 96a903aaff
+178
View File
@@ -709,6 +709,89 @@ paths:
type: string type: string
format: uuid format: uuid
# --- Duplicate detection ---
# Offset/limit-paginated listing of duplicate clusters. Paging is counted in
# clusters, not in files.
/files/duplicates:
  get:
    tags: [Files]
    summary: List duplicate clusters
    description: >-
      Groups of perceptually similar files (within the server's hash-distance
      threshold), read from a precomputed pairs table — this never compares all
      files on each call. Pairs are (re)built offline by the dedup tool, so the
      result reflects state as of the last rescan. Only files the caller may view
      are included; dismissed and trashed pairs are excluded.
    parameters:
      # Page size: 1..50 clusters, 20 when omitted.
      - name: limit
        in: query
        schema:
          type: integer
          default: 20
          minimum: 1
          maximum: 50
        description: Maximum number of clusters to return
      # Page start: non-negative, 0 when omitted.
      - name: offset
        in: query
        schema:
          type: integer
          default: 0
          minimum: 0
        description: Number of clusters to skip before the first one returned
    responses:
      '200':
        description: A page of duplicate clusters
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DuplicateClusterPage'
/files/duplicates/dismiss:
  # Takes a pair of file ids in a JSON body and answers 204 with no content.
  post:
    summary: Mark two files as not duplicates
    tags:
      - Files
    description: >-
      Records a global "not a duplicate" decision so the pair stops
      appearing in the duplicates view (it survives future rescans).
      The caller must be able to view both files.
    requestBody:
      required: true
      content:
        application/json:
          schema:
            type: object
            # Both halves of the pair are mandatory.
            required:
              - file_id_a
              - file_id_b
            properties:
              file_id_a: {type: string, format: uuid}
              file_id_b: {type: string, format: uuid}
    responses:
      '204':
        description: Pair dismissed
/files/duplicates/resolve:
  # Merge request: body is a DuplicateResolve, reply is the surviving File.
  post:
    summary: Resolve a duplicate by merging two files
    tags:
      - Files
    description: >-
      Keeps one file and folds the chosen fields in from the other,
      then (by default) trashes the other. The caller must be able to
      edit both files. To simply delete one/both or to keep both, use
      the bulk-delete and dismiss endpoints instead. Returns the
      updated survivor.
    requestBody:
      required: true
      content:
        application/json:
          schema: {$ref: '#/components/schemas/DuplicateResolve'}
    responses:
      '200':
        description: The updated surviving file
        content:
          application/json:
            schema: {$ref: '#/components/schemas/File'}
# --- File import --- # --- File import ---
/files/import: /files/import:
post: post:
@@ -1766,6 +1849,19 @@ components:
# --- File --- # --- File ---
File: File:
type: object type: object
required:
- id
- mime_type
- mime_extension
- content_datetime
- exif
- creator_id
- creator_name
- is_public
- is_deleted
- needs_review
- created_at
- tags
properties: properties:
id: id:
type: string type: string
@@ -1814,6 +1910,11 @@ components:
type: string type: string
format: date-time format: date-time
description: Extracted from UUID v7 description: Extracted from UUID v7
tags:
type: array
description: Tags assigned to the file
items:
$ref: '#/components/schemas/Tag'
FileUpdate: FileUpdate:
type: object type: object
@@ -1846,6 +1947,83 @@ components:
nullable: true nullable: true
description: Cursor for loading previous (backward) page; null if at the beginning description: Cursor for loading previous (backward) page; null if at the beginning
# --- Duplicates ---
# One group of files considered duplicates of each other.
DuplicateCluster:
  type: object
  # `files` is listed as required so generated clients type it as non-optional.
  required:
    - files
  properties:
    files:
      type: array
      description: Two or more mutually similar files
      # Schema-level form of the "two or more" rule stated in the description.
      minItems: 2
      items:
        $ref: '#/components/schemas/File'
# One offset/limit page of duplicate clusters, as returned by
# GET /files/duplicates.
DuplicateClusterPage:
  type: object
  # All four members are listed as required so generated clients type them as
  # non-optional.
  required:
    - items
    - total
    - offset
    - limit
  properties:
    items:
      type: array
      description: Clusters on this page
      items:
        $ref: '#/components/schemas/DuplicateCluster'
    total:
      type: integer
      description: Total number of clusters (not files)
    offset:
      type: integer
      description: Number of clusters skipped before this page
    limit:
      type: integer
      description: Maximum number of clusters on this page
# Two-way selector referenced by the single-valued entries of
# DuplicateResolve.fields.
MergeScalarChoice:
  description: Take this field's value from the kept file or the discarded one
  type: string
  enum:
    - keep
    - discard
  default: keep
# Two-way selector referenced by the `tags` and `pools` entries of
# DuplicateResolve.fields.
MergeRelationChoice:
  description: Keep only the survivor's relations, or union both files' relations
  type: string
  enum:
    - keep
    - both
  default: keep
# Request body for POST /files/duplicates/resolve: which file survives, which
# one is folded in, and where each merged field comes from.
DuplicateResolve:
  type: object
  required:
    - keep
    - discard
  properties:
    keep:
      description: The file to keep (the survivor)
      type: string
      format: uuid
    discard:
      description: The other file in the pair
      type: string
      format: uuid
    delete_discarded:
      description: Move the discarded file to trash after merging
      type: boolean
      default: true
    fields:
      description: Per-field source for the merge; omitted fields default to "keep"
      type: object
      properties:
        # Single-valued fields: keep | discard.
        original_name:
          $ref: '#/components/schemas/MergeScalarChoice'
        notes:
          $ref: '#/components/schemas/MergeScalarChoice'
        content_datetime:
          $ref: '#/components/schemas/MergeScalarChoice'
        is_public:
          $ref: '#/components/schemas/MergeScalarChoice'
        # Metadata has a third option: merge.
        metadata:
          description: >-
            Keep or take the discarded file's metadata object, or
            shallow-merge them with the survivor winning on key conflicts
          type: string
          enum:
            - keep
            - discard
            - merge
          default: keep
        # Relations: keep | both.
        tags:
          $ref: '#/components/schemas/MergeRelationChoice'
        pools:
          $ref: '#/components/schemas/MergeRelationChoice'
# --- Tag --- # --- Tag ---
Tag: Tag:
type: object type: object