From ed70a140a2d6cb2d2108ce7b4e73533a34bb852e Mon Sep 17 00:00:00 2001 From: Ziki Shay Date: Fri, 8 May 2026 00:05:51 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9A=82=E5=AD=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env | 5 +- .version | 1 + apidoc/README.md | 49 + apidoc/adminapi.md | 355 ++++++ apidoc/evidenceapi.md | 382 ++++++ apidoc/importapi.md | 6 +- apidoc/searchapi.md | 49 +- app/controller/AdminController.php | 64 + app/controller/Api/AdminAuthController.php | 129 ++ app/controller/Api/AdminConsoleController.php | 351 ++++++ app/controller/Api/EvidenceController.php | 255 ++++ app/service/AdminAuthService.php | 63 + app/service/AdminConsole/AdminDocService.php | 76 ++ .../AdminConsole/ArchiveAdminService.php | 205 ++++ .../AdminConsole/MaintenanceScriptService.php | 148 +++ app/service/AdminConsole/MarkdownRenderer.php | 185 +++ .../AdminConsole/OpenSearchAdminService.php | 153 +++ app/service/AdminUserRepository.php | 108 ++ app/service/ArchiveRepository.php | 160 +++ app/service/ArticleImportService.php | 65 +- .../Search/ChunkSearchIndexRepository.php | 18 + app/service/Search/OpenSearchChunkIndex.php | 27 + .../Search/OpenSearchSearchService.php | 3 + app/view/admin/dashboard.html | 835 +++++++++++++ app/view/admin/landing.html | 83 ++ app/view/admin/login.html | 87 ++ config/admin.php | 5 + config/opensearch.php | 4 +- config/route.php | 27 +- public/admin.css | 1054 +++++++++++++++++ readme.md | 27 +- scriptdoc/README.md | 35 + scriptdoc/backfill_archive_content.md | 76 ++ scriptdoc/reindex_opensearch.md | 93 ++ scriptdoc/setup_admin_users.md | 56 + scriptdoc/setup_database.md | 51 + scriptdoc/setup_opensearch.md | 59 + scripts/backfill_archive_content.php | 117 ++ scripts/reindex_opensearch.php | 68 ++ scripts/setup_admin_users.php | 92 ++ 40 files changed, 5590 insertions(+), 36 deletions(-) create mode 100644 .version create mode 100644 apidoc/README.md create mode 100644 apidoc/adminapi.md 
create mode 100644 apidoc/evidenceapi.md create mode 100644 app/controller/AdminController.php create mode 100644 app/controller/Api/AdminAuthController.php create mode 100644 app/controller/Api/AdminConsoleController.php create mode 100644 app/controller/Api/EvidenceController.php create mode 100644 app/service/AdminAuthService.php create mode 100644 app/service/AdminConsole/AdminDocService.php create mode 100644 app/service/AdminConsole/ArchiveAdminService.php create mode 100644 app/service/AdminConsole/MaintenanceScriptService.php create mode 100644 app/service/AdminConsole/MarkdownRenderer.php create mode 100644 app/service/AdminConsole/OpenSearchAdminService.php create mode 100644 app/service/AdminUserRepository.php create mode 100644 app/view/admin/dashboard.html create mode 100644 app/view/admin/landing.html create mode 100644 app/view/admin/login.html create mode 100644 config/admin.php create mode 100644 public/admin.css create mode 100644 scriptdoc/README.md create mode 100644 scriptdoc/backfill_archive_content.md create mode 100644 scriptdoc/reindex_opensearch.md create mode 100644 scriptdoc/setup_admin_users.md create mode 100644 scriptdoc/setup_database.md create mode 100644 scriptdoc/setup_opensearch.md create mode 100644 scripts/backfill_archive_content.php create mode 100644 scripts/reindex_opensearch.php create mode 100644 scripts/setup_admin_users.php diff --git a/.env b/.env index 1568105..ff8a035 100644 --- a/.env +++ b/.env @@ -7,6 +7,7 @@ LLM_METADATA_ENABLED="true" LLM_METADATA_MODEL="glm-4.7-flash" LLM_METADATA_MAX_TOKENS=2480 LLM_METADATA_TEMPERATURE=0.1 -OPENSEARCH_HOST="http://localhost:9200" +OPENSEARCH_HOST="https://localhost:9200" OPENSEARCH_USERNAME="admin" -OPENSEARCH_PASSWORD="proofdb" \ No newline at end of file +OPENSEARCH_PASSWORD="proofdb" +ARCHIVE_CASK_URL="https://archive-cask.example.com" diff --git a/.version b/.version new file mode 100644 index 0000000..6c6aa7c --- /dev/null +++ b/.version @@ -0,0 +1 @@ +0.1.0 \ No newline 
at end of file diff --git a/apidoc/README.md b/apidoc/README.md new file mode 100644 index 0000000..b90938d --- /dev/null +++ b/apidoc/README.md @@ -0,0 +1,49 @@ +# API 文档总览 + +当前 `apidoc/` 中的文档按接口域拆分: + +- [importapi.md](/www/proofdb/apidoc/importapi.md): 档案导入接口 +- [adminapi.md](/www/proofdb/apidoc/adminapi.md): 管理员认证与后台维护接口 +- [searchapi.md](/www/proofdb/apidoc/searchapi.md): 全文、向量、混合搜索接口 +- [evidenceapi.md](/www/proofdb/apidoc/evidenceapi.md): chunk 详情与 evidence 接口 + +## 当前已实现接口 + +```http +POST /api/articles/import +POST /api/admin/login +POST /api/admin/logout +GET /api/admin/me +GET /api/admin/archives +GET /api/admin/archives/{archive_uid} +PATCH /api/admin/archives/{archive_uid} +DELETE /api/admin/archives/{archive_uid} +GET /api/admin/opensearch/status +GET /api/admin/opensearch/documents +GET /api/admin/users +POST /api/admin/users +PATCH /api/admin/users/{id} +GET /api/admin/docs +GET /api/admin/docs/{name} +GET /api/admin/scripts +GET /api/admin/scripts/{name} +POST /api/admin/scripts/run +POST /api/search/fulltext +POST /api/search/vector +POST /api/search/hybrid +GET /api/chunks/{chunk_uid} +GET /api/evidence/{chunk_uid} +``` + +## 当前接口分层 + +- 导入层:把 Markdown 档案解析为 archive / chunk,并写入 PostgreSQL。 +- 管理层:管理员登录、会话识别、archives 表管理、OpenSearch 状态、用户管理、文档查看与维护脚本执行。 +- 检索层:从 OpenSearch 做 BM25、向量和 hybrid 检索。 +- 证据层:把 `chunk_uid` 落到 citation、页码和证据正文。 + +## 说明 + +- 搜索接口中的 `hits` 始终表示“当前请求下返回的候选结果数组”,不是数据库全量导出。 +- `fulltext`、`vector`、`hybrid` 都支持 `limit`。 +- `hybrid` 的 `total` 表示融合后的候选总数;更细的来源统计在 `sources` 字段中。 diff --git a/apidoc/adminapi.md b/apidoc/adminapi.md new file mode 100644 index 0000000..c963f63 --- /dev/null +++ b/apidoc/adminapi.md @@ -0,0 +1,355 @@ +# 管理员后台 API + +## 接口说明 + +这组接口服务于 Proof DB 的管理员维护面板,包括: + +- 管理员登录与会话读取 +- `archives` 表管理 +- OpenSearch 状态查看 +- 管理员用户管理 +- APIDOC 文档查看 +- 维护脚本执行 + +管理员网页入口仍然是: + +- `GET /` +- `GET /admin/login` +- `GET /admin` + +## 管理员认证 + +### 管理员登录 + +```http +POST /api/admin/login +``` + +`Content-Type: 
application/json` + +| 字段 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `username` | string | 是 | 管理员用户名 | +| `password` | string | 是 | 管理员密码 | + +### 管理员退出登录 + +```http +POST /api/admin/logout +``` + +### 当前管理员会话 + +```http +GET /api/admin/me +``` + +未登录时返回: + +```json +{ + "code": 401, + "message": "Admin session not found." +} +``` + +## archives 表管理 + +### 获取档案列表 + +```http +GET /api/admin/archives +``` + +### 查询参数 + +| 字段 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `query` | string | 否 | 按 `archive_uid`、`title`、`summary`、`author`、`source`、`series` 模糊搜索 | +| `page` | integer | 否 | 页码,默认 `1` | +| `page_size` | integer | 否 | 每页条数,默认 `20`,最大 `100` | + +### 请求示例 + +```bash +curl '/api/admin/archives?query=iraq&page=1&page_size=20' +``` + +### 成功响应 + +```json +{ + "code": 0, + "message": "Archive list loaded.", + "data": { + "items": [ + { + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "title": "1.test", + "summary": "....", + "year": 1991, + "author": "....", + "source": "....", + "series": null, + "tags": ["Iraq", "Kuwait"], + "chunk_count": 14, + "created_time": "2026-05-07 12:00:00+00", + "updated_time": "2026-05-07 12:10:00+00" + } + ], + "total": 1, + "page": 1, + "page_size": 20 + } +} +``` + +### 获取单条档案详情 + +```http +GET /api/admin/archives/{archive_uid} +``` + +### 更新单条档案 + +```http +PATCH /api/admin/archives/{archive_uid} +``` + +`Content-Type: application/json` + +可更新字段: + +- `title` +- `summary` +- `year` +- `author` +- `source` +- `series` +- `tags` +- `metadata` +- `content` +- `raw` + +其中: + +- `tags` 可以传字符串,也可以传数组;字符串会按逗号或换行拆分 +- `metadata` 可以传 JSON 对象,也可以传 JSON 字符串 +- `year` 为空时会写回 `null` + +### 更新请求示例 + +```bash +curl -X PATCH /api/admin/archives/01KQHVREB6XPYF604RVZAP9NNY \ + -H 'Content-Type: application/json' \ + --data '{ + "title": "Updated Title", + "summary": "Updated summary", + "year": 1991, + "tags": ["Iraq", "Kuwait"], + "metadata": { + "reviewed_by": "admin" + } + }' +``` + +### 删除单条档案 + +```http +DELETE 
/api/admin/archives/{archive_uid} +``` + +删除后会因外键约束级联删除对应 `chunks` 记录。 + +## OpenSearch 状态查看 + +### 获取 OpenSearch 管理状态 + +```http +GET /api/admin/opensearch/status +``` + +### 成功响应要点 + +响应中会同时返回: + +- OpenSearch 连接配置摘要 +- PostgreSQL 侧 `archives/chunks` 数量 +- `embedded_chunks` +- `indexed_chunks` +- 当前索引是否存在 +- `docs_count` +- cluster 健康状态 +- mapping 字段列表 + +如果 OpenSearch 当前不可达,仍会返回数据库部分统计,但 `opensearch.error` 会带出错误信息。 + +### 获取 OpenSearch 索引文档粗览 + +```http +GET /api/admin/opensearch/documents +``` + +### 查询参数 + +| 字段 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `query` | string | 否 | 按 `title`、`summary`、`source`、`author`、`text` 等字段做粗略搜索 | +| `size` | integer | 否 | 返回条数,默认 `20`,最大 `50` | + +说明: + +- 这是索引粗览接口,不返回向量字段本身。 +- 返回中会包含 `text_preview`,用于后台快速检查索引内容是否正确进入 OpenSearch。 + +## 管理员用户管理 + +### 获取管理员用户列表 + +```http +GET /api/admin/users +``` + +### 创建管理员用户 + +```http +POST /api/admin/users +``` + +`Content-Type: application/json` + +| 字段 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `username` | string | 是 | 新管理员用户名 | +| `password` | string | 是 | 新管理员密码 | +| `display_name` | string | 否 | 展示名称 | + +### 更新管理员用户 + +```http +PATCH /api/admin/users/{id} +``` + +`Content-Type: application/json` + +可更新字段: + +- `display_name` +- `password` +- `is_active` + +说明: + +- `password` 为空时表示不修改 +- `is_active=false` 后,该账号将不能再登录 + +## APIDOC 查看 + +### 获取文档列表 + +```http +GET /api/admin/docs +``` + +返回 `/apidoc` 目录下当前可查看的 `.md` 文档列表。 + +### 获取单份文档内容 + +```http +GET /api/admin/docs/{name} +``` + +例如: + +```bash +curl /api/admin/docs/searchapi.md +``` + +响应中会带: + +- `name` +- `title` +- `content` +- `html` + +其中: + +- `content` 为原始 Markdown 文本 +- `html` 为后台面板可直接渲染的 HTML + +## 维护脚本伪终端 + +### 获取白名单脚本列表 + +```http +GET /api/admin/scripts +``` + +当前返回的是允许在管理员面板里执行的 `scripts/*.php` 白名单,而不是任意文件系统扫描。 + +如果对应脚本在 `/scriptdoc` 中存在同名文档,列表接口也会带出: + +- `doc_title` +- `doc_html` +- `doc_content` + +### 获取单个维护脚本详情 + +```http +GET /api/admin/scripts/{name} +``` + +这个接口会返回单个脚本的说明、参数提示,以及可用的脚本文档内容。 + 
+### 执行维护脚本 + +```http +POST /api/admin/scripts/run +``` + +`Content-Type: application/json` + +| 字段 | 类型 | 必填 | 说明 | +| --- | --- | --- | --- | +| `script_name` | string | 是 | 白名单脚本名,如 `reindex_opensearch` | +| `args` | string[] | 否 | 参数数组,仅允许 `--key=value` 风格 | + +### 请求示例 + +```bash +curl -X POST /api/admin/scripts/run \ + -H 'Content-Type: application/json' \ + --data '{ + "script_name": "reindex_opensearch", + "args": ["--archive_uid=01KQHVREB6XPYF604RVZAP9NNY"] + }' +``` + +### 成功响应 + +```json +{ + "code": 0, + "message": "Maintenance script finished.", + "data": { + "script_name": "reindex_opensearch", + "command": [ + "php", + "scripts/reindex_opensearch.php", + "--archive_uid=01KQHVREB6XPYF604RVZAP9NNY" + ], + "exit_code": 0, + "stdout": "....", + "stderr": "", + "ok": true + } +} +``` + +## 权限与错误语义 + +- 除 `POST /api/admin/login` 外,本文件中的所有接口都要求已有管理员会话。 +- 未登录时统一返回 `401`。 +- 参数不合法时通常返回 `422`。 +- JSON 格式错误时返回 `400`。 +- 后端异常时返回 `500`。 diff --git a/apidoc/evidenceapi.md b/apidoc/evidenceapi.md new file mode 100644 index 0000000..23c8a99 --- /dev/null +++ b/apidoc/evidenceapi.md @@ -0,0 +1,382 @@ +# Chunk 与 Evidence API + +## 接口说明 + +这组接口用于把搜索结果落到可读的证据对象。 + +- `GET /api/archives/{archive_uid}` 返回 archive 级详情。 +- `GET /api/archives/{archive_uid}/chunks` 返回该 archive 下的 chunk 列表。 +- `GET /api/archives/{archive_uid}/evidence` 返回该 archive 下适合引用/AI 消费的证据列表。 +- `GET /api/chunks/{chunk_uid}` 偏底层,返回 chunk 详情和所属 archive 信息。 +- `GET /api/evidence/{chunk_uid}` 偏引用与展示,返回 citation、页码标签和证据正文。 + +其中 archive 接口以 `archive_uid` 为主键,另外两者以 `chunk_uid` 为主键。 + +## Archive 详情 + +```http +GET /api/archives/{archive_uid} +``` + +### 请求示例 + +```bash +curl /api/archives/01KQHVREB6XPYF604RVZAP9NNY +``` + +### 成功响应 + +状态码: + +```http +200 OK +``` + +响应示例: + +```json +{ + "code": 0, + "message": "Archive loaded.", + "data": { + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "title": "1.test", + "summary": "This directive, signed by Brent Scowcroft ...", + "year": 1992, + "author": "Brent 
Scowcroft", + "source": "test/1.test.md", + "series": null, + "tags": ["National Security", "Policy"], + "metadata": { + "ai_enrichment": { + "provider": "bigmodel" + } + }, + "content": "full normalized archive content ...", + "raw": "# 1.test ...", + "chunks": [ + "01KQHVREB6XPYF604RVZAP9NNY_1_39003", + "01KQHVREB6XPYF604RVZAP9NNY_2_12345" + ], + "chunk_count": 14 + } +} +``` + +说明: + +- `content` 是归一化后的 archive 正文。 +- `raw` 是导入时保存的原始 Markdown。 +- `chunks` 是当前 archive 关联的 `chunk_uid` 列表。 +- `chunk_count` 方便调用方快速判断档案规模,而不必自己数数组长度。 + +### 错误响应 + +#### archive 不存在 + +状态码: + +```http +404 Not Found +``` + +```json +{ + "code": 404, + "message": "Archive not found.", + "errors": { + "archive_uid": "missing_archive_uid" + } +} +``` + +## Archive 下的 Chunk 列表 + +```http +GET /api/archives/{archive_uid}/chunks +``` + +### 请求示例 + +```bash +curl /api/archives/01KQHVREB6XPYF604RVZAP9NNY/chunks +``` + +### 成功响应 + +状态码: + +```http +200 OK +``` + +响应示例: + +```json +{ + "code": 0, + "message": "Archive chunks loaded.", + "data": { + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "title": "1.test", + "summary": "This directive, signed by Brent Scowcroft ...", + "source": "test/1.test.md", + "author": "Brent Scowcroft", + "year": 1992, + "series": null, + "tags": ["National Security", "Policy"], + "chunk_count": 14, + "chunks": [ + { + "chunk_uid": "01KQHVREB6XPYF604RVZAP9NNY_1_39003", + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "chunk_index": 1, + "page_start": 1, + "page_end": 1, + "pages": [1], + "text": "chunk text...", + "length": 300, + "embedding_status": 3, + "embedding_ref": { + "provider": "bigmodel", + "model": "embedding-3", + "dimensions": 2048 + }, + "embedding_model": "embedding-3", + "embedding_error": null, + "search_index_status": 3, + "search_index_error": null, + "archive": { + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "title": "1.test", + "summary": "This directive, signed by Brent Scowcroft ...", + "year": 1992, + "author": "Brent Scowcroft", + 
"source": "test/1.test.md", + "series": null, + "tags": ["National Security", "Policy"], + "metadata": {} + } + } + ] + } +} +``` + +说明: + +- 这个接口偏底层,适合按 archive 批量读取完整 chunk 数据。 +- `chunks` 按 `chunk_index` 升序返回。 + +## Archive 级 Evidence 列表 + +```http +GET /api/archives/{archive_uid}/evidence +``` + +### 请求示例 + +```bash +curl /api/archives/01KQHVREB6XPYF604RVZAP9NNY/evidence +``` + +### 成功响应 + +状态码: + +```http +200 OK +``` + +响应示例: + +```json +{ + "code": 0, + "message": "Archive evidence loaded.", + "data": { + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "title": "1.test", + "summary": "This directive, signed by Brent Scowcroft ...", + "source": "test/1.test.md", + "author": "Brent Scowcroft", + "year": 1992, + "series": null, + "tags": ["National Security", "Policy"], + "chunk_count": 14, + "evidence": [ + { + "chunk_uid": "01KQHVREB6XPYF604RVZAP9NNY_1_39003", + "chunk_index": 1, + "page_start": 1, + "page_end": 1, + "pages": [1], + "page_label": "p. 1", + "citation": "1.test | Brent Scowcroft | 1992 | p. 1 | test/1.test.md", + "quote": "chunk text...", + "length": 300, + "embedding_model": "embedding-3", + "embedding_status": 3, + "search_index_status": 3 + } + ] + } +} +``` + +说明: + +- 这个接口偏上层,适合 AI、RAG、引用构造和前端证据列表展示。 +- `evidence` 里的每一项都保留了 citation 所需的页码和引用文本。 + +## Chunk 详情 + +```http +GET /api/chunks/{chunk_uid} +``` + +### 请求示例 + +```bash +curl /api/chunks/01KQHVREB6XPYF604RVZAP9NNY_14_97554 +``` + +### 成功响应 + +状态码: + +```http +200 OK +``` + +响应示例: + +```json +{ + "code": 0, + "message": "Chunk loaded.", + "data": { + "chunk_uid": "01KQHVREB6XPYF604RVZAP9NNY_14_97554", + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "chunk_index": 14, + "page_start": 8, + "page_end": 8, + "pages": [8], + "text": "NSD 45 20 AUG 90 U.S. 
Policy in Response to the Iraqi Invasion of Kuwait ...", + "length": 148, + "embedding_status": 3, + "embedding_ref": { + "provider": "bigmodel", + "model": "embedding-3", + "dimensions": 2048 + }, + "embedding_model": "embedding-3", + "embedding_error": null, + "search_index_status": 3, + "search_index_error": null, + "archive": { + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "title": "1.test", + "summary": null, + "year": 1992, + "author": "Brent Scowcroft", + "source": "test/1.test.md", + "series": null, + "tags": [], + "metadata": {} + } + } +} +``` + +### 错误响应 + +#### chunk 不存在 + +状态码: + +```http +404 Not Found +``` + +```json +{ + "code": 404, + "message": "Chunk not found.", + "errors": { + "chunk_uid": "missing_chunk_uid" + } +} +``` + +## Evidence 详情 + +```http +GET /api/evidence/{chunk_uid} +``` + +### 请求示例 + +```bash +curl /api/evidence/01KQHVREB6XPYF604RVZAP9NNY_14_97554 +``` + +### 成功响应 + +状态码: + +```http +200 OK +``` + +响应示例: + +```json +{ + "code": 0, + "message": "Evidence loaded.", + "data": { + "chunk_uid": "01KQHVREB6XPYF604RVZAP9NNY_14_97554", + "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", + "title": "1.test", + "source": "test/1.test.md", + "author": "Brent Scowcroft", + "year": 1992, + "series": null, + "tags": [], + "page_start": 8, + "page_end": 8, + "pages": [8], + "page_label": "p. 8", + "citation": "1.test | Brent Scowcroft | 1992 | p. 8 | test/1.test.md", + "quote": "NSD 45 20 AUG 90 U.S. 
Policy in Response to the Iraqi Invasion of Kuwait ...", + "chunk": { + "chunk_index": 14, + "length": 148, + "embedding_model": "embedding-3", + "embedding_status": 3, + "search_index_status": 3 + } + } +} +``` + +### 错误响应 + +#### evidence 不存在 + +状态码: + +```http +404 Not Found +``` + +```json +{ + "code": 404, + "message": "Evidence not found.", + "errors": { + "chunk_uid": "missing_chunk_uid" + } +} +``` diff --git a/apidoc/importapi.md b/apidoc/importapi.md index 7d77345..cd1ce23 100644 --- a/apidoc/importapi.md +++ b/apidoc/importapi.md @@ -90,7 +90,7 @@ POST /api/articles/import ## 请求示例 ```bash -curl -X POST http://127.0.0.1:8787/api/articles/import \ +curl -X POST /api/articles/import \ -F 'title=NSD 76 Disposition of NSC Policy Documents' \ -F 'source=archive://nsc/nsd-76' \ -F 'chunk_size=800' \ @@ -101,7 +101,7 @@ curl -X POST http://127.0.0.1:8787/api/articles/import \ 也可以直接发送 Markdown 原文: ```bash -curl -X POST 'http://127.0.0.1:8787/api/articles/import?title=NSD%2076&source=archive://nsc/nsd-76' \ +curl -X POST '/api/articles/import?title=NSD%2076&source=archive://nsc/nsd-76' \ -H 'Content-Type: text/markdown' \ --data-binary '@test/1.test.md' ``` @@ -109,7 +109,7 @@ curl -X POST 'http://127.0.0.1:8787/api/articles/import?title=NSD%2076&source=ar JSON 调用示例: ```bash -curl -X POST http://127.0.0.1:8787/api/articles/import \ +curl -X POST /api/articles/import \ -H 'Content-Type: application/json' \ --data '{ "title": "NSD 76 Disposition of NSC Policy Documents", diff --git a/apidoc/searchapi.md b/apidoc/searchapi.md index 52ad811..0b58411 100644 --- a/apidoc/searchapi.md +++ b/apidoc/searchapi.md @@ -7,6 +7,7 @@ Proof DB 的搜索接口基于 OpenSearch `proofdb_chunks` 索引。当前版本 OpenSearch 中每个 chunk 文档同时包含: - `text` 等全文字段,用于 BM25 检索。 +- `summary` 档案摘要字段,会参与全文检索,也会随搜索结果一起返回。 - `embedding` 2048 维向量字段,用于后续 vector / hybrid 检索。 ## 全文搜索 @@ -35,7 +36,7 @@ POST /api/search/fulltext ### 请求示例 ```bash -curl -X POST http://127.0.0.1:8787/api/search/fulltext \ +curl -X POST 
/api/search/fulltext \ -H 'Content-Type: application/json' \ --data '{ "query": "policy documents", @@ -46,7 +47,7 @@ curl -X POST http://127.0.0.1:8787/api/search/fulltext \ 带过滤条件: ```bash -curl -X POST http://127.0.0.1:8787/api/search/fulltext \ +curl -X POST /api/search/fulltext \ -H 'Content-Type: application/json' \ --data '{ "query": "Iraq Kuwait", @@ -87,6 +88,7 @@ curl -X POST http://127.0.0.1:8787/api/search/fulltext \ "page_start": 1, "page_end": 1, "title": "NSD 76 Disposition of NSC Policy Documents", + "summary": "Summary text...", "source": "archive://nsc/nsd-76", "author": "Brent Scowcroft", "year": 1992, @@ -101,6 +103,12 @@ curl -X POST http://127.0.0.1:8787/api/search/fulltext \ } ``` +说明: + +- `hits` 是当前返回的结果数组。 +- `total` 是当前 full-text 查询下的命中总数。 +- 全文搜索当前会综合匹配 `text`、`title`、`summary`、`source`、`author`、`series`、`tags`。 + ### 错误响应 #### JSON 格式错误 @@ -157,8 +165,6 @@ curl -X POST http://127.0.0.1:8787/api/search/fulltext \ } ``` -## 后续接口 - ## 向量搜索 ```http @@ -179,7 +185,7 @@ POST /api/search/vector ### 请求示例 ```bash -curl -X POST http://127.0.0.1:8787/api/search/vector \ +curl -X POST /api/search/vector \ -H 'Content-Type: application/json' \ --data '{ "query": "Iraq invasion and Desert Storm", @@ -191,7 +197,7 @@ curl -X POST http://127.0.0.1:8787/api/search/vector \ 中文 query 也可以提交给向量搜索: ```bash -curl -X POST http://127.0.0.1:8787/api/search/vector \ +curl -X POST /api/search/vector \ -H 'Content-Type: application/json' \ --data '{ "query": "伊拉克入侵科威特与沙漠风暴", @@ -231,6 +237,7 @@ curl -X POST http://127.0.0.1:8787/api/search/vector \ "page_start": 8, "page_end": 8, "title": "NSD 76 Disposition of NSC Policy Documents", + "summary": "Summary text...", "source": "archive://nsc/nsd-76", "author": "Brent Scowcroft", "year": 1992, @@ -246,6 +253,12 @@ curl -X POST http://127.0.0.1:8787/api/search/vector \ ``` +说明: + +- `hits` 是当前返回的结果数组。 +- `total` 是当前 vector 查询返回的候选总数。 +- `embedding_dimensions` 是本次 query embedding 的维度,而不是索引总维度统计字段。 + ### 错误响应 
错误响应格式与全文搜索一致。常见错误包括: @@ -254,8 +267,6 @@ curl -X POST http://127.0.0.1:8787/api/search/vector \ - 缺少 `query`:`422 Unprocessable Entity` - embedding API 或 OpenSearch 查询失败:`500 Internal Server Error` -## 后续接口 - ## 混合搜索 ```http @@ -290,7 +301,7 @@ POST /api/search/hybrid ### 请求示例 ```bash -curl -X POST http://127.0.0.1:8787/api/search/hybrid \ +curl -X POST /api/search/hybrid \ -H 'Content-Type: application/json' \ --data '{ "query": "Iraq invasion and Desert Storm", @@ -302,7 +313,7 @@ curl -X POST http://127.0.0.1:8787/api/search/hybrid \ 中文 query: ```bash -curl -X POST http://127.0.0.1:8787/api/search/hybrid \ +curl -X POST /api/search/hybrid \ -H 'Content-Type: application/json' \ --data '{ "query": "伊拉克入侵科威特与沙漠风暴", @@ -370,6 +381,7 @@ curl -X POST http://127.0.0.1:8787/api/search/hybrid \ "archive_uid": "01KQHVREB6XPYF604RVZAP9NNY", "page_start": 8, "page_end": 8, + "summary": "Summary text...", "text": "chunk text..." } ] @@ -377,6 +389,14 @@ curl -X POST http://127.0.0.1:8787/api/search/hybrid \ } ``` +说明: + +- `hits` 是融合排序后的结果数组。 +- `total` 是融合后的候选总数。 +- `sources.fulltext_total` 与 `sources.vector_total` 分别表示两路召回的原始统计。 +- `rank_sources` 用于说明某条结果在 fulltext / vector 两路中的排名与 RRF 贡献。 +- `summary` 来自 archive 级摘要元数据,不是 chunk 单独生成的摘要。 + ### 错误响应 错误响应格式与全文搜索一致。常见错误包括: @@ -385,11 +405,8 @@ curl -X POST http://127.0.0.1:8787/api/search/hybrid \ - 缺少 `query`:`422 Unprocessable Entity` - embedding API、全文搜索或向量搜索失败:`500 Internal Server Error` -## 后续接口 +## 相关接口 -以下能力尚未实现: +与搜索结果配套的证据查看接口见: -```http -GET /api/chunks/{chunk_uid} -GET /api/evidence/{chunk_uid} -``` +- [evidenceapi.md](/www/proofdb/apidoc/evidenceapi.md) diff --git a/app/controller/AdminController.php b/app/controller/AdminController.php new file mode 100644 index 0000000..20fd667 --- /dev/null +++ b/app/controller/AdminController.php @@ -0,0 +1,64 @@ +current($request) !== null) { + return $this->redirect('/admin'); + } + + return view('admin/landing', [ + 'archiveCaskUrl' => config('admin.archive_cask_url', 
''), + 'version' => $this->version(), + ]); + } + + public function login(Request $request): Response + { + if ((new AdminAuthService())->current($request) !== null) { + return $this->redirect('/admin'); + } + + return view('admin/login', [ + 'archiveCaskUrl' => config('admin.archive_cask_url', ''), + 'version' => $this->version(), + ]); + } + + public function dashboard(Request $request): Response + { + $admin = (new AdminAuthService())->current($request); + if ($admin === null) { + return $this->redirect('/admin/login'); + } + + return view('admin/dashboard', [ + 'archiveCaskUrl' => config('admin.archive_cask_url', ''), + 'admin' => $admin, + 'version' => $this->version(), + ]); + } + + private function redirect(string $location): Response + { + return response('', 302, ['Location' => $location]); + } + + private function version(): string + { + $path = base_path('.version'); + if (!is_file($path)) { + return 'unknown'; + } + + $value = trim((string) file_get_contents($path)); + return $value !== '' ? $value : 'unknown'; + } +} diff --git a/app/controller/Api/AdminAuthController.php b/app/controller/Api/AdminAuthController.php new file mode 100644 index 0000000..7403aca --- /dev/null +++ b/app/controller/Api/AdminAuthController.php @@ -0,0 +1,129 @@ +payload($request); + $username = trim((string) ($payload['username'] ?? '')); + $password = (string) ($payload['password'] ?? 
''); + + if ($username === '' || $password === '') { + throw new InvalidArgumentException('username and password are required.'); + } + + $auth = new AdminAuthService(); + $user = $auth->authenticate($username, $password); + if ($user === null) { + return $this->jsonResponse([ + 'code' => 401, + 'message' => 'Admin login failed.', + 'errors' => ['auth' => 'invalid username or password.'], + ], 401); + } + + $auth->login($request, $user); + } catch (JsonException $exception) { + return $this->jsonResponse([ + 'code' => 400, + 'message' => 'Invalid JSON body.', + 'errors' => ['body' => $exception->getMessage()], + ], 400); + } catch (InvalidArgumentException $exception) { + return $this->jsonResponse([ + 'code' => 422, + 'message' => 'Admin login validation failed.', + 'errors' => ['auth' => $exception->getMessage()], + ], 422); + } catch (Throwable $exception) { + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Admin login failed.', + 'errors' => ['auth' => $exception->getMessage()], + ], 500); + } + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Admin login completed.', + 'data' => ['admin' => $user], + ], 200); + } + + public function logout(Request $request): Response + { + try { + (new AdminAuthService())->logout($request); + } catch (Throwable $exception) { + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Admin logout failed.', + 'errors' => ['auth' => $exception->getMessage()], + ], 500); + } + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Admin logout completed.', + ], 200); + } + + public function me(Request $request): Response + { + try { + $admin = (new AdminAuthService())->current($request); + } catch (Throwable $exception) { + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Admin session lookup failed.', + 'errors' => ['auth' => $exception->getMessage()], + ], 500); + } + + if ($admin === null) { + return $this->jsonResponse([ + 'code' => 401, + 'message' => 'Admin session not 
found.', + ], 401); + } + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Admin session loaded.', + 'data' => ['admin' => $admin], + ], 200); + } + + /** + * @throws JsonException + */ + private function payload(Request $request): array + { + $rawBody = trim($request->rawBody()); + if ($rawBody === '') { + return $request->post(); + } + + $payload = json_decode($rawBody, true, 512, JSON_THROW_ON_ERROR); + return is_array($payload) ? $payload : []; + } + + private function jsonResponse(array $data, int $status): Response + { + return response( + json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR), + $status, + ['Content-Type' => 'application/json'] + ); + } +} diff --git a/app/controller/Api/AdminConsoleController.php b/app/controller/Api/AdminConsoleController.php new file mode 100644 index 0000000..d2cbdfd --- /dev/null +++ b/app/controller/Api/AdminConsoleController.php @@ -0,0 +1,351 @@ +guard($request)) { + return $guard; + } + + try { + $data = (new ArchiveAdminService())->list( + trim((string) $request->get('query', '')), + (int) $request->get('page', 1), + (int) $request->get('page_size', 20), + ); + } catch (Throwable $exception) { + return $this->error(500, 'Archive list lookup failed.', ['archives' => $exception->getMessage()]); + } + + return $this->ok('Archive list loaded.', $data); + } + + public function archive(Request $request, string $archiveUid): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $archive = (new ArchiveAdminService())->detail($archiveUid); + } catch (Throwable $exception) { + return $this->error(500, 'Archive lookup failed.', ['archive' => $exception->getMessage()]); + } + + if ($archive === null) { + return $this->error(404, 'Archive not found.', ['archive_uid' => $archiveUid], 404); + } + + return $this->ok('Archive loaded.', $archive); + } + + public function updateArchive(Request $request, string $archiveUid): Response + { + if ($guard = 
$this->guard($request)) { + return $guard; + } + + try { + $archive = (new ArchiveAdminService())->update($archiveUid, $this->payload($request)); + } catch (JsonException $exception) { + return $this->error(400, 'Invalid JSON body.', ['body' => $exception->getMessage()], 400); + } catch (InvalidArgumentException $exception) { + return $this->error(422, 'Archive update validation failed.', ['archive' => $exception->getMessage()], 422); + } catch (Throwable $exception) { + return $this->error(500, 'Archive update failed.', ['archive' => $exception->getMessage()]); + } + + if ($archive === null) { + return $this->error(404, 'Archive not found.', ['archive_uid' => $archiveUid], 404); + } + + return $this->ok('Archive updated.', $archive); + } + + public function deleteArchive(Request $request, string $archiveUid): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $deleted = (new ArchiveAdminService())->delete($archiveUid); + } catch (Throwable $exception) { + return $this->error(500, 'Archive delete failed.', ['archive' => $exception->getMessage()]); + } + + if (!$deleted) { + return $this->error(404, 'Archive not found.', ['archive_uid' => $archiveUid], 404); + } + + return $this->ok('Archive deleted.', ['archive_uid' => $archiveUid]); + } + + public function openSearchStatus(Request $request): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $status = (new OpenSearchAdminService())->status(); + } catch (Throwable $exception) { + return $this->error(500, 'OpenSearch status lookup failed.', ['opensearch' => $exception->getMessage()]); + } + + return $this->ok('OpenSearch status loaded.', $status); + } + + public function openSearchDocuments(Request $request): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $documents = (new OpenSearchAdminService())->documents( + trim((string) $request->get('query', '')), + (int) $request->get('size', 20), + ); + } catch 
(Throwable $exception) { + return $this->error(500, 'OpenSearch document lookup failed.', ['opensearch' => $exception->getMessage()]); + } + + return $this->ok('OpenSearch documents loaded.', $documents); + } + + public function users(Request $request): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $users = (new AdminUserRepository())->listAll(); + } catch (Throwable $exception) { + return $this->error(500, 'Admin users lookup failed.', ['users' => $exception->getMessage()]); + } + + return $this->ok('Admin users loaded.', ['items' => array_map(fn (array $user): array => $this->sanitizeUser($user), $users)]); + } + + public function createUser(Request $request): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $payload = $this->payload($request); + $username = trim((string) ($payload['username'] ?? '')); + $password = trim((string) ($payload['password'] ?? '')); + $displayName = trim((string) ($payload['display_name'] ?? '')); + + if ($username === '' || $password === '') { + throw new InvalidArgumentException('username and password are required.'); + } + + $repository = new AdminUserRepository(); + if ($repository->findAnyByUsername($username)) { + throw new InvalidArgumentException('username already exists.'); + } + + $user = $repository->create($username, $password, $displayName !== '' ? 
$displayName : null); + } catch (JsonException $exception) { + return $this->error(400, 'Invalid JSON body.', ['body' => $exception->getMessage()], 400); + } catch (InvalidArgumentException $exception) { + return $this->error(422, 'Admin user creation validation failed.', ['user' => $exception->getMessage()], 422); + } catch (Throwable $exception) { + return $this->error(500, 'Admin user creation failed.', ['user' => $exception->getMessage()]); + } + + return $this->ok('Admin user created.', ['user' => $this->sanitizeUser($user)]); + } + + public function updateUser(Request $request, int $id): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $payload = $this->payload($request); + $repository = new AdminUserRepository(); + if ($repository->findAnyById($id) === null) { + return $this->error(404, 'Admin user not found.', ['id' => $id], 404); + } + + $updates = []; + if (array_key_exists('display_name', $payload)) { + $updates['display_name'] = $payload['display_name']; + } + if (array_key_exists('password', $payload)) { + $updates['password'] = $payload['password']; + } + if (array_key_exists('is_active', $payload)) { + $updates['is_active'] = (bool) $payload['is_active']; + } + + $user = $repository->updateUser($id, $updates); + } catch (JsonException $exception) { + return $this->error(400, 'Invalid JSON body.', ['body' => $exception->getMessage()], 400); + } catch (Throwable $exception) { + return $this->error(500, 'Admin user update failed.', ['user' => $exception->getMessage()]); + } + + return $this->ok('Admin user updated.', ['user' => $this->sanitizeUser($user ?? 
[])]); + } + + public function docs(Request $request): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $docs = (new AdminDocService())->list(); + } catch (Throwable $exception) { + return $this->error(500, 'API docs lookup failed.', ['docs' => $exception->getMessage()]); + } + + return $this->ok('API docs loaded.', ['items' => $docs]); + } + + public function doc(Request $request, string $name): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $doc = (new AdminDocService())->read($name); + } catch (Throwable $exception) { + return $this->error(404, 'API doc not found.', ['doc' => $exception->getMessage()], 404); + } + + return $this->ok('API doc loaded.', $doc); + } + + public function scripts(Request $request): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + return $this->ok('Maintenance scripts loaded.', ['items' => (new MaintenanceScriptService())->list()]); + } + + public function script(Request $request, string $name): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $script = (new MaintenanceScriptService())->describe($name); + } catch (Throwable $exception) { + return $this->error(404, 'Maintenance script not found.', ['script' => $exception->getMessage()], 404); + } + + return $this->ok('Maintenance script loaded.', $script); + } + + public function runScript(Request $request): Response + { + if ($guard = $this->guard($request)) { + return $guard; + } + + try { + $payload = $this->payload($request); + $scriptName = trim((string) ($payload['script_name'] ?? '')); + $args = $payload['args'] ?? 
[]; + + if ($scriptName === '') { + throw new InvalidArgumentException('script_name is required.'); + } + if (!is_array($args)) { + throw new InvalidArgumentException('args must be an array.'); + } + + $result = (new MaintenanceScriptService())->run($scriptName, $args); + } catch (JsonException $exception) { + return $this->error(400, 'Invalid JSON body.', ['body' => $exception->getMessage()], 400); + } catch (InvalidArgumentException $exception) { + return $this->error(422, 'Script execution validation failed.', ['script' => $exception->getMessage()], 422); + } catch (Throwable $exception) { + return $this->error(500, 'Script execution failed.', ['script' => $exception->getMessage()]); + } + + return $this->ok('Maintenance script finished.', $result); + } + + private function guard(Request $request): ?Response + { + return (new AdminAuthService())->current($request) === null + ? $this->error(401, 'Admin session not found.', [], 401) + : null; + } + + /** + * @throws JsonException + */ + private function payload(Request $request): array + { + $rawBody = trim($request->rawBody()); + if ($rawBody === '') { + return $request->post(); + } + + $payload = json_decode($rawBody, true, 512, JSON_THROW_ON_ERROR); + return is_array($payload) ? 
$payload : []; + } + + private function ok(string $message, array $data): Response + { + return $this->jsonResponse([ + 'code' => 0, + 'message' => $message, + 'data' => $data, + ], 200); + } + + private function error(int $code, string $message, array $errors = [], int $status = 500): Response + { + return $this->jsonResponse([ + 'code' => $code, + 'message' => $message, + 'errors' => $errors, + ], $status); + } + + private function sanitizeUser(array $user): array + { + unset($user['password_hash']); + return $user; + } + + private function jsonResponse(array $data, int $status): Response + { + return response( + json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR), + $status, + ['Content-Type' => 'application/json'] + ); + } +} diff --git a/app/controller/Api/EvidenceController.php b/app/controller/Api/EvidenceController.php new file mode 100644 index 0000000..0e3dff4 --- /dev/null +++ b/app/controller/Api/EvidenceController.php @@ -0,0 +1,255 @@ +findArchive($archiveUid); + } catch (Throwable $exception) { + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Archive lookup failed.', + 'errors' => ['archive' => $exception->getMessage()], + ], 500); + } + + if ($archive === null) { + return $this->jsonResponse([ + 'code' => 404, + 'message' => 'Archive not found.', + 'errors' => ['archive_uid' => $archiveUid], + ], 404); + } + + $archive['chunk_count'] = is_array($archive['chunks'] ?? null) ? 
count($archive['chunks']) : 0; + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Archive loaded.', + 'data' => $archive, + ], 200); + } + + public function chunk(string $chunkUid): Response + { + try { + $chunk = (new ArchiveRepository())->findChunk($chunkUid); + } catch (Throwable $exception) { + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Chunk lookup failed.', + 'errors' => ['chunk' => $exception->getMessage()], + ], 500); + } + + if ($chunk === null) { + return $this->jsonResponse([ + 'code' => 404, + 'message' => 'Chunk not found.', + 'errors' => ['chunk_uid' => $chunkUid], + ], 404); + } + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Chunk loaded.', + 'data' => $chunk, + ], 200); + } + + public function archiveChunks(string $archiveUid): Response + { + try { + $repository = new ArchiveRepository(); + $archive = $repository->findArchive($archiveUid); + $chunks = $archive === null ? [] : $repository->findArchiveChunks($archiveUid); + } catch (Throwable $exception) { + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Archive chunks lookup failed.', + 'errors' => ['archive_chunks' => $exception->getMessage()], + ], 500); + } + + if ($archive === null) { + return $this->jsonResponse([ + 'code' => 404, + 'message' => 'Archive not found.', + 'errors' => ['archive_uid' => $archiveUid], + ], 404); + } + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Archive chunks loaded.', + 'data' => [ + 'archive_uid' => $archive['archive_uid'], + 'title' => $archive['title'], + 'summary' => $archive['summary'], + 'source' => $archive['source'], + 'author' => $archive['author'], + 'year' => $archive['year'], + 'series' => $archive['series'], + 'tags' => $archive['tags'], + 'chunk_count' => count($chunks), + 'chunks' => $chunks, + ], + ], 200); + } + + public function evidence(string $chunkUid): Response + { + try { + $chunk = (new ArchiveRepository())->findChunk($chunkUid); + } catch (Throwable $exception) 
{ + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Evidence lookup failed.', + 'errors' => ['evidence' => $exception->getMessage()], + ], 500); + } + + if ($chunk === null) { + return $this->jsonResponse([ + 'code' => 404, + 'message' => 'Evidence not found.', + 'errors' => ['chunk_uid' => $chunkUid], + ], 404); + } + + $archive = $chunk['archive']; + $pages = $chunk['pages']; + $pageLabel = $this->pageLabel($pages); + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Evidence loaded.', + 'data' => [ + 'chunk_uid' => $chunk['chunk_uid'], + 'archive_uid' => $chunk['archive_uid'], + 'title' => $archive['title'] ?? null, + 'source' => $archive['source'] ?? null, + 'author' => $archive['author'] ?? null, + 'year' => $archive['year'] ?? null, + 'series' => $archive['series'] ?? null, + 'tags' => $archive['tags'] ?? [], + 'page_start' => $chunk['page_start'], + 'page_end' => $chunk['page_end'], + 'pages' => $pages, + 'page_label' => $pageLabel, + 'citation' => $this->citation($archive, $pageLabel), + 'quote' => $chunk['text'], + 'chunk' => [ + 'chunk_index' => $chunk['chunk_index'], + 'length' => $chunk['length'], + 'embedding_model' => $chunk['embedding_model'], + 'embedding_status' => $chunk['embedding_status'], + 'search_index_status' => $chunk['search_index_status'], + ], + ], + ], 200); + } + + public function archiveEvidence(string $archiveUid): Response + { + try { + $repository = new ArchiveRepository(); + $archive = $repository->findArchive($archiveUid); + $chunks = $archive === null ? 
[] : $repository->findArchiveChunks($archiveUid); + } catch (Throwable $exception) { + return $this->jsonResponse([ + 'code' => 500, + 'message' => 'Archive evidence lookup failed.', + 'errors' => ['archive_evidence' => $exception->getMessage()], + ], 500); + } + + if ($archive === null) { + return $this->jsonResponse([ + 'code' => 404, + 'message' => 'Archive not found.', + 'errors' => ['archive_uid' => $archiveUid], + ], 404); + } + + $evidence = array_map(function (array $chunk): array { + $archive = $chunk['archive']; + $pages = $chunk['pages']; + $pageLabel = $this->pageLabel($pages); + + return [ + 'chunk_uid' => $chunk['chunk_uid'], + 'chunk_index' => $chunk['chunk_index'], + 'page_start' => $chunk['page_start'], + 'page_end' => $chunk['page_end'], + 'pages' => $pages, + 'page_label' => $pageLabel, + 'citation' => $this->citation($archive, $pageLabel), + 'quote' => $chunk['text'], + 'length' => $chunk['length'], + 'embedding_model' => $chunk['embedding_model'], + 'embedding_status' => $chunk['embedding_status'], + 'search_index_status' => $chunk['search_index_status'], + ]; + }, $chunks); + + return $this->jsonResponse([ + 'code' => 0, + 'message' => 'Archive evidence loaded.', + 'data' => [ + 'archive_uid' => $archive['archive_uid'], + 'title' => $archive['title'], + 'summary' => $archive['summary'], + 'source' => $archive['source'], + 'author' => $archive['author'], + 'year' => $archive['year'], + 'series' => $archive['series'], + 'tags' => $archive['tags'], + 'chunk_count' => count($evidence), + 'evidence' => $evidence, + ], + ], 200); + } + + private function citation(array $archive, string $pageLabel): string + { + $parts = array_values(array_filter([ + $archive['title'] ?? null, + $archive['author'] ?? null, + isset($archive['year']) ? (string) $archive['year'] : null, + $pageLabel === '' ? null : $pageLabel, + $archive['source'] ?? 
null, + ], static fn ($value): bool => $value !== null && trim((string) $value) !== '')); + + return implode(' | ', $parts); + } + + private function pageLabel(array $pages): string + { + if ($pages === []) { + return ''; + } + + if (count($pages) === 1) { + return 'p. ' . (string) $pages[0]; + } + + return 'pp. ' . (string) $pages[0] . '-' . (string) $pages[count($pages) - 1]; + } + + private function jsonResponse(array $data, int $status): Response + { + return response( + json_encode($data, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR), + $status, + ['Content-Type' => 'application/json'] + ); + } +} diff --git a/app/service/AdminAuthService.php b/app/service/AdminAuthService.php new file mode 100644 index 0000000..bceaa2a --- /dev/null +++ b/app/service/AdminAuthService.php @@ -0,0 +1,63 @@ +users()->findByUsername($username); + if ($user === null || !password_verify($password, $user['password_hash'])) { + return null; + } + + unset($user['password_hash']); + return $user; + } + + public function login(Request $request, array $user): void + { + $request->session()->set(self::SESSION_KEY, (int) $user['id']); + $this->users()->touchLastLogin((int) $user['id']); + } + + public function logout(Request $request): void + { + $request->session()->delete(self::SESSION_KEY); + } + + public function current(Request $request): ?array + { + $id = (int) $request->session()->get(self::SESSION_KEY, 0); + if ($id <= 0) { + return null; + } + + $user = $this->users()->findById($id); + if ($user === null) { + $request->session()->delete(self::SESSION_KEY); + return null; + } + + unset($user['password_hash']); + return $user; + } + + private function users(): AdminUserRepository + { + return $this->users ?? 
new AdminUserRepository(); + } +} diff --git a/app/service/AdminConsole/AdminDocService.php b/app/service/AdminConsole/AdminDocService.php new file mode 100644 index 0000000..c4248b1 --- /dev/null +++ b/app/service/AdminConsole/AdminDocService.php @@ -0,0 +1,76 @@ + $name, + 'title' => $this->title($content, $name), + ]; + } + + usort($items, fn (array $a, array $b): int => strcmp($a['name'], $b['name'])); + return $items; + } + + public function read(string $name): array + { + $safeName = basename($name); + $path = base_path('apidoc/' . $safeName); + if (!is_file($path) || pathinfo($path, PATHINFO_EXTENSION) !== 'md') { + throw new RuntimeException('API doc not found.'); + } + + $content = (string) file_get_contents($path); + return [ + 'name' => $safeName, + 'title' => $this->title($content, $safeName), + 'content' => $content, + 'html' => $this->renderer()->render($content), + ]; + } + + public function readScriptDoc(string $name): array + { + $safeName = basename($name); + $path = base_path('scriptdoc/' . $safeName); + if (!is_file($path) || pathinfo($path, PATHINFO_EXTENSION) !== 'md') { + throw new RuntimeException('Script doc not found.'); + } + + $content = (string) file_get_contents($path); + return [ + 'name' => $safeName, + 'title' => $this->title($content, $safeName), + 'content' => $content, + 'html' => $this->renderer()->render($content), + ]; + } + + private function title(string $content, string $fallback): string + { + if (preg_match('/^#\s+(.+)$/m', $content, $matches)) { + return trim($matches[1]); + } + + return $fallback; + } + + private function renderer(): MarkdownRenderer + { + return $this->renderer ?? 
new MarkdownRenderer(); + } +} diff --git a/app/service/AdminConsole/ArchiveAdminService.php b/app/service/AdminConsole/ArchiveAdminService.php new file mode 100644 index 0000000..5976920 --- /dev/null +++ b/app/service/AdminConsole/ArchiveAdminService.php @@ -0,0 +1,205 @@ +where(function ($subQuery) use ($like): void { + $subQuery + ->orWhere('archive_uid', 'like', $like) + ->orWhere('title', 'like', $like) + ->orWhere('summary', 'like', $like) + ->orWhere('author', 'like', $like) + ->orWhere('source', 'like', $like) + ->orWhere('series', 'like', $like); + }); + } + + $total = (clone $builder)->count(); + $rows = $builder + ->orderByDesc('updated_time') + ->offset(($page - 1) * $pageSize) + ->limit($pageSize) + ->get([ + 'archive_uid', + 'title', + 'summary', + 'year', + 'author', + 'source', + 'series', + 'tags', + 'created_time', + 'updated_time', + Db::raw('jsonb_array_length(chunks) as chunk_count'), + ]) + ->all(); + + return [ + 'items' => array_map(fn (object $row): array => $this->listItem($row), $rows), + 'total' => (int) $total, + 'page' => $page, + 'page_size' => $pageSize, + ]; + } + + public function detail(string $archiveUid): ?array + { + $row = Db::table('archives')->where('archive_uid', $archiveUid)->first(); + if (!$row) { + return null; + } + + return $this->detailItem($row); + } + + public function update(string $archiveUid, array $payload): ?array + { + if (!$this->detail($archiveUid)) { + return null; + } + + $updates = []; + foreach (['title', 'summary', 'author', 'source', 'series', 'content', 'raw'] as $field) { + if (array_key_exists($field, $payload)) { + $updates[$field] = $this->nullableText($payload[$field]); + } + } + + if (array_key_exists('year', $payload)) { + $year = trim((string) ($payload['year'] ?? 
'')); + if ($year === '') { + $updates['year'] = null; + } elseif (!preg_match('/^\d{1,4}$/', $year)) { + throw new InvalidArgumentException('year must be empty or a 1-4 digit number.'); + } else { + $updates['year'] = (int) $year; + } + } + + if (array_key_exists('tags', $payload)) { + $updates['tags'] = json_encode($this->normalizeTags($payload['tags']), JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); + } + + if (array_key_exists('metadata', $payload)) { + $updates['metadata'] = json_encode($this->normalizeMetadata($payload['metadata']), JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES); + } + + if ($updates !== []) { + Db::table('archives')->where('archive_uid', $archiveUid)->update($updates); + } + + return $this->detail($archiveUid); + } + + public function delete(string $archiveUid): bool + { + return (int) Db::table('archives')->where('archive_uid', $archiveUid)->delete() > 0; + } + + private function listItem(object $row): array + { + $chunks = $this->decodeJson($row->chunks ?? null, []); + + return [ + 'archive_uid' => (string) $row->archive_uid, + 'title' => $row->title, + 'summary' => $row->summary, + 'year' => $row->year === null ? null : (int) $row->year, + 'author' => $row->author, + 'source' => $row->source, + 'series' => $row->series, + 'tags' => $this->decodeJson($row->tags ?? null, []), + 'chunk_count' => property_exists($row, 'chunk_count') + ? ($row->chunk_count === null ? 0 : (int) $row->chunk_count) + : count(is_array($chunks) ? $chunks : []), + 'created_time' => $row->created_time, + 'updated_time' => $row->updated_time, + ]; + } + + private function detailItem(object $row): array + { + $data = $this->listItem($row); + $data['metadata'] = $this->decodeJson($row->metadata ?? null, []); + $data['content'] = $row->content; + $data['raw'] = $row->raw; + $data['chunks'] = $this->decodeJson($row->chunks ?? 
null, []); + + return $data; + } + + private function normalizeTags(mixed $value): array + { + if (is_array($value)) { + $items = $value; + } else { + $text = trim((string) $value); + if ($text === '') { + return []; + } + $items = preg_split('/[\r\n,]+/', $text) ?: []; + } + + $tags = []; + foreach ($items as $item) { + $tag = trim((string) $item); + if ($tag !== '') { + $tags[] = $tag; + } + } + + return array_values(array_unique($tags)); + } + + private function normalizeMetadata(mixed $value): array + { + if (is_array($value)) { + return $value; + } + + $text = trim((string) $value); + if ($text === '') { + return []; + } + + $decoded = json_decode($text, true); + if (!is_array($decoded)) { + throw new InvalidArgumentException('metadata must be a JSON object or array.'); + } + + return $decoded; + } + + private function nullableText(mixed $value): ?string + { + $text = trim((string) $value); + return $text === '' ? null : $text; + } + + private function decodeJson(mixed $value, mixed $fallback): mixed + { + if ($value === null) { + return $fallback; + } + + if (is_array($value)) { + return $value; + } + + $decoded = json_decode((string) $value, true); + return $decoded === null && json_last_error() !== JSON_ERROR_NONE ? 
$fallback : $decoded; + } +} diff --git a/app/service/AdminConsole/MaintenanceScriptService.php b/app/service/AdminConsole/MaintenanceScriptService.php new file mode 100644 index 0000000..5fbb3a5 --- /dev/null +++ b/app/service/AdminConsole/MaintenanceScriptService.php @@ -0,0 +1,148 @@ +definitions() as $definition) { + $item = $definition; + try { + $doc = $docs->readScriptDoc($definition['doc_name']); + $item['doc_title'] = $doc['title']; + $item['doc_html'] = $doc['html']; + $item['doc_content'] = $doc['content']; + } catch (RuntimeException) { + $item['doc_title'] = null; + $item['doc_html'] = null; + $item['doc_content'] = null; + } + $items[] = $item; + } + + return $items; + } + + public function describe(string $name): array + { + $definitions = $this->definitions(); + if (!isset($definitions[$name])) { + throw new RuntimeException('Script is not allowed.'); + } + + foreach ($this->list() as $item) { + if ($item['name'] === $name) { + return $item; + } + } + + throw new RuntimeException('Script metadata not found.'); + } + + public function run(string $name, array $args = []): array + { + $definitions = $this->definitions(); + if (!isset($definitions[$name])) { + throw new RuntimeException('Script is not allowed.'); + } + + $script = $definitions[$name]; + $scriptPath = base_path('scripts/' . 
$script['file']); + if (!is_file($scriptPath)) { + throw new RuntimeException('Script file not found.'); + } + + $safeArgs = []; + foreach ($args as $arg) { + $arg = trim((string) $arg); + if ($arg === '') { + continue; + } + if (!preg_match(self::ARG_PATTERN, $arg)) { + throw new RuntimeException('Only --key=value style arguments are allowed.'); + } + $safeArgs[] = $arg; + } + + $command = array_merge([PHP_BINARY, $scriptPath], $safeArgs); + $descriptors = [ + 0 => ['pipe', 'r'], + 1 => ['pipe', 'w'], + 2 => ['pipe', 'w'], + ]; + + $process = proc_open($command, $descriptors, $pipes, base_path()); + if (!is_resource($process)) { + throw new RuntimeException('Failed to start script process.'); + } + + fclose($pipes[0]); + $stdout = (string) stream_get_contents($pipes[1]); + fclose($pipes[1]); + $stderr = (string) stream_get_contents($pipes[2]); + fclose($pipes[2]); + $exitCode = proc_close($process); + + return [ + 'script_name' => $name, + 'command' => array_merge(['php', 'scripts/' . $script['file']], $safeArgs), + 'exit_code' => $exitCode, + 'stdout' => $stdout, + 'stderr' => $stderr, + 'ok' => $exitCode === 0, + ]; + } + + private function definitions(): array + { + return [ + 'setup_database' => [ + 'name' => 'setup_database', + 'file' => 'setup_database.php', + 'label' => '初始化数据库', + 'description' => '创建或补齐 archives、chunks 相关表结构与索引。', + 'doc_name' => 'setup_database.md', + 'args_hint' => '无参数', + ], + 'setup_opensearch' => [ + 'name' => 'setup_opensearch', + 'file' => 'setup_opensearch.php', + 'label' => '初始化 OpenSearch', + 'description' => '创建或补齐 proofdb_chunks 索引与 mapping。', + 'doc_name' => 'setup_opensearch.md', + 'args_hint' => '无参数', + ], + 'reindex_opensearch' => [ + 'name' => 'reindex_opensearch', + 'file' => 'reindex_opensearch.php', + 'label' => '重建 OpenSearch 索引', + 'description' => '把 PostgreSQL 中已向量化的数据重新写入 OpenSearch。', + 'doc_name' => 'reindex_opensearch.md', + 'args_hint' => '--archive_uid=01...', + ], + 'backfill_archive_content' => [ + 'name' 
=> 'backfill_archive_content', + 'file' => 'backfill_archive_content.php', + 'label' => '回填 archive content', + 'description' => '从 raw 或 chunks 回填 archives.content。', + 'doc_name' => 'backfill_archive_content.md', + 'args_hint' => '--archive_uid=01...', + ], + 'setup_admin_users' => [ + 'name' => 'setup_admin_users', + 'file' => 'setup_admin_users.php', + 'label' => '初始化管理员用户', + 'description' => '创建 admin_users 表并写入或更新管理员账号。', + 'doc_name' => 'setup_admin_users.md', + 'args_hint' => '--username=admin --password=secret', + ], + ]; + } +} diff --git a/app/service/AdminConsole/MarkdownRenderer.php b/app/service/AdminConsole/MarkdownRenderer.php new file mode 100644 index 0000000..e62587c --- /dev/null +++ b/app/service/AdminConsole/MarkdownRenderer.php @@ -0,0 +1,185 @@ +' . $this->renderInline($text) . '

'; + $paragraph = []; + }; + + $flushList = function () use (&$listType, &$html): void { + if ($listType !== null) { + $html[] = ''; + $listType = null; + } + }; + + $flushTable = function () use (&$table, &$html): void { + if ($table === null) { + return; + } + + $html[] = '' . + implode('', array_map(fn (string $cell): string => '', $table['headers'])) . + ''; + foreach ($table['rows'] as $row) { + $html[] = '' . + implode('', array_map(fn (string $cell): string => '', $row)) . + ''; + } + $html[] = '
' . $this->renderInline($cell) . '
' . $this->renderInline($cell) . '
'; + $table = null; + }; + + foreach ($lines as $line) { + if (preg_match('/^```/', $line)) { + $flushParagraph(); + $flushList(); + $flushTable(); + + if ($inCodeBlock) { + $html[] = '
' . htmlspecialchars(implode("\n", $codeLines), ENT_QUOTES, 'UTF-8') . '
'; + $codeLines = []; + $inCodeBlock = false; + } else { + $inCodeBlock = true; + } + continue; + } + + if ($inCodeBlock) { + $codeLines[] = $line; + continue; + } + + $trimmed = trim($line); + if ($trimmed === '') { + $flushParagraph(); + $flushList(); + $flushTable(); + continue; + } + + if (preg_match('/^(#{1,6})\s+(.+)$/', $trimmed, $matches)) { + $flushParagraph(); + $flushList(); + $flushTable(); + $level = strlen($matches[1]); + $html[] = sprintf('%s', $level, $this->renderInline($matches[2]), $level); + continue; + } + + if (preg_match('/^>\s?(.+)$/', $trimmed, $matches)) { + $flushParagraph(); + $flushList(); + $flushTable(); + $html[] = '
' . $this->renderInline($matches[1]) . '
'; + continue; + } + + if (preg_match('/^---+$/', $trimmed)) { + $flushParagraph(); + $flushList(); + $flushTable(); + $html[] = '
'; + continue; + } + + if ($this->isTableDelimiter($trimmed) && $table !== null) { + continue; + } + + if (str_contains($trimmed, '|')) { + $cells = $this->tableCells($trimmed); + if (count($cells) >= 2) { + $flushParagraph(); + $flushList(); + if ($table === null) { + $table = ['headers' => $cells, 'rows' => []]; + } else { + $table['rows'][] = $cells; + } + continue; + } + } + + if (preg_match('/^[-*]\s+(.+)$/', $trimmed, $matches)) { + $flushParagraph(); + $flushTable(); + if ($listType !== 'ul') { + $flushList(); + $listType = 'ul'; + $html[] = '
    '; + } + $html[] = '
  • ' . $this->renderInline($matches[1]) . '
  • '; + continue; + } + + if (preg_match('/^\d+\.\s+(.+)$/', $trimmed, $matches)) { + $flushParagraph(); + $flushTable(); + if ($listType !== 'ol') { + $flushList(); + $listType = 'ol'; + $html[] = '
      '; + } + $html[] = '
    1. ' . $this->renderInline($matches[1]) . '
    2. '; + continue; + } + + $flushList(); + $flushTable(); + $paragraph[] = $trimmed; + } + + if ($inCodeBlock) { + $html[] = '
      ' . htmlspecialchars(implode("\n", $codeLines), ENT_QUOTES, 'UTF-8') . '
      '; + } + + $flushParagraph(); + $flushList(); + $flushTable(); + + return implode("\n", $html); + } + + private function renderInline(string $text): string + { + $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8'); + $text = preg_replace('/`([^`]+)`/', '$1', $text) ?? $text; + $text = preg_replace('/\*\*([^*]+)\*\*/', '$1', $text) ?? $text; + $text = preg_replace('/\*([^*]+)\*/', '$1', $text) ?? $text; + $text = preg_replace('/\[(.+?)\]\((.+?)\)/', '$1', $text) ?? $text; + + return $text; + } + + private function isTableDelimiter(string $line): bool + { + return (bool) preg_match('/^\|?[\s:-]+\|[\s|:-]*$/', $line); + } + + private function tableCells(string $line): array + { + $line = trim($line); + $line = trim($line, '|'); + return array_map(static fn (string $cell): string => trim($cell), explode('|', $line)); + } +} diff --git a/app/service/AdminConsole/OpenSearchAdminService.php b/app/service/AdminConsole/OpenSearchAdminService.php new file mode 100644 index 0000000..98557ff --- /dev/null +++ b/app/service/AdminConsole/OpenSearchAdminService.php @@ -0,0 +1,153 @@ + [ + 'hosts' => $config['hosts'] ?? [], + 'ssl_verify' => (bool) ($config['ssl_verify'] ?? true), + 'index_name' => $indexName, + ], + 'database' => [ + 'archives_total' => (int) Db::table('archives')->count(), + 'chunks_total' => (int) Db::table('chunks')->count(), + 'embedded_chunks' => (int) Db::table('chunks')->where('embedding_status', 3)->count(), + 'indexed_chunks' => (int) Db::table('chunks')->where('search_index_status', 3)->count(), + ], + 'opensearch' => [ + 'reachable' => false, + 'index_exists' => false, + 'cluster_name' => null, + 'health' => null, + 'docs_count' => 0, + 'mapping_fields' => [], + 'error' => null, + ], + ]; + + try { + $client = (new OpenSearchClientFactory())->make(); + $health = $client->cluster()->health(); + $status['opensearch']['reachable'] = true; + $status['opensearch']['cluster_name'] = $health['cluster_name'] ?? 
null; + $status['opensearch']['health'] = $health['status'] ?? null; + + $exists = (bool) $client->indices()->exists(['index' => $indexName]); + $status['opensearch']['index_exists'] = $exists; + + if ($exists) { + $stats = $client->indices()->stats(['index' => $indexName]); + $mapping = $client->indices()->getMapping(['index' => $indexName]); + $status['opensearch']['docs_count'] = (int) (($stats['_all']['primaries']['docs']['count'] ?? 0)); + $status['opensearch']['mapping_fields'] = array_keys($mapping[$indexName]['mappings']['properties'] ?? []); + } + } catch (Throwable $exception) { + $status['opensearch']['error'] = $exception->getMessage(); + } + + return $status; + } + + public function documents(string $query = '', int $size = 20): array + { + $size = min(50, max(1, $size)); + $indexName = config('opensearch.indices.chunks', 'proofdb_chunks'); + $client = (new OpenSearchClientFactory())->make(); + + if (!(bool) $client->indices()->exists(['index' => $indexName])) { + return [ + 'index_name' => $indexName, + 'items' => [], + 'total' => 0, + ]; + } + + $body = [ + '_source' => [ + 'includes' => [ + 'chunk_uid', + 'archive_uid', + 'chunk_index', + 'page_start', + 'page_end', + 'title', + 'summary', + 'source', + 'author', + 'year', + 'series', + 'tags', + 'text', + 'embedding_model', + 'embedding_dimensions', + 'created_time', + 'updated_time', + ], + ], + 'size' => $size, + 'sort' => [ + ['updated_time' => ['order' => 'desc']], + ], + ]; + + $query = trim($query); + if ($query === '') { + $body['query'] = ['match_all' => (object) []]; + } else { + $body['query'] = [ + 'multi_match' => [ + 'query' => $query, + 'fields' => ['text^3', 'title^2', 'summary^2', 'source', 'author', 'tags'], + 'type' => 'best_fields', + ], + ]; + } + + $response = $client->search([ + 'index' => $indexName, + 'body' => $body, + ]); + + $hits = $response['hits']['hits'] ?? []; + + return [ + 'index_name' => $indexName, + 'total' => (int) (($response['hits']['total']['value'] ?? 
0)), + 'items' => array_map(function (array $hit): array { + $source = $hit['_source'] ?? []; + $text = trim((string) ($source['text'] ?? '')); + return [ + 'score' => $hit['_score'] ?? null, + 'chunk_uid' => $source['chunk_uid'] ?? ($hit['_id'] ?? null), + 'archive_uid' => $source['archive_uid'] ?? null, + 'chunk_index' => $source['chunk_index'] ?? null, + 'page_start' => $source['page_start'] ?? null, + 'page_end' => $source['page_end'] ?? null, + 'title' => $source['title'] ?? null, + 'summary' => $source['summary'] ?? null, + 'source' => $source['source'] ?? null, + 'author' => $source['author'] ?? null, + 'year' => $source['year'] ?? null, + 'series' => $source['series'] ?? null, + 'tags' => $source['tags'] ?? [], + 'text_preview' => mb_substr($text, 0, 320), + 'embedding_model' => $source['embedding_model'] ?? null, + 'embedding_dimensions' => $source['embedding_dimensions'] ?? null, + 'created_time' => $source['created_time'] ?? null, + 'updated_time' => $source['updated_time'] ?? null, + ]; + }, $hits), + ]; + } +} diff --git a/app/service/AdminUserRepository.php b/app/service/AdminUserRepository.php new file mode 100644 index 0000000..17952ae --- /dev/null +++ b/app/service/AdminUserRepository.php @@ -0,0 +1,108 @@ +orderByDesc('id') + ->get() + ->all(); + + return array_map(fn (object $row): array => $this->toArray($row), $rows); + } + + public function findByUsername(string $username): ?array + { + $row = Db::table('admin_users') + ->where('username', $username) + ->where('is_active', true) + ->first(); + + return $row ? $this->toArray($row) : null; + } + + public function findById(int $id): ?array + { + $row = Db::table('admin_users') + ->where('id', $id) + ->where('is_active', true) + ->first(); + + return $row ? $this->toArray($row) : null; + } + + public function findAnyById(int $id): ?array + { + $row = Db::table('admin_users')->where('id', $id)->first(); + return $row ? 
$this->toArray($row) : null; + } + + public function findAnyByUsername(string $username): ?array + { + $row = Db::table('admin_users')->where('username', $username)->first(); + return $row ? $this->toArray($row) : null; + } + + public function touchLastLogin(int $id): void + { + Db::table('admin_users') + ->where('id', $id) + ->update(['last_login_at' => Db::raw('CURRENT_TIMESTAMP')]); + } + + public function create(string $username, string $password, ?string $displayName = null): array + { + $id = Db::table('admin_users')->insertGetId([ + 'username' => $username, + 'display_name' => $displayName, + 'password_hash' => password_hash($password, PASSWORD_DEFAULT), + 'is_active' => true, + 'last_login_at' => null, + ]); + + return $this->findAnyById((int) $id) ?? []; + } + + public function updateUser(int $id, array $fields): ?array + { + $updates = []; + + if (array_key_exists('display_name', $fields)) { + $displayName = $fields['display_name']; + $updates['display_name'] = $displayName === null ? 
null : trim((string) $displayName); + } + + if (array_key_exists('password', $fields) && trim((string) $fields['password']) !== '') { + $updates['password_hash'] = password_hash((string) $fields['password'], PASSWORD_DEFAULT); + } + + if (array_key_exists('is_active', $fields)) { + $updates['is_active'] = (bool) $fields['is_active']; + } + + if ($updates !== []) { + Db::table('admin_users')->where('id', $id)->update($updates); + } + + return $this->findAnyById($id); + } + + private function toArray(object $row): array + { + return [ + 'id' => (int) $row->id, + 'username' => (string) $row->username, + 'display_name' => $row->display_name, + 'password_hash' => (string) $row->password_hash, + 'is_active' => (bool) $row->is_active, + 'last_login_at' => $row->last_login_at, + 'created_time' => $row->created_time, + 'updated_time' => $row->updated_time, + ]; + } +} diff --git a/app/service/ArchiveRepository.php b/app/service/ArchiveRepository.php index 43a2c8d..c8222b6 100644 --- a/app/service/ArchiveRepository.php +++ b/app/service/ArchiveRepository.php @@ -75,6 +75,102 @@ class ArchiveRepository return implode("\n\n", array_map(fn ($chunk): string => (string) $chunk->text, $chunks)); } + public function findChunk(string $chunkUid): ?array + { + $row = Db::table('chunks') + ->join('archives', 'chunks.archive_uid', '=', 'archives.archive_uid') + ->where('chunks.chunk_uid', $chunkUid) + ->first([ + 'chunks.chunk_uid', + 'chunks.archive_uid', + 'chunks.chunk_index', + 'chunks.page_start', + 'chunks.page_end', + 'chunks.text', + 'chunks.length', + 'chunks.embedding_status', + 'chunks.embedding_ref', + 'chunks.embedding_model', + 'chunks.embedding_error', + 'chunks.search_index_status', + 'chunks.search_index_error', + 'archives.title', + 'archives.summary', + 'archives.year', + 'archives.author', + 'archives.source', + 'archives.series', + 'archives.tags', + 'archives.metadata', + ]); + + if (!$row) { + return null; + } + + return [ + 'chunk_uid' => (string) 
$row->chunk_uid, + 'archive_uid' => (string) $row->archive_uid, + 'chunk_index' => (int) $row->chunk_index, + 'page_start' => $row->page_start === null ? null : (int) $row->page_start, + 'page_end' => $row->page_end === null ? null : (int) $row->page_end, + 'pages' => $this->pages($row->page_start, $row->page_end), + 'text' => (string) $row->text, + 'length' => $row->length === null ? null : (int) $row->length, + 'embedding_status' => (int) $row->embedding_status, + 'embedding_ref' => $this->decodeJson($row->embedding_ref ?? null, null), + 'embedding_model' => $row->embedding_model, + 'embedding_error' => $row->embedding_error, + 'search_index_status' => (int) $row->search_index_status, + 'search_index_error' => $row->search_index_error, + 'archive' => [ + 'archive_uid' => (string) $row->archive_uid, + 'title' => $row->title, + 'summary' => $row->summary, + 'year' => $row->year === null ? null : (int) $row->year, + 'author' => $row->author, + 'source' => $row->source, + 'series' => $row->series, + 'tags' => $this->decodeJson($row->tags ?? null, []), + 'metadata' => $this->decodeJson($row->metadata ?? 
null, []), + ], + ]; + } + + public function findArchiveChunks(string $archiveUid): array + { + $rows = Db::table('chunks') + ->join('archives', 'chunks.archive_uid', '=', 'archives.archive_uid') + ->where('chunks.archive_uid', $archiveUid) + ->orderBy('chunks.chunk_index') + ->get([ + 'chunks.chunk_uid', + 'chunks.archive_uid', + 'chunks.chunk_index', + 'chunks.page_start', + 'chunks.page_end', + 'chunks.text', + 'chunks.length', + 'chunks.embedding_status', + 'chunks.embedding_ref', + 'chunks.embedding_model', + 'chunks.embedding_error', + 'chunks.search_index_status', + 'chunks.search_index_error', + 'archives.title', + 'archives.summary', + 'archives.year', + 'archives.author', + 'archives.source', + 'archives.series', + 'archives.tags', + 'archives.metadata', + ]) + ->all(); + + return array_map(fn (object $row): array => $this->chunkRowToArray($row), $rows); + } + public function updateMetadata(string $archiveUid, array $fields, array $aiMeta): void { $archive = $this->findArchive($archiveUid); @@ -136,4 +232,68 @@ class ArchiveRepository 'chunks' => json_decode($archive->chunks ?? '[]', true) ?: [], ]; } + + private function chunkRowToArray(object $row): array + { + return [ + 'chunk_uid' => (string) $row->chunk_uid, + 'archive_uid' => (string) $row->archive_uid, + 'chunk_index' => (int) $row->chunk_index, + 'page_start' => $row->page_start === null ? null : (int) $row->page_start, + 'page_end' => $row->page_end === null ? null : (int) $row->page_end, + 'pages' => $this->pages($row->page_start, $row->page_end), + 'text' => (string) $row->text, + 'length' => $row->length === null ? null : (int) $row->length, + 'embedding_status' => (int) $row->embedding_status, + 'embedding_ref' => $this->decodeJson($row->embedding_ref ?? 
null, null), + 'embedding_model' => $row->embedding_model, + 'embedding_error' => $row->embedding_error, + 'search_index_status' => (int) $row->search_index_status, + 'search_index_error' => $row->search_index_error, + 'archive' => [ + 'archive_uid' => (string) $row->archive_uid, + 'title' => $row->title, + 'summary' => $row->summary, + 'year' => $row->year === null ? null : (int) $row->year, + 'author' => $row->author, + 'source' => $row->source, + 'series' => $row->series, + 'tags' => $this->decodeJson($row->tags ?? null, []), + 'metadata' => $this->decodeJson($row->metadata ?? null, []), + ], + ]; + } + + private function decodeJson(mixed $value, mixed $fallback): mixed + { + if ($value === null) { + return $fallback; + } + + if (is_array($value)) { + return $value; + } + + if (!is_string($value) || trim($value) === '') { + return $fallback; + } + + $decoded = json_decode($value, true); + return $decoded === null && json_last_error() !== JSON_ERROR_NONE ? $fallback : $decoded; + } + + private function pages(mixed $pageStart, mixed $pageEnd): array + { + if (!is_numeric($pageStart) || !is_numeric($pageEnd)) { + return array_values(array_filter([$pageStart, $pageEnd], static fn ($value): bool => $value !== null && $value !== '')); + } + + $start = (int) $pageStart; + $end = (int) $pageEnd; + if ($end < $start) { + $end = $start; + } + + return range($start, $end); + } } diff --git a/app/service/ArticleImportService.php b/app/service/ArticleImportService.php index d676df9..c515d47 100644 --- a/app/service/ArticleImportService.php +++ b/app/service/ArticleImportService.php @@ -70,6 +70,16 @@ class ArticleImportService } } + public function normalizeArchiveContentString(string $content): ?string + { + return $this->nullableClean($this->cleanMarkdownPage($content)); + } + + public function normalizeArchiveRawString(string $content): ?string + { + return $this->nullableClean($content); + } + private function validate(array $payload): array { $errors = []; @@ -182,8 
+192,8 @@ class ArticleImportService 'tags' => is_array($payload['tags'] ?? null) ? array_values($payload['tags']) : [], 'summary' => $this->nullableClean($payload['summary'] ?? null), 'metadata' => $payload['metadata'] ?? [], - 'content' => $this->nullableClean($payload['content_url'] ?? $payload['content_path'] ?? null), - 'raw' => $this->nullableClean($payload['raw_url'] ?? $payload['raw_path'] ?? null), + 'content' => $this->normalizedArchiveContent($payload), + 'raw' => $this->rawArchiveContent($payload), ]; } @@ -200,6 +210,57 @@ class ArticleImportService return $this->pageBlocksFromItems($payload, preg_split('/\R{2,}/u', $payload['content'])); } + private function normalizedArchiveContent(array $payload): ?string + { + if (isset($payload['pages']) && is_array($payload['pages'])) { + $parts = []; + foreach ($payload['pages'] as $page) { + if (!is_array($page) || !isset($page['content']) || !is_string($page['content'])) { + continue; + } + + $content = $this->cleanMarkdownPage($page['content']); + if ($content !== '') { + $parts[] = $content; + } + } + + return $this->nullableClean(implode("\n\n", $parts)); + } + + if (isset($payload['paragraphs']) && is_array($payload['paragraphs'])) { + $parts = []; + foreach ($payload['paragraphs'] as $paragraph) { + $content = is_array($paragraph) ? ($paragraph['content'] ?? 
'') : $paragraph; + if (!is_string($content)) { + continue; + } + + $content = $this->clean($content); + if ($content !== '') { + $parts[] = $content; + } + } + + return $this->nullableClean(implode("\n\n", $parts)); + } + + if (isset($payload['content']) && is_string($payload['content'])) { + return $this->normalizeArchiveContentString($payload['content']); + } + + return null; + } + + private function rawArchiveContent(array $payload): ?string + { + if (isset($payload['content']) && is_string($payload['content'])) { + return $this->normalizeArchiveRawString($payload['content']); + } + + return null; + } + private function pageBlocksFromPages(array $payload): array { $pageBlocks = []; diff --git a/app/service/Search/ChunkSearchIndexRepository.php b/app/service/Search/ChunkSearchIndexRepository.php index 8233a7d..404a408 100644 --- a/app/service/Search/ChunkSearchIndexRepository.php +++ b/app/service/Search/ChunkSearchIndexRepository.php @@ -7,6 +7,22 @@ use support\Db; class ChunkSearchIndexRepository { + public function resetEmbeddedChunksToPending(?string $archiveUid = null): int + { + $query = Db::table('chunks') + ->where('embedding_status', EmbeddingStatus::EMBEDDED); + + if ($archiveUid !== null && trim($archiveUid) !== '') { + $query->where('archive_uid', trim($archiveUid)); + } + + return $query->update([ + 'search_index_status' => SearchIndexStatus::PENDING, + 'search_index_error' => null, + 'search_index_updated_at' => null, + ]); + } + public function queuePendingArchiveTasks(int $limit): array { $statuses = [ @@ -63,6 +79,7 @@ class ChunkSearchIndexRepository 'chunks.created_time', 'chunks.updated_time', 'archives.title', + 'archives.summary', 'archives.source', 'archives.author', 'archives.year', @@ -105,6 +122,7 @@ class ChunkSearchIndexRepository 'page_start' => $row->page_start === null ? null : (int) $row->page_start, 'page_end' => $row->page_end === null ? 
null : (int) $row->page_end, 'title' => $row->title, + 'summary' => $row->summary, 'source' => $row->source, 'author' => $row->author, 'year' => $row->year === null ? null : (int) $row->year, diff --git a/app/service/Search/OpenSearchChunkIndex.php b/app/service/Search/OpenSearchChunkIndex.php index f0cc659..90d2b9c 100644 --- a/app/service/Search/OpenSearchChunkIndex.php +++ b/app/service/Search/OpenSearchChunkIndex.php @@ -16,6 +16,7 @@ class OpenSearchChunkIndex $index = $this->indexName(); if ($client->indices()->exists(['index' => $index])) { + $this->ensureProperties($client, $index); return; } @@ -64,6 +65,7 @@ class OpenSearchChunkIndex 'page_start' => ['type' => 'integer'], 'page_end' => ['type' => 'integer'], 'title' => $this->textWithKeyword(), + 'summary' => ['type' => 'text'], 'source' => $this->textWithKeyword(), 'author' => $this->textWithKeyword(), 'year' => ['type' => 'integer'], @@ -93,6 +95,31 @@ class OpenSearchChunkIndex return $this->client ?? (new OpenSearchClientFactory())->make(); } + private function ensureProperties(Client $client, string $index): void + { + $mapping = $client->indices()->getMapping(['index' => $index]); + $existing = $mapping[$index]['mappings']['properties'] ?? []; + $desired = $this->mapping()['mappings']['properties'] ?? 
[]; + $missing = []; + + foreach ($desired as $field => $definition) { + if (!array_key_exists($field, $existing)) { + $missing[$field] = $definition; + } + } + + if ($missing === []) { + return; + } + + $client->indices()->putMapping([ + 'index' => $index, + 'body' => [ + 'properties' => $missing, + ], + ]); + } + private function indexName(): string { return config('opensearch.indices.chunks', 'proofdb_chunks'); diff --git a/app/service/Search/OpenSearchSearchService.php b/app/service/Search/OpenSearchSearchService.php index d2f84ae..962cca7 100644 --- a/app/service/Search/OpenSearchSearchService.php +++ b/app/service/Search/OpenSearchSearchService.php @@ -39,6 +39,7 @@ class OpenSearchSearchService 'fields' => [ 'text^4', 'title^3', + 'summary^2', 'source^2', 'author^2', 'series^2', @@ -219,6 +220,7 @@ class OpenSearchSearchService 'page_start' => $source['page_start'] ?? null, 'page_end' => $source['page_end'] ?? null, 'title' => $source['title'] ?? null, + 'summary' => $source['summary'] ?? null, 'source' => $source['source'] ?? null, 'author' => $source['author'] ?? null, 'year' => $source['year'] ?? null, @@ -322,6 +324,7 @@ class OpenSearchSearchService 'page_start', 'page_end', 'title', + 'summary', 'source', 'author', 'year', diff --git a/app/view/admin/dashboard.html b/app/view/admin/dashboard.html new file mode 100644 index 0000000..0840982 --- /dev/null +++ b/app/view/admin/dashboard.html @@ -0,0 +1,835 @@ + + + + + + Proof DB 管理面板 + + + +
      +
      + + +
      +
      +
      +
      Administrative Entry
      +

      Proof DB 管理面板

      +

      在这里维护 archives 表、OpenSearch 状态、管理员账号、API 文档,以及脚本级运维动作。

      +
      + +
      + + 返回 Archive Cask + + +
      +
      + + + +
      +
      +
      +

      系统总览

      +
      集中查看数据库、OpenSearch 和当前版本状态。
      +
      + +
      + +
      + +
      +
      +
      +

      OpenSearch 摘要

      +
      来自管理员状态 API
      +
      +
      等待加载...
      +
      + +
      +
      +

      快速入口

      +
      直接跳到主要维护区块
      +
      +
      + + + + + +
      +
      +
      +
      + +
      +
      +
      +

      archives 表管理

      +
      搜索、查看、编辑和删除档案记录。这里只操作 archives 表本身。
      +
      +
      + +
      +
      +
      + + + +
      + +
      +
      + +
      +
      +

      档案编辑器

      +
      选择左侧档案后可编辑。
      +
      + +
      + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      + + +
      +
      +
      +
      +
      + +
      +
      +
      +

      OpenSearch 管理

      +
      查看集群、索引、数据库侧索引状态,以及索引中的文档粗览。
      +
      + +
      + +
      + +
      +
      + + + +
      +
      +
      + +
      +
      +
      +

      OpenSearch 详情

      +
      主机、索引、mapping 字段等。
      +
      +
      等待加载...
      +
      + +
      +
      +

      建议动作

      +
      跳转到脚本面板执行维护脚本。
      +
      +
      + + +
      +
      +
      +
      + +
      +
      +
      +

      管理员用户管理

      +
      创建管理员账号,修改显示名、密码与启用状态。
      +
      +
      + +
      +
      +
      +

      创建新管理员

      +
      账号创建后即默认启用。
      +
      + +
      + + + + + + + + + +
      + +
      +
      +
      + +
      +
      +

      现有管理员

      +
      留空密码表示不修改。
      +
      + +
      +
      +
      +
      + +
      +
      +
      +

      APIDOC 查看

      +
      浏览 `/apidoc` 中的接口文档。
      +
      +
      + +
      +
      +
      +
      + +
      +
      +

      文档内容

      +
      请选择一份文档。
      +
      +
      等待加载...
      +
      +
      +
      + +
      +
      +
      +

      维护脚本伪终端

      +
      仅允许执行白名单中的 `scripts/*.php` 维护脚本。
      +
      +
      + +
      +
      +
      +

      可执行脚本

      +
      支持 `--key=value` 参数格式。
      +
      + +
      +
      + +
      +
      +

      执行终端

      +
      脚本 stdout / stderr 会显示在下方。
      +
      + +
      + + + + + + +
      + +
      +
      + +
      proofdb-admin$ 等待命令...
      +
      +
      + +
      +
      +

      脚本文档

      +
      如果该脚本有文档,会显示在这里。
      +
      +
      等待加载...
      +
      +
      +
      +
      +
      + + + diff --git a/app/view/admin/landing.html b/app/view/admin/landing.html new file mode 100644 index 0000000..32212f1 --- /dev/null +++ b/app/view/admin/landing.html @@ -0,0 +1,83 @@ + + + + + + Proof DB Admin + + + +
      +
      +
      + + Proof DB Admin Console +
      +
      Version
      +
      + +
      +
      +
      +
      Administrative Entry
      +

      Proof DB


      档案数据中心

      +

      + 档案储存 标签处理 向量存储 全文搜索 +

      +
      + +
      +
      +
      关于Proof DB
      +
      ProofDB是一个专业级的历史档案后端数据中心,集成档案数据库、全文搜索引擎和RAG向量引擎。
      +
      +
      +
      + +
      +
      + Access Control + v1 +
      +
      +

      选择进入路径

      +

      Tips: ProofDB的Proof是酒精度的意思

      +
      + + + +
      + + Archive Cask 未配置。 请在 `.env` 中设置 `ARCHIVE_CASK_URL`。 + + Archive Cask 已连接。 + +
      +
      +
      + + +
      + + diff --git a/app/view/admin/login.html b/app/view/admin/login.html new file mode 100644 index 0000000..d3dd833 --- /dev/null +++ b/app/view/admin/login.html @@ -0,0 +1,87 @@ + + + + + + 管理员登录 + + + +
      + + + +
      + + + diff --git a/config/admin.php b/config/admin.php new file mode 100644 index 0000000..4fae678 --- /dev/null +++ b/config/admin.php @@ -0,0 +1,5 @@ + getenv('ARCHIVE_CASK_URL') ?: '', +]; diff --git a/config/opensearch.php b/config/opensearch.php index 363179a..01e16ff 100644 --- a/config/opensearch.php +++ b/config/opensearch.php @@ -20,14 +20,14 @@ return [ * - OPENSEARCH_HOSTS=http://127.0.0.1:9200 * - OPENSEARCH_USERNAME=admin * - OPENSEARCH_PASSWORD=... - * - OPENSEARCH_SSL_VERIFY=true + * - OPENSEARCH_SSL_VERIFY=false * - OPENSEARCH_INDEX_CHUNKS=proofdb_chunks */ 'default' => [ 'hosts' => $hosts, 'username' => getenv('OPENSEARCH_USERNAME') ?: null, 'password' => getenv('OPENSEARCH_PASSWORD') ?: null, - 'ssl_verify' => $bool('OPENSEARCH_SSL_VERIFY', true), + 'ssl_verify' => $bool('OPENSEARCH_SSL_VERIFY', false), 'timeout' => (float) (getenv('OPENSEARCH_TIMEOUT') ?: 30), 'connect_timeout' => (float) (getenv('OPENSEARCH_CONNECT_TIMEOUT') ?: 5), ], diff --git a/config/route.php b/config/route.php index 4d486a3..cde66ba 100644 --- a/config/route.php +++ b/config/route.php @@ -14,8 +14,33 @@ use Webman\Route; +Route::get('/', [app\controller\AdminController::class, 'landing']); +Route::get('/admin/login', [app\controller\AdminController::class, 'login']); +Route::get('/admin', [app\controller\AdminController::class, 'dashboard']); + Route::post('/api/articles/import', [app\controller\Api\ArticleImportController::class, 'import']); +Route::post('/api/admin/login', [app\controller\Api\AdminAuthController::class, 'login']); +Route::post('/api/admin/logout', [app\controller\Api\AdminAuthController::class, 'logout']); +Route::get('/api/admin/me', [app\controller\Api\AdminAuthController::class, 'me']); +Route::get('/api/admin/archives', [app\controller\Api\AdminConsoleController::class, 'archives']); +Route::get('/api/admin/archives/{archiveUid}', [app\controller\Api\AdminConsoleController::class, 'archive']); +Route::patch('/api/admin/archives/{archiveUid}', 
[app\controller\Api\AdminConsoleController::class, 'updateArchive']); +Route::delete('/api/admin/archives/{archiveUid}', [app\controller\Api\AdminConsoleController::class, 'deleteArchive']); +Route::get('/api/admin/opensearch/status', [app\controller\Api\AdminConsoleController::class, 'openSearchStatus']); +Route::get('/api/admin/opensearch/documents', [app\controller\Api\AdminConsoleController::class, 'openSearchDocuments']); +Route::get('/api/admin/users', [app\controller\Api\AdminConsoleController::class, 'users']); +Route::post('/api/admin/users', [app\controller\Api\AdminConsoleController::class, 'createUser']); +Route::patch('/api/admin/users/{id}', [app\controller\Api\AdminConsoleController::class, 'updateUser']); +Route::get('/api/admin/docs', [app\controller\Api\AdminConsoleController::class, 'docs']); +Route::get('/api/admin/docs/{name}', [app\controller\Api\AdminConsoleController::class, 'doc']); +Route::get('/api/admin/scripts', [app\controller\Api\AdminConsoleController::class, 'scripts']); +Route::get('/api/admin/scripts/{name}', [app\controller\Api\AdminConsoleController::class, 'script']); +Route::post('/api/admin/scripts/run', [app\controller\Api\AdminConsoleController::class, 'runScript']); Route::post('/api/search/fulltext', [app\controller\Api\SearchController::class, 'fulltext']); Route::post('/api/search/vector', [app\controller\Api\SearchController::class, 'vector']); Route::post('/api/search/hybrid', [app\controller\Api\SearchController::class, 'hybrid']); - +Route::get('/api/archives/{archive_uid}', [app\controller\Api\EvidenceController::class, 'archive']); +Route::get('/api/archives/{archive_uid}/chunks', [app\controller\Api\EvidenceController::class, 'archiveChunks']); +Route::get('/api/archives/{archive_uid}/evidence', [app\controller\Api\EvidenceController::class, 'archiveEvidence']); +Route::get('/api/chunks/{chunk_uid}', [app\controller\Api\EvidenceController::class, 'chunk']); +Route::get('/api/evidence/{chunk_uid}', 
[app\controller\Api\EvidenceController::class, 'evidence']); diff --git a/public/admin.css b/public/admin.css new file mode 100644 index 0000000..4ea8a88 --- /dev/null +++ b/public/admin.css @@ -0,0 +1,1054 @@ +:root { + --bg: #111316; + --panel: rgba(18, 21, 24, 0.84); + --panel-soft: rgba(29, 33, 38, 0.82); + --line: rgba(214, 196, 167, 0.18); + --line-strong: rgba(214, 196, 167, 0.36); + --text: #f1ece1; + --muted: #b7aea1; + --accent: #d6b67e; + --signal: #7ea6d6; + --shadow: 0 24px 60px rgba(0, 0, 0, 0.36); + --danger-bg: rgba(118, 34, 34, 0.28); + --danger-line: rgba(208, 115, 115, 0.42); + --danger-text: #ffd4d4; +} + +* { + box-sizing: border-box; +} + +html, +body { + margin: 0; + min-height: 100%; +} + +body { + min-height: 100vh; + font-family: Arial, Helvetica, sans-serif; + color: var(--text); + background: + linear-gradient(180deg, rgba(214, 182, 126, 0.05), transparent 18%), + linear-gradient(90deg, rgba(126, 166, 214, 0.08), transparent 32%), + repeating-linear-gradient( + 90deg, + rgba(255, 255, 255, 0.018) 0, + rgba(255, 255, 255, 0.018) 1px, + transparent 1px, + transparent 96px + ), + linear-gradient(180deg, #16191d 0%, #101214 100%); +} + +body.admin-login { + background: + radial-gradient(circle at top left, rgba(214, 182, 126, 0.08), transparent 22%), + repeating-linear-gradient( + 0deg, + rgba(255, 255, 255, 0.018) 0, + rgba(255, 255, 255, 0.018) 1px, + transparent 1px, + transparent 88px + ), + linear-gradient(180deg, #171a1e 0%, #101214 100%); +} + +body.admin-dashboard { + background: + linear-gradient(180deg, rgba(214, 182, 126, 0.05), transparent 16%), + repeating-linear-gradient( + 90deg, + rgba(255, 255, 255, 0.014) 0, + rgba(255, 255, 255, 0.014) 1px, + transparent 1px, + transparent 92px + ), + linear-gradient(180deg, #171a1d 0%, #0f1113 100%); +} + +a { + color: var(--text); +} + +h1, +h2, +h3, +h4, +h5 { + margin: 0; + max-width: none; +} + +h1 { + font-size: clamp(42px, 5vw, 72px); + line-height: 0.96; +} + +h2 { + font-size: 
clamp(32px, 3.8vw, 56px); + line-height: 1.02; +} + +h3 { + font-size: clamp(26px, 3vw, 42px); + line-height: 1.08; +} + +h4 { + font-size: clamp(22px, 2.2vw, 32px); + line-height: 1.14; +} + +h5 { + font-size: clamp(18px, 1.6vw, 24px); + line-height: 1.22; +} + +.page-shell { + width: min(1200px, calc(100% - 40px)); + margin: 0 auto; +} + +.page-shell-wide { + width: min(1240px, calc(100% - 40px)); + margin: 0 auto; +} + +.eyebrow { + color: var(--accent); + font-size: 12px; + letter-spacing: 0.12em; + text-transform: uppercase; + margin-bottom: 12px; +} + +.lead { + color: var(--muted); + font-size: 16px; + line-height: 1.7; +} + +.panel { + border: 1px solid var(--line); + background: var(--panel); + backdrop-filter: blur(8px); + box-shadow: var(--shadow); +} + +.panel-soft { + border: 1px solid var(--line); + background: + linear-gradient(180deg, rgba(214, 182, 126, 0.06), transparent 18%), + var(--panel-soft); + backdrop-filter: blur(8px); + box-shadow: var(--shadow); +} + +.button { + min-height: 50px; + padding: 0 16px; + border: 1px solid var(--line-strong); + background: rgba(255, 255, 255, 0.03); + color: var(--text); + text-decoration: none; + display: inline-flex; + align-items: center; + justify-content: center; + gap: 12px; + cursor: pointer; + font-size: 15px; + transition: background 120ms ease, transform 120ms ease, border-color 120ms ease; +} + +.button:hover { + background: rgba(255, 255, 255, 0.06); + border-color: rgba(214, 196, 167, 0.5); + transform: translateY(-1px); +} + +.button.primary { + background: linear-gradient(90deg, rgba(155, 111, 64, 0.96), rgba(214, 182, 126, 0.92)); + color: #18130f; + border-color: rgba(214, 182, 126, 0.82); + font-weight: 600; +} + +.button.primary:hover { + background: linear-gradient(90deg, rgba(170, 122, 68, 0.98), rgba(225, 193, 140, 0.94)); +} + +.button.disabled { + pointer-events: none; + opacity: 0.48; +} + +.button-key { + color: rgba(23, 19, 15, 0.72); + font-size: 12px; + letter-spacing: 0.08em; + 
text-transform: uppercase; +} + +.button:not(.primary) .button-key { + color: var(--muted); +} + +.error-box { + display: none; + margin-bottom: 16px; + padding: 12px 14px; + border: 1px solid var(--danger-line); + background: var(--danger-bg); + color: var(--danger-text); + font-size: 14px; + line-height: 1.6; +} + +.field-label { + display: block; + margin-bottom: 8px; + font-size: 13px; + letter-spacing: 0.06em; + text-transform: uppercase; + color: var(--muted); +} + +.text-input { + width: 100%; + min-height: 48px; + padding: 0 14px; + margin-bottom: 18px; + border: 1px solid var(--line-strong); + background: rgba(255, 255, 255, 0.03); + color: var(--text); + font-size: 15px; + outline: none; +} + +.text-input:focus { + border-color: rgba(214, 182, 126, 0.72); + box-shadow: inset 0 0 0 1px rgba(214, 182, 126, 0.32); +} + +.text-area { + width: 100%; + min-height: 120px; + padding: 12px 14px; + margin-bottom: 18px; + border: 1px solid var(--line-strong); + background: rgba(255, 255, 255, 0.03); + color: var(--text); + font-size: 14px; + line-height: 1.7; + outline: none; + resize: vertical; +} + +.text-area:focus { + border-color: rgba(214, 182, 126, 0.72); + box-shadow: inset 0 0 0 1px rgba(214, 182, 126, 0.32); +} + +.metric-grid { + display: grid; + gap: 14px; +} + +.metric-card { + border: 1px solid var(--line); + padding: 16px; + background: rgba(255, 255, 255, 0.02); + min-height: 120px; +} + +.metric-label { + color: var(--muted); + font-size: 12px; + letter-spacing: 0.08em; + text-transform: uppercase; + margin-bottom: 10px; +} + +.metric-value { + font-size: 22px; + line-height: 1.2; + word-break: break-word; +} + +.metric-subvalue { + margin-top: 10px; + color: var(--muted); + font-size: 13px; + line-height: 1.6; +} + +.terminal-block { + border: 1px solid var(--line); + background: rgba(10, 12, 14, 0.76); + padding: 16px; + font-family: "Courier New", Courier, monospace; + font-size: 13px; + line-height: 1.8; + color: #d9d2c7; + white-space: 
pre-wrap; +} + +.terminal-block .prompt { + color: var(--signal); +} + +.terminal-block .accent { + color: var(--accent); +} + +.topbar, +.footer { + display: flex; + justify-content: space-between; + align-items: center; + gap: 16px; + color: var(--muted); + font-size: 12px; + letter-spacing: 0.04em; + text-transform: uppercase; +} + +.topbar { + padding: 18px 0; +} + +.footer { + padding: 0 0 22px; + color: rgba(183, 174, 161, 0.72); +} + +.brand { + display: flex; + gap: 14px; + align-items: center; +} + +.brand-mark { + width: 12px; + height: 12px; + border: 1px solid var(--accent); + box-shadow: inset 0 0 0 2px rgba(214, 182, 126, 0.12); +} + +.admin-landing-page { + min-height: 100vh; + display: grid; + grid-template-rows: auto 1fr auto; +} + +.admin-landing-hero { + display: grid; + grid-template-columns: minmax(0, 1.45fr) minmax(340px, 0.9fr); + gap: 24px; + align-items: stretch; + padding: 18px 0 36px; +} + +.admin-landing-intro, +.admin-landing-portal { + min-height: 560px; +} + +.admin-landing-intro { + padding: 40px 42px 34px; + display: flex; + flex-direction: column; + justify-content: space-between; + position: relative; + overflow: hidden; +} + +.admin-landing-title { + max-width: 9ch; + position: relative; + z-index: 1; +} + +.admin-landing-lead { + max-width: 52ch; + margin-top: 20px; + position: relative; + z-index: 1; +} + +.admin-landing-meta { + margin-top: 34px; + padding-top: 22px; + border-top: 1px solid var(--line); + display: grid; + grid-template-columns: repeat(1, minmax(0, 1fr)); + gap: 18px; + position: relative; + z-index: 1; +} + +.admin-landing-meta-value { + font-size: 15px; + line-height: 1.5; +} + +.admin-landing-intro::before { + content: ""; + position: absolute; + inset: 0; + background: + linear-gradient(90deg, transparent 0, transparent calc(100% - 220px), rgba(214, 182, 126, 0.08) calc(100% - 220px), rgba(214, 182, 126, 0.08) calc(100% - 219px), transparent calc(100% - 219px)), + linear-gradient(180deg, transparent 0, 
transparent calc(100% - 180px), rgba(126, 166, 214, 0.08) calc(100% - 180px), rgba(126, 166, 214, 0.08) calc(100% - 179px), transparent calc(100% - 179px)), + linear-gradient(135deg, transparent 0, transparent 71%, rgba(214, 182, 126, 0.1) 71%, rgba(214, 182, 126, 0.1) 71.35%, transparent 71.35%), + repeating-linear-gradient( + 90deg, + transparent 0, + transparent 108px, + rgba(255, 255, 255, 0.03) 108px, + rgba(255, 255, 255, 0.03) 109px + ); + opacity: 0.8; + pointer-events: none; +} + +.admin-landing-intro::after { + content: ""; + position: absolute; + top: 34px; + right: 34px; + width: 160px; + height: 160px; + border-top: 1px solid rgba(214, 182, 126, 0.34); + border-right: 1px solid rgba(214, 182, 126, 0.34); + clip-path: polygon(34% 0, 100% 0, 100% 66%); + opacity: 0.8; + pointer-events: none; +} + +.admin-landing-portal { + padding: 28px; + display: grid; + grid-template-rows: auto auto 1fr auto; + gap: 18px; +} + +.admin-landing-portal-head { + display: flex; + justify-content: space-between; + align-items: center; + color: var(--muted); + font-size: 12px; + letter-spacing: 0.08em; + text-transform: uppercase; +} + +.admin-landing-portal-title { + font-size: 24px; + line-height: 1.2; + margin: 0; +} + +.admin-landing-portal-copy { + margin: 0; + color: var(--muted); + line-height: 1.7; + font-size: 15px; +} + +.admin-landing-button-stack { + display: grid; + gap: 14px; + align-content: end; +} + +.admin-landing-button-stack .button { + justify-content: space-between; +} + +.admin-landing-status { + border-top: 1px solid var(--line); + padding-top: 18px; + color: var(--muted); + font-size: 13px; + line-height: 1.7; +} + +.admin-landing-status strong { + color: var(--text); + font-weight: 600; +} + +.admin-login-layout { + min-height: 100vh; + display: grid; + grid-template-columns: minmax(0, 1.2fr) minmax(360px, 460px); + gap: 24px; + align-items: stretch; + padding: 28px 0; +} + +.admin-login-intro { + padding: 36px 40px; + display: flex; + 
flex-direction: column; + justify-content: space-between; + min-height: 620px; +} + +.admin-login-title { + max-width: 8ch; +} + +.admin-login-lead { + margin-top: 18px; + max-width: 42ch; + line-height: 1.75; +} + +.admin-login-facts { + display: grid; + grid-template-columns: repeat(3, minmax(0, 1fr)); + gap: 16px; + padding-top: 22px; + border-top: 1px solid var(--line); +} + +.admin-login-form-shell { + padding: 26px; + display: grid; + align-content: center; +} + +.admin-login-form-head { + margin-bottom: 24px; +} + +.admin-login-form-copy { + margin: 10px 0 0; + color: var(--muted); + line-height: 1.7; + font-size: 15px; +} + +.admin-login-actions { + display: grid; + gap: 12px; + margin-top: 6px; +} + +.admin-login-footnote { + margin-top: 18px; + color: var(--muted); + font-size: 12px; + line-height: 1.7; +} + +.admin-dashboard-page { + min-height: 100vh; + padding: 24px 0 28px; +} + +.admin-console { + min-height: 100vh; + padding: 24px 0 28px; +} + +.admin-console-shell { + display: grid; + grid-template-columns: 260px minmax(0, 1fr); + min-height: calc(100vh - 52px); +} + +.admin-console-sidebar { + border-right: 1px solid var(--line); + padding: 24px 20px; + display: flex; + flex-direction: column; + gap: 24px; + background: + linear-gradient(180deg, rgba(214, 182, 126, 0.05), transparent 20%), + rgba(9, 11, 13, 0.42); +} + +.admin-console-brand { + padding-bottom: 18px; + border-bottom: 1px solid var(--line); +} + +.admin-console-title { + font-size: clamp(38px, 4vw, 62px); + line-height: 0.95; +} + +.admin-console-subtitle { + margin-top: 10px; + color: var(--muted); + font-size: 14px; + line-height: 1.6; +} + +.admin-console-nav { + display: grid; + gap: 10px; +} + +.admin-console-nav-item { + min-height: 46px; + padding: 0 14px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.02); + color: var(--text); + text-align: left; + cursor: pointer; + transition: background 120ms ease, border-color 120ms ease, transform 120ms ease; +} + 
+.admin-console-nav-item:hover { + border-color: rgba(214, 196, 167, 0.48); + background: rgba(255, 255, 255, 0.05); + transform: translateX(1px); +} + +.admin-console-nav-item.is-active { + border-color: rgba(214, 182, 126, 0.74); + background: + linear-gradient(90deg, rgba(214, 182, 126, 0.18), rgba(255, 255, 255, 0.03)); +} + +.admin-console-sidebar-foot { + margin-top: auto; + padding-top: 18px; + border-top: 1px solid var(--line); +} + +.admin-console-identity { + font-size: 18px; + line-height: 1.3; +} + +.admin-console-identity-sub { + margin-top: 4px; + color: var(--muted); + font-size: 13px; +} + +.admin-console-main { + padding: 24px; +} + +.admin-console-header { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 20px; + align-items: start; +} + +.admin-console-header-title { + font-size: clamp(34px, 4vw, 52px); + line-height: 1.02; +} + +.admin-console-header-copy { + max-width: 62ch; + margin: 14px 0 0; + color: var(--muted); + font-size: 15px; + line-height: 1.75; +} + +.console-message { + margin: 18px 0 0; + padding: 14px 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.03); + color: var(--text); + font-size: 14px; + line-height: 1.6; +} + +.console-message.is-success { + border-color: rgba(126, 214, 165, 0.4); + background: rgba(33, 78, 56, 0.22); +} + +.console-message.is-error { + border-color: var(--danger-line); + background: var(--danger-bg); + color: var(--danger-text); +} + +.admin-pane { + display: none; + margin-top: 20px; +} + +.admin-pane.is-active { + display: block; +} + +.admin-pane-head { + display: flex; + justify-content: space-between; + align-items: end; + gap: 16px; + margin-bottom: 16px; +} + +.admin-console-overview-grid, +.admin-console-opensearch-grid { + grid-template-columns: repeat(6, minmax(0, 1fr)); + margin-bottom: 18px; +} + +.admin-console-two-column, +.admin-console-workbench { + display: grid; + gap: 18px; + grid-template-columns: minmax(320px, 0.9fr) minmax(0, 1.3fr); 
+} + +.admin-console-quick-actions { + display: flex; + flex-wrap: wrap; + gap: 12px; +} + +.admin-toolbar { + display: grid; + grid-template-columns: minmax(0, 1fr) auto auto; + gap: 12px; + align-items: start; + margin-bottom: 18px; +} + +.admin-toolbar .text-input { + margin-bottom: 0; +} + +.admin-list, +.admin-user-list, +.admin-script-list { + display: grid; + gap: 12px; +} + +.admin-list-empty { + padding: 18px; + border: 1px dashed var(--line-strong); + color: var(--muted); + font-size: 14px; + line-height: 1.7; +} + +.admin-list-item, +.admin-script-card { + width: 100%; + padding: 14px 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.02); + color: var(--text); + text-align: left; + cursor: pointer; + transition: background 120ms ease, border-color 120ms ease, transform 120ms ease; +} + +.admin-list-item.no-click { + cursor: default; +} + +.admin-list-item:hover, +.admin-script-card:hover { + border-color: rgba(214, 196, 167, 0.48); + background: rgba(255, 255, 255, 0.05); +} + +.admin-list-item.is-active { + border-color: rgba(214, 182, 126, 0.68); + background: linear-gradient(90deg, rgba(214, 182, 126, 0.1), rgba(255, 255, 255, 0.03)); +} + +.admin-list-item-head, +.admin-script-card-head { + display: flex; + justify-content: space-between; + gap: 14px; + margin-bottom: 8px; + font-size: 13px; +} + +.admin-list-item-head strong, +.admin-script-card-head strong { + font-size: 16px; + line-height: 1.4; +} + +.admin-list-item-copy, +.admin-script-card-copy { + color: var(--muted); + font-size: 14px; + line-height: 1.7; +} + +.admin-list-item-meta, +.admin-script-card-meta { + margin-top: 10px; + display: flex; + flex-wrap: wrap; + gap: 10px 18px; + color: var(--muted); + font-size: 12px; + text-transform: uppercase; + letter-spacing: 0.04em; +} + +.admin-form-grid.compact .text-input, +.admin-form-grid.compact .text-area { + margin-bottom: 14px; +} + +.admin-form-actions { + display: flex; + flex-wrap: wrap; + gap: 12px; + 
margin-top: 6px; +} + +.admin-code-area { + font-family: "Courier New", Courier, monospace; + font-size: 13px; +} + +.admin-inline-check { + display: inline-flex; + align-items: center; + gap: 10px; + margin: 6px 0 8px; + color: var(--muted); + font-size: 13px; +} + +.admin-user-card { + padding: 16px; + border: 1px solid var(--line); + background: rgba(255, 255, 255, 0.02); +} + +.admin-user-card-head { + display: flex; + justify-content: space-between; + gap: 14px; + margin-bottom: 12px; +} + +.admin-user-card-head span { + color: var(--muted); + font-size: 12px; + text-transform: uppercase; +} + +.admin-markdown-viewer { + margin: 0; + min-height: 520px; + max-height: 70vh; + overflow: auto; + padding: 18px; + border: 1px solid var(--line); + background: rgba(10, 12, 14, 0.76); + color: #ddd5ca; + font-family: "Courier New", Courier, monospace; + font-size: 13px; + line-height: 1.8; + white-space: pre-wrap; + word-break: break-word; +} + +.admin-markdown-viewer h1, +.admin-markdown-viewer h2, +.admin-markdown-viewer h3, +.admin-markdown-viewer h4, +.admin-markdown-viewer h5, +.admin-markdown-viewer h6 { + margin: 0 0 14px; + line-height: 1.2; +} + +.admin-markdown-viewer p, +.admin-markdown-viewer ul, +.admin-markdown-viewer ol, +.admin-markdown-viewer blockquote, +.admin-markdown-viewer pre, +.admin-markdown-viewer table { + margin: 0 0 16px; +} + +.admin-markdown-viewer ul, +.admin-markdown-viewer ol { + padding-left: 20px; +} + +.admin-markdown-viewer blockquote { + padding-left: 14px; + border-left: 2px solid var(--line-strong); + color: var(--muted); +} + +.admin-markdown-viewer code { + padding: 2px 6px; + background: rgba(255, 255, 255, 0.06); + border: 1px solid rgba(255, 255, 255, 0.08); + font-family: "Courier New", Courier, monospace; + font-size: 12px; +} + +.admin-markdown-viewer .markdown-pre { + padding: 14px; + overflow: auto; + background: rgba(255, 255, 255, 0.04); + border: 1px solid var(--line); +} + +.admin-markdown-viewer .markdown-pre code 
{ + padding: 0; + border: 0; + background: transparent; +} + +.admin-markdown-viewer .markdown-table { + width: 100%; + border-collapse: collapse; +} + +.admin-markdown-viewer .markdown-table th, +.admin-markdown-viewer .markdown-table td { + padding: 10px 12px; + border: 1px solid var(--line); + text-align: left; + vertical-align: top; +} + +.admin-markdown-viewer .markdown-table th { + color: var(--accent); + background: rgba(255, 255, 255, 0.04); +} + +.admin-markdown-viewer hr { + border: 0; + border-top: 1px solid var(--line); + margin: 18px 0; +} + +.admin-dashboard-masthead { + display: grid; + grid-template-columns: minmax(0, 1.3fr) auto; + gap: 20px; + padding: 28px 30px; +} + +.admin-dashboard-title { + font-size: clamp(34px, 4vw, 54px); + line-height: 1.02; + max-width: 10ch; +} + +.admin-dashboard-subtitle { + margin-top: 14px; + max-width: 56ch; + color: var(--muted); + font-size: 15px; + line-height: 1.75; +} + +.admin-dashboard-actions { + display: flex; + flex-wrap: wrap; + align-content: flex-start; + justify-content: flex-end; + gap: 12px; +} + +.admin-dashboard-board { + margin-top: 18px; + display: grid; + gap: 18px; +} + +.admin-dashboard-section { + padding: 24px 26px; +} + +.admin-dashboard-section-head { + display: flex; + justify-content: space-between; + align-items: flex-end; + gap: 16px; + margin-bottom: 18px; +} + +.admin-dashboard-section-title { + margin: 0; + font-size: 18px; +} + +.admin-dashboard-section-note { + color: var(--muted); + font-size: 13px; + line-height: 1.6; +} + +.admin-dashboard-metrics { + grid-template-columns: repeat(4, minmax(0, 1fr)); +} + +@media (max-width: 980px) { + .admin-login-layout { + grid-template-columns: 1fr; + } + + .admin-login-intro { + min-height: auto; + } + + .admin-login-facts { + grid-template-columns: 1fr; + } +} + +@media (max-width: 920px) { + .admin-landing-hero, + .admin-dashboard-masthead, + .admin-console-shell, + .admin-console-header, + .admin-console-two-column, + 
.admin-console-workbench { + grid-template-columns: 1fr; + } + + .admin-landing-intro, + .admin-landing-portal { + min-height: auto; + } + + .admin-dashboard-actions { + justify-content: flex-start; + } + + .admin-dashboard-metrics, + .admin-console-overview-grid, + .admin-console-opensearch-grid { + grid-template-columns: 1fr; + } + + .admin-toolbar { + grid-template-columns: 1fr; + } + + .admin-console-main { + padding: 18px; + } + + .admin-console-sidebar { + border-right: 0; + border-bottom: 1px solid var(--line); + } +} diff --git a/readme.md b/readme.md index 8310093..c14b870 100644 --- a/readme.md +++ b/readme.md @@ -233,8 +233,11 @@ GET /api/evidence/{chunk_uid} - [x] `archive_uid` uses ULID and `chunk_uid` follows `{archive_uid}_{chunk_index}_{short_uid}`. - [x] Runtime import snapshot writing is implemented under `runtime/proofdb/imports/{import_uid}.json`. - [x] Relational persistence is implemented through `ArchiveRepository::saveImport()`, including `archives` and `chunks` writes. +- [x] Minimal admin entry frontend exists: landing page with Archive Cask redirect and admin login, plus a session-backed admin dashboard shell. +- [x] Admin dashboard now includes archives-table management, OpenSearch status, admin-user management, APIDOC viewing, and a whitelist-based maintenance-script terminal. - [x] PostgreSQL is the selected relational database, matching current `pgsql`, JSONB, `BIGSERIAL`, and `TIMESTAMPTZ` implementation. - [x] PostgreSQL setup script exists for creating `archives` and `chunks` tables plus indexes. +- [x] Admin user bootstrap script exists for creating `admin_users` and seeding/updating an admin account. - [x] Async AI metadata queue exists on Redis with pending, delayed, failed, retry, and error keys. - [x] `ai_metadata` Workerman process is registered and can consume Redis jobs. - [x] OpenAI-compatible chat client exists for metadata enrichment. 
@@ -246,6 +249,7 @@ GET /api/evidence/{chunk_uid} - [x] OpenSearch client factory is implemented and supports passwordless local OpenSearch when security is disabled. - [x] OpenSearch `proofdb_chunks` hybrid index mapping exists with BM25 text fields and a 2048-dimension `knn_vector` embedding field. - [x] OpenSearch search-index task handler is implemented and writes embedded chunks through bulk upsert. +- [x] Archive-level `summary` metadata is written into OpenSearch chunk documents and participates in BM25 search alongside `text`, `title`, and other metadata fields. - [x] End-to-end embedding-to-OpenSearch smoke test passed for 14 chunks: all are `embedding_status=embedded`, `search_index_status=indexed`, and OpenSearch documents contain 2048-dimension vectors. - [x] Full-text search service, route, controller, and external API documentation are implemented for `POST /api/search/fulltext`. - [x] Full-text OpenSearch smoke test passed with `query="policy documents"`, returning 12 total hits from indexed chunks. @@ -254,13 +258,19 @@ GET /api/evidence/{chunk_uid} - [x] Hybrid search service, route, controller, and external API documentation are implemented for `POST /api/search/hybrid` using Reciprocal Rank Fusion over full-text and vector candidates. - [x] Hybrid smoke tests passed: English query combines fulltext/vector ranks, and Chinese query falls back to vector recall with the Iraq/Kuwait/Desert Storm chunk as top hit. - [x] Hybrid search supports `ai=true`: the original query is used for vector search, while the full-text query is rewritten into BM25 keywords through the existing OpenAI-compatible LLM chat path. Keyword generation has a shorter timeout and falls back to the original query on failure. +- [x] Chunk detail API and evidence API are implemented with external documentation: `GET /api/chunks/{chunk_uid}` and `GET /api/evidence/{chunk_uid}`. +- [x] Archive detail API is implemented with external documentation: `GET /api/archives/{archive_uid}`. 
+- [x] Archive chunk-list and archive evidence-list APIs are implemented with external documentation: `GET /api/archives/{archive_uid}/chunks` and `GET /api/archives/{archive_uid}/evidence`. +- [x] Evidence smoke test passed for `01KQHVREB6XPYF604RVZAP9NNY_1_39003`, returning page label, citation string, and chunk quote. +- [x] Historical `archives.content` can now be repaired with `php scripts/backfill_archive_content.php`, using normalized `raw` when available and ordered chunk text as fallback. +- [x] OpenSearch repair/reindex maintenance script exists: `php scripts/reindex_opensearch.php`, with optional `--archive_uid=...` targeting. ### Partially Done - [ ] Archive/Page/Chunk model is partly persisted: `archives` and `chunks` tables exist, but pages/page blocks are only summarized in import output and snapshots, not stored as first-class relational tables. -- [ ] `embedding_status`, `embedding_ref`, `embedding_model`, `embedding_error`, and `embedding_updated_at` fields exist; embedding generation into PostgreSQL JSONB and OpenSearch vector indexing are implemented, but vector retrieval API is not implemented yet. +- [x] `embedding_status`, `embedding_ref`, `embedding_model`, `embedding_error`, and `embedding_updated_at` fields exist; embedding generation into PostgreSQL JSONB, OpenSearch vector indexing, and vector retrieval API are all implemented. - [ ] `search_index_status`, `search_index_error`, and `search_index_updated_at` fields exist and are used by the generic task dispatcher/worker. -- [ ] Import response exposes page summaries and chunk IDs, but there is no read API yet to fetch archive, page, or chunk records after import. +- [ ] Import response exposes page summaries and chunk IDs. Archive-level and chunk-level read APIs now exist, but there is still no first-class page record API because pages are not stored as relational rows yet. 
- [ ] AI metadata enrichment updates the archive row, but import-time response only reports the queue state; clients need a follow-up API or polling path to observe completed enrichment. - [ ] Database and Redis credentials are hard-coded in config files; move them to environment variables before production use. @@ -300,16 +310,13 @@ Redis tasks may be duplicated or lost; PostgreSQL status is the recovery source ### Not Done -- [ ] Evidence reconstruction API is not implemented: `GET /api/evidence/{chunk_uid}`. -- [ ] Chunk detail API is not implemented: `GET /api/chunks/{chunk_uid}`. - [ ] Page-level citation reconstruction is not implemented beyond storing `page_start` and `page_end` on chunks. -- [ ] Reindex/re-embed maintenance commands are not present. -- [ ] Reindex maintenance should detect/recover OpenSearch index loss or stale `search_index_status=indexed` rows when the index has been recreated. +- [ ] Re-embed maintenance command is not present. - [ ] Request validation is handwritten in the service; no dedicated validator classes or reusable validation layer are present. - [ ] Automated tests for Markdown parsing, chunking, import persistence, queue behavior, and metadata enrichment are not present. -- [ ] API authentication, rate limiting, and admin controls are not present. -- [ ] Observability for import/search/enrichment jobs is minimal; no structured job metrics or admin status endpoints are present. -- [ ] Default index page/view still uses Webman starter content and is not Proof DB specific. +- [ ] Public API authentication and rate limiting are not present. Minimal admin login/session controls are now present for the maintenance frontend. +- [ ] Observability for import/search/enrichment jobs is still minimal; the admin panel now exposes coarse status endpoints, but there are no historical metrics, tracing, or alerting pipelines yet. 
+- [x] Default landing page is replaced with a Proof DB-specific admin entry surface instead of the Webman starter content. ### Future Optimizations @@ -321,4 +328,4 @@ Redis tasks may be duplicated or lost; PostgreSQL status is the recovery source 2. Add read APIs for archives/chunks/evidence so imported data can be verified without reading snapshots or the database directly. 3. Add focused tests for DOCMASTER page parsing, noise filtering, comment coalescing, chunk UID stability, and repository persistence. 4. Add async task foundation: task statuses, Redis task payload format, generic DB dispatcher process, and generic Redis worker process. (Done for embedding and OpenSearch indexing) -5. Add chunk detail API and evidence reconstruction API. +5. Improve page-level citation reconstruction beyond chunk page range metadata. diff --git a/scriptdoc/README.md b/scriptdoc/README.md new file mode 100644 index 0000000..7f77757 --- /dev/null +++ b/scriptdoc/README.md @@ -0,0 +1,35 @@ +# 脚本文档总览 + +当前 `scriptdoc/` 中的文档按脚本拆分: + +- [setup_database.md](setup_database.md): PostgreSQL 结构初始化与升级 +- [setup_admin_users.md](setup_admin_users.md): 管理员用户表与首个管理员账号初始化 +- [setup_opensearch.md](setup_opensearch.md): OpenSearch 索引初始化 +- [reindex_opensearch.md](reindex_opensearch.md): OpenSearch 重建索引与回灌 +- [backfill_archive_content.md](backfill_archive_content.md): 历史 archive `content` 正文字段回填 + +## 当前运维脚本 + +```text +scripts/setup_database.php +scripts/setup_admin_users.php +scripts/setup_opensearch.php +scripts/reindex_opensearch.php +scripts/backfill_archive_content.php +``` + +## 推荐顺序 + +首次初始化或较完整的修复操作,通常按下面顺序执行: + +```bash +php scripts/setup_database.php +php scripts/setup_opensearch.php +php scripts/reindex_opensearch.php +``` + +如果本地 OpenSearch 使用 HTTPS 且证书为自签名,可在相关脚本前临时加: + +```bash +OPENSEARCH_SSL_VERIFY=false +``` diff --git a/scriptdoc/backfill_archive_content.md
b/scriptdoc/backfill_archive_content.md new file mode 100644 index 0000000..8cbc8f0 --- /dev/null +++ b/scriptdoc/backfill_archive_content.md @@ -0,0 +1,76 @@ +# Archive Content 回填脚本 + +## 脚本路径 + +```text +scripts/backfill_archive_content.php +``` + +## 脚本作用 + +回填历史 `archives.content` 字段。 + +这个脚本主要用于修复旧数据中 `content` 为空的问题。它会按下面顺序尝试生成 `content`: + +1. 如果 archive 有 `raw`,就按当前导入规则把原始 Markdown 规范化成正文文本。 +2. 如果 `raw` 为空,就按 `chunk_index` 顺序拼接现有 chunk 的 `text` 作为回退正文。 + +脚本不会伪造 `raw`。如果历史数据里 `raw` 丢了,脚本只会尽力补 `content`。 + +## 运行前提 + +- 当前环境中的 PostgreSQL 配置可用。 +- 项目依赖已安装完成。 +- 从项目根目录执行命令。 + +## 运行命令 + +默认只处理 `content` 为空的 archive: + +```bash +php scripts/backfill_archive_content.php +``` + +只处理一个 archive: + +```bash +php scripts/backfill_archive_content.php --archive_uid=01KQHVREB6XPYF604RVZAP9NNY +``` + +强制重算,即使 `content` 已经有值: + +```bash +php scripts/backfill_archive_content.php --force +``` + +只预览,不写数据库: + +```bash +php scripts/backfill_archive_content.php --dry-run +``` + +## 成功输出示例 + +```text +[updated] 01KQHVREB6XPYF604RVZAP9NNY source=chunks content_length=6375 +Archive content backfill completed. +Archive filter: auto +Force mode: no +Dry run: no +Scanned: 1 +Updated: 1 +From raw: 0 +From chunks: 1 +Skipped: 0 +``` + +## 适用场景 + +- 修复旧版本导入留下的 `archives.content` 为空问题。 +- 导入逻辑更新后,希望重算归一化正文。 +- 为后续 AI / RAG / archive 级读取补齐正文字段。 + +## 重要限制 + +- 如果历史数据既没有 `raw`,也没有 chunks,脚本会跳过该 archive。 +- 用 chunks 回填时,得到的是拼接后的正文文本,不会恢复原始 Markdown 结构。 diff --git a/scriptdoc/reindex_opensearch.md b/scriptdoc/reindex_opensearch.md new file mode 100644 index 0000000..d515257 --- /dev/null +++ b/scriptdoc/reindex_opensearch.md @@ -0,0 +1,93 @@ +# OpenSearch 重建索引脚本 + +## 脚本路径 + +```text +scripts/reindex_opensearch.php +``` + +## 脚本作用 + +根据 PostgreSQL 中已经完成向量化的 chunk,重新构建 OpenSearch 中的 `proofdb_chunks` 索引内容,并刷新每条 chunk 的派生搜索字段。 + +脚本会做这些事: + +1. 确保 OpenSearch 索引存在。 +2. 把已向量化 chunk 的 `search_index_status` 重置为待索引。 +3. 按 archive 批量重新投递索引任务。 +4. 调用现有 OpenSearch indexing handler 批量写入 chunk 文档。 +5. 
输出重建统计结果。 + +## 运行前提 + +- PostgreSQL 可连接。 +- OpenSearch 可连接。 +- 目标 chunk 的 `embedding_status` 已经是 `embedded`。 +- 项目依赖已安装完成。 +- 从项目根目录执行命令。 + +如果本地 OpenSearch 使用 HTTPS 且证书是自签名: + +```bash +OPENSEARCH_SSL_VERIFY=false php scripts/reindex_opensearch.php +``` + +## 运行命令 + +全量重建: + +```bash +php scripts/reindex_opensearch.php +``` + +只重建一个 archive: + +```bash +php scripts/reindex_opensearch.php --archive_uid=01KQHVREB6XPYF604RVZAP9NNY +``` + +## 成功输出示例 + +```text +OpenSearch reindex completed. +Index: proofdb_chunks +Archive filter: (all embedded archives) +Reset chunks: 14 +Indexed archives: 1 +Indexed chunk rows now marked indexed: 14 +Archives: 01KQHVREB6XPYF604RVZAP9NNY +``` + +## 适用场景 + +- `proofdb_chunks` 被误删后恢复。 +- 数据库里 `search_index_status=3`,但 OpenSearch 中没有对应文档。 +- 索引 mapping 重建后,需要把已经 embedding 完成的数据重新灌回 OpenSearch。 +- archive 的 `summary`、`title`、`tags` 等搜索元数据有更新后,需要刷新到 OpenSearch。 + +## 重要限制 + +这个脚本只处理已经向量化完成的 chunk。 + +它不会: + +- 重新生成 embedding。 +- 修复 embedding 失败的数据。 +- 修复 PostgreSQL 中缺失的 archive 或 chunk。 + +## 推荐用法 + +如果 OpenSearch 整个索引丢了,通常按下面顺序执行: + +```bash +php scripts/setup_opensearch.php +php scripts/reindex_opensearch.php +``` + +如果数据库 schema 也有变动,则先补数据库: + +```bash +php scripts/setup_database.php +php scripts/setup_opensearch.php +php scripts/reindex_opensearch.php +``` diff --git a/scriptdoc/setup_admin_users.md b/scriptdoc/setup_admin_users.md new file mode 100644 index 0000000..bba1da5 --- /dev/null +++ b/scriptdoc/setup_admin_users.md @@ -0,0 +1,56 @@ +# 管理员用户初始化脚本 + +## 脚本路径 + +```text +scripts/setup_admin_users.php +``` + +## 脚本作用 + +初始化管理员登录使用的 `admin_users` 表,并写入一个管理员账号。 + +当前版本会确保: + +- `admin_users` 表存在。 +- `username` 唯一索引存在。 +- `updated_time` 自动更新时间 trigger 存在。 +- 指定用户名会被创建;如果已存在,则会更新显示名和密码哈希。 + +## 运行前提 + +- 当前环境中的 PostgreSQL 配置可用。 +- 项目依赖已安装完成。 +- 从项目根目录执行命令。 + +## 运行命令 + +```bash +php scripts/setup_admin_users.php --username=admin --password='your-password' --display_name='Proof DB Admin' +``` + +其中: + +- `--username` 必填 +- 
`--password` 必填 +- `--display_name` 选填 + +## 成功输出示例 + +```text +Admin users table initialized. +Seeded username: admin +Display name: Proof DB Admin +``` + +## 适用场景 + +- 首次启用管理员登录。 +- 需要创建第一个管理员用户。 +- 需要重置已有管理员的密码。 + +## 重要说明 + +- 这个脚本不会输出明文密码。 +- 再次执行同一用户名时,会更新密码哈希。 +- 建议在安全环境下执行,不要把明文密码写进仓库文件。 diff --git a/scriptdoc/setup_database.md b/scriptdoc/setup_database.md new file mode 100644 index 0000000..d5c5ab2 --- /dev/null +++ b/scriptdoc/setup_database.md @@ -0,0 +1,51 @@ +# 数据库初始化脚本 + +## 脚本路径 + +```text +scripts/setup_database.php +``` + +## 脚本作用 + +初始化或升级 Proof DB 使用的 PostgreSQL 结构。 + +当前版本会确保: + +- `archives` 表存在。 +- `chunks` 表存在。 +- 档案与 chunk 的常用索引存在。 +- embedding / search index 相关状态字段存在。 +- `updated_time` 自动更新时间触发器存在。 + +## 运行前提 + +- 当前环境中的 PostgreSQL 配置可用。 +- 项目依赖已安装完成。 +- 从项目根目录执行命令。 + +## 运行命令 + +```bash +php scripts/setup_database.php +``` + +## 成功输出示例 + +```text +Database connection ok: postgre +Tables initialized: archives, chunks +``` + +## 适用场景 + +- 首次部署环境。 +- 拉取了数据库结构相关代码后同步 schema。 +- 新增了状态字段、索引或 trigger 后补齐现有数据库。 + +## 常见失败信号 + +- `PDOException` + 说明数据库地址、账号密码、网络或 DNS 有问题。 +- SQL 执行错误 + 说明权限不足,或者现有 schema 与代码预期不一致。 diff --git a/scriptdoc/setup_opensearch.md b/scriptdoc/setup_opensearch.md new file mode 100644 index 0000000..b45fd65 --- /dev/null +++ b/scriptdoc/setup_opensearch.md @@ -0,0 +1,59 @@ +# OpenSearch 索引初始化脚本 + +## 脚本路径 + +```text +scripts/setup_opensearch.php +``` + +## 脚本作用 + +创建或确认 Proof DB 使用的 OpenSearch chunk 索引,并同步缺失的增量 mapping 字段。 + +当前版本会确保: + +- `proofdb_chunks` 索引存在。 +- BM25 全文字段 mapping 已建立。 +- 已存在索引上的缺失字段 mapping 会被补齐,例如后续新增的 `summary`。 +- `embedding` 字段为 `knn_vector`。 +- 向量维度与当前配置一致。 + +## 运行前提 + +- OpenSearch 服务已经启动。 +- 当前环境中的 OpenSearch 配置可用。 +- 项目依赖已安装完成。 +- 从项目根目录执行命令。 + +如果本地 OpenSearch 使用 HTTPS 且证书是自签名: + +```bash +OPENSEARCH_SSL_VERIFY=false php scripts/setup_opensearch.php +``` + +## 运行命令 + +```bash +php scripts/setup_opensearch.php +``` + +## 成功输出示例 + +```text +OpenSearch chunk index initialized: proofdb_chunks 
+Vector dimensions: 2048 +``` + +## 适用场景 + +- OpenSearch 首次初始化。 +- `proofdb_chunks` 被删除后重建。 +- 增加了新的文档字段,例如 `summary`。 +- 调整了索引 mapping 或向量维度后重新准备索引。 + +## 常见失败信号 + +- `NoNodesAvailableException` + 说明 host、协议、端口、SSL 校验或服务状态不对。 +- 鉴权失败 + 说明 `OPENSEARCH_USERNAME` / `OPENSEARCH_PASSWORD` 不正确。 diff --git a/scripts/backfill_archive_content.php b/scripts/backfill_archive_content.php new file mode 100644 index 0000000..afa5219 --- /dev/null +++ b/scripts/backfill_archive_content.php @@ -0,0 +1,117 @@ +#!/usr/bin/env php +orderBy('id'); +if ($archiveUid !== null && trim($archiveUid) !== '') { + $query->where('archive_uid', trim($archiveUid)); +} + +if (!$force) { + $query->where(function ($builder) { + $builder->whereNull('content')->orWhere('content', ''); + }); +} + +$archives = $query->get(['archive_uid', 'title', 'content', 'raw'])->all(); +$normalizer = new ArticleImportService(); + +$scanned = 0; +$updated = 0; +$fromRaw = 0; +$fromChunks = 0; +$skipped = 0; + +foreach ($archives as $archive) { + $scanned++; + $archiveUidValue = (string) $archive->archive_uid; + $raw = is_string($archive->raw ?? null) ? $archive->raw : null; + $content = null; + $source = 'none'; + + if (is_string($raw) && trim($raw) !== '') { + $content = $normalizer->normalizeArchiveContentString($raw); + $source = 'raw'; + } else { + $chunks = Db::table('chunks') + ->where('archive_uid', $archiveUidValue) + ->orderBy('chunk_index') + ->pluck('text') + ->all(); + + $chunks = array_values(array_filter(array_map( + static fn ($value): string => trim((string) $value), + $chunks + ), static fn (string $value): bool => $value !== '')); + + if ($chunks !== []) { + $content = trim(implode("\n\n", $chunks)); + $source = 'chunks'; + } + } + + if ($content === null || $content === '') { + $skipped++; + echo "[skip] {$archiveUidValue} no usable raw/chunks" . PHP_EOL; + continue; + } + + if ($dryRun) { + echo "[dry-run] {$archiveUidValue} source={$source} content_length=" . mb_strlen($content) . 
PHP_EOL; + if ($source === 'raw') { + $fromRaw++; + } else { + $fromChunks++; + } + continue; + } + + Db::table('archives') + ->where('archive_uid', $archiveUidValue) + ->update(['content' => $content]); + + $updated++; + if ($source === 'raw') { + $fromRaw++; + } else { + $fromChunks++; + } + + echo "[updated] {$archiveUidValue} source={$source} content_length=" . mb_strlen($content) . PHP_EOL; +} + +echo 'Archive content backfill completed.' . PHP_EOL; +echo 'Archive filter: ' . ($archiveUid ?: 'auto') . PHP_EOL; +echo 'Force mode: ' . ($force ? 'yes' : 'no') . PHP_EOL; +echo 'Dry run: ' . ($dryRun ? 'yes' : 'no') . PHP_EOL; +echo 'Scanned: ' . $scanned . PHP_EOL; +echo 'Updated: ' . $updated . PHP_EOL; +echo 'From raw: ' . $fromRaw . PHP_EOL; +echo 'From chunks: ' . $fromChunks . PHP_EOL; +echo 'Skipped: ' . $skipped . PHP_EOL; diff --git a/scripts/reindex_opensearch.php b/scripts/reindex_opensearch.php new file mode 100644 index 0000000..c285a03 --- /dev/null +++ b/scripts/reindex_opensearch.php @@ -0,0 +1,68 @@ +#!/usr/bin/env php +ensureExists(); + + $resetCount = $repository->resetEmbeddedChunksToPending($archiveUid); + $archiveCount = 0; + $indexedArchives = []; + $indexedChunks = 0; + + while (true) { + $archiveUids = $repository->queuePendingArchiveTasks(100); + if ($archiveUids === []) { + break; + } + + foreach ($archiveUids as $uid) { + $handler->handle([ + 'task_type' => 'search_index', + 'target_type' => 'archive', + 'target_uid' => $uid, + 'attempt' => 1, + ]); + $archiveCount++; + $indexedArchives[] = $uid; + } + } + + $indexedChunksQuery = Db::table('chunks')->where('search_index_status', 3); + if ($archiveUid !== null && trim($archiveUid) !== '') { + $indexedChunksQuery->where('archive_uid', trim($archiveUid)); + } + $indexedChunks = (int) $indexedChunksQuery->count(); + + echo 'OpenSearch reindex completed.' . PHP_EOL; + echo 'Index: ' . config('opensearch.indices.chunks', 'proofdb_chunks') . PHP_EOL; + echo 'Archive filter: ' . 
($archiveUid ?: '(all embedded archives)') . PHP_EOL; + echo 'Reset chunks: ' . $resetCount . PHP_EOL; + echo 'Indexed archives: ' . $archiveCount . PHP_EOL; + echo 'Indexed chunk rows now marked indexed: ' . $indexedChunks . PHP_EOL; + if ($indexedArchives !== []) { + echo 'Archives: ' . implode(', ', $indexedArchives) . PHP_EOL; + } +} catch (Throwable $exception) { + fwrite(STDERR, $exception::class . ': ' . $exception->getMessage() . PHP_EOL); + exit(1); +} diff --git a/scripts/setup_admin_users.php b/scripts/setup_admin_users.php new file mode 100644 index 0000000..12bb2e5 --- /dev/null +++ b/scripts/setup_admin_users.php @@ -0,0 +1,92 @@ +#!/usr/bin/env php + --password= [--display_name=]" . PHP_EOL); + exit(1); +} + +$username = trim($username); +$displayName = is_string($displayName) && trim($displayName) !== '' ? trim($displayName) : $username; +$passwordHash = password_hash($password, PASSWORD_DEFAULT); + +$statements = [ + <<getPdo(); + foreach ($statements as $statement) { + Db::statement($statement); + } + + Db::table('admin_users')->updateOrInsert( + ['username' => $username], + [ + 'display_name' => $displayName, + 'password_hash' => $passwordHash, + 'is_active' => true, + ] + ); + + echo 'Admin users table initialized.' . PHP_EOL; + echo 'Seeded username: ' . $username . PHP_EOL; + echo 'Display name: ' . $displayName . PHP_EOL; +} catch (Throwable $exception) { + fwrite(STDERR, $exception::class . ': ' . $exception->getMessage() . PHP_EOL); + exit(1); +}