client = $client ?? new OpenAICompatibleClient();
        $this->queue = $queue ?? new LLMRetryQueue();
    }

    /**
     * Fill in missing archive metadata fields by asking the configured LLM.
     *
     * The payload is returned with a `metadata.ai_enrichment` report attached in
     * every case. The report always carries `enabled`, `attempted`, `filled` and
     * `missing`; a failed request adds `error`, a successful one adds `model`,
     * `stream`, `response_format` and `thinking`.
     *
     * NOTE(review): when the title counts as missing only because
     * `metadata.title_source` is 'fallback', that marker is left untouched after
     * the title is replaced, so a later enrich() call on the result still treats
     * the title as missing. Confirm whether that is intended.
     *
     * @param array<string, mixed> $payload Archive record; fields read here are
     *                                      title, year, author, tags, summary,
     *                                      source, series, archive_uid, content,
     *                                      pages and metadata.
     *
     * @return array<string, mixed> The payload with any filled fields and the report.
     */
    public function enrich(array $payload): array
    {
        $missing = $this->missingFields($payload);
        $enabled = $this->enabled();

        // Nothing to do, or the feature is switched off / unconfigured.
        if ($missing === [] || !$enabled) {
            return $this->withAiMeta($payload, [
                'enabled' => $enabled,
                'attempted' => false,
                'filled' => [],
                'missing' => $missing,
            ]);
        }

        // Read once so the report below describes exactly what was sent.
        $model = config('LLMapi.metadata.model');
        $responseFormat = config('LLMapi.metadata.response_format', ['type' => 'json_object']);
        $thinking = config('LLMapi.metadata.thinking', ['type' => 'disabled']);

        try {
            $result = $this->queue->run(
                fn (): array => $this->client->chatJson($this->messages($payload, $missing), [
                    'model' => $model,
                    'temperature' => config('LLMapi.metadata.temperature', 0.1),
                    'max_tokens' => config('LLMapi.metadata.max_tokens', 1200),
                    'stream' => false,
                    'response_format' => $responseFormat,
                    'thinking' => $thinking,
                    'request_id' => $this->requestId($payload, $missing),
                ]),
                config('LLMapi.metadata.retry', []),
            );
        } catch (Throwable $exception) {
            // Best effort: enrichment failures are reported, never propagated.
            return $this->withAiMeta($payload, [
                'enabled' => true,
                'attempted' => true,
                'filled' => [],
                'missing' => $missing,
                'error' => $exception->getMessage(),
            ]);
        }

        $filled = [];
        foreach ($missing as $field) {
            if (!$this->hasUsefulValue($result, $field)) {
                continue;
            }

            // Re-check after normalization: a raw value can look useful (e.g. tags
            // given as a string, or [""]) and still normalize to nothing. Such a
            // field must stay "missing" instead of being stored empty and
            // reported as filled.
            $value = $this->normalizeField($field, $result[$field]);
            if (!$this->hasUsefulValue([$field => $value], $field)) {
                continue;
            }

            $payload[$field] = $value;
            $filled[] = $field;
        }

        return $this->withAiMeta($payload, [
            'enabled' => true,
            'attempted' => true,
            'filled' => $filled,
            'missing' => array_values(array_diff($missing, $filled)),
            'model' => $model,
            'stream' => false,
            'response_format' => $responseFormat,
            'thinking' => $thinking,
        ]);
    }

    /**
     * List the enrichable fields that have no useful value in the payload.
     *
     * @param array<string, mixed> $payload
     *
     * @return list<string> Subset of title, year, author, tags, summary, in that order.
     */
    private function missingFields(array $payload): array
    {
        $fields = ['title', 'year', 'author', 'tags', 'summary'];

        return array_values(array_filter(
            $fields,
            fn (string $field): bool => !$this->hasUsefulValue($payload, $field),
        ));
    }

    /**
     * Whether enrichment may run: the config flag is on and the client reports
     * itself as configured.
     */
    private function enabled(): bool
    {
        return (bool) config('LLMapi.metadata.enabled', true) && $this->client->isConfigured();
    }

    /**
     * Build the chat messages: a fixed system prompt plus a JSON-encoded user
     * message with the missing field names, the known fields and a text sample.
     *
     * @param array<string, mixed> $payload
     * @param list<string>         $missing
     *
     * @return list<array{role: string, content: string}>
     *
     * @throws \JsonException When the user message cannot be encoded.
     */
    private function messages(array $payload, array $missing): array
    {
        $text = $this->sampleText($payload);

        return [
            [
                'role' => 'system',
                'content' => implode("\n", [
                    '你是历史档案元数据整理助手。',
                    '你只能根据用户提供的档案文本抽取或推断元数据。',
                    '请只返回 JSON 对象,不要返回 Markdown,不要解释。',
                    '字段:title(string), year(integer|null), author(string|null), tags(array), summary(string)。',
                    'summary 简洁概括档案内容,80-200 字。',
                    'tags 用档案中常见专名和涉及主题,5-10 个。',
                    '无法判断的字段返回 null 或空数组。',
                    '以上请均使用档案中的语言。',
                ]),
            ],
            [
                'role' => 'user',
                // Without JSON_INVALID_UTF8_SUBSTITUTE, json_encode() returns false on
                // malformed UTF-8 and that false would be sent as the message content.
                // Any remaining encoding failure now throws instead.
                'content' => json_encode(
                    [
                        'missing_fields' => $missing,
                        'known_fields' => [
                            'title' => $payload['title'] ?? null,
                            'year' => $payload['year'] ?? null,
                            'author' => $payload['author'] ?? null,
                            'source' => $payload['source'] ?? null,
                            'series' => $payload['series'] ?? null,
                        ],
                        'archive_text_sample' => $text,
                    ],
                    JSON_UNESCAPED_UNICODE
                        | JSON_UNESCAPED_SLASHES
                        | JSON_INVALID_UTF8_SUBSTITUTE
                        | JSON_THROW_ON_ERROR,
                ),
            ],
        ];
    }

    /**
     * Derive a deterministic request id from the source, archive uid, the first
     * 1000 characters of the archive text and the list of missing fields.
     *
     * The text comes from the same place the prompt sample does (content, else
     * pages), so page-based payloads with different text get different ids.
     *
     * @param array<string, mixed> $payload
     * @param list<string>         $missing
     *
     * @return string 'metadata-' followed by 32 hex characters.
     */
    private function requestId(array $payload, array $missing): string
    {
        $fingerprint = implode('|', [
            (string) ($payload['source'] ?? ''),
            (string) ($payload['archive_uid'] ?? ''),
            mb_substr($this->archiveText($payload), 0, 1000),
            implode(',', $missing),
        ]);

        return 'metadata-' . substr(hash('sha256', $fingerprint), 0, 32);
    }

    /**
     * Return the archive text truncated to `LLMapi.metadata.max_input_chars`
     * characters (default 12000).
     *
     * @param array<string, mixed> $payload
     */
    private function sampleText(array $payload): string
    {
        // Clamp: a negative length would make mb_substr() cut from the end instead.
        $maxChars = max(0, (int) config('LLMapi.metadata.max_input_chars', 12000));

        return mb_substr($this->archiveText($payload), 0, $maxChars);
    }

    /**
     * Extract the untruncated archive text: the string `content` if present,
     * otherwise the string `content` of every page joined by blank lines,
     * otherwise an empty string.
     *
     * @param array<string, mixed> $payload
     */
    private function archiveText(array $payload): string
    {
        if (isset($payload['content']) && is_string($payload['content'])) {
            return $payload['content'];
        }

        if (!isset($payload['pages']) || !is_array($payload['pages'])) {
            return '';
        }

        $parts = [];
        foreach ($payload['pages'] as $page) {
            if (isset($page['content']) && is_string($page['content'])) {
                $parts[] = $page['content'];
            }
        }

        return implode("\n\n", $parts);
    }

    /**
     * Decide whether `$payload[$field]` holds a value worth keeping.
     *
     * A title is never useful while `metadata.title_source` is 'fallback'.
     * Arrays must be non-empty, a year must be numeric and positive, strings
     * must be non-blank, and any other value must be non-null.
     *
     * @param array<string, mixed> $payload
     */
    private function hasUsefulValue(array $payload, string $field): bool
    {
        if (!array_key_exists($field, $payload)) {
            return false;
        }

        if ($field === 'title' && (($payload['metadata']['title_source'] ?? null) === 'fallback')) {
            return false;
        }

        $value = $payload[$field];

        if (is_array($value)) {
            return $value !== [];
        }

        if ($field === 'year') {
            return is_numeric($value) && (int) $value > 0;
        }

        return is_string($value) ? trim($value) !== '' : $value !== null;
    }

    /**
     * Coerce a raw LLM value into the shape stored on the payload.
     *
     * - year: integer when numeric, otherwise null.
     * - tags: list of trimmed, non-empty strings; anything that is not a
     *   string, int or float is dropped, and a non-array input yields [].
     * - other fields: strings are trimmed, other values pass through unchanged.
     */
    private function normalizeField(string $field, mixed $value): mixed
    {
        if ($field === 'year') {
            return is_numeric($value) ? (int) $value : null;
        }

        if ($field === 'tags') {
            if (!is_array($value)) {
                return [];
            }

            $tags = [];
            foreach ($value as $tag) {
                // The model's output is untrusted: casting a nested array to string
                // emits an "Array to string conversion" warning and yields "Array".
                if (!is_string($tag) && !is_int($tag) && !is_float($tag)) {
                    continue;
                }

                $tag = trim((string) $tag);
                if ($tag !== '') {
                    $tags[] = $tag;
                }
            }

            return $tags;
        }

        return is_string($value) ? trim($value) : $value;
    }

    /**
     * Attach the enrichment report under `metadata.ai_enrichment`, replacing a
     * missing or non-array `metadata` entry with a fresh array first.
     *
     * @param array<string, mixed> $payload
     * @param array<string, mixed> $ai
     *
     * @return array<string, mixed>
     */
    private function withAiMeta(array $payload, array $ai): array
    {
        $payload['metadata'] = is_array($payload['metadata'] ?? null) ? $payload['metadata'] : [];
        $payload['metadata']['ai_enrichment'] = $ai;

        return $payload;
    }
}