proofdb/app/service/Search/SearchKeywordService.php
2026-05-07 01:40:58 +08:00

116 lines
4.4 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace app\service\Search;
use app\service\LLM\LLMRetryQueue;
use app\service\LLM\OpenAICompatibleClient;
use Throwable;
/**
 * Rewrites a natural-language search query into keywords suited to BM25
 * full-text search by asking an OpenAI-compatible chat model.
 *
 * Every failure path (client not configured, blank query, exception from the
 * model call, no usable keywords in the reply) degrades to a fallback result
 * that carries the caller's original query unchanged.
 */
class SearchKeywordService
{
    private OpenAICompatibleClient $client;

    private LLMRetryQueue $retryQueue;

    /**
     * @param OpenAICompatibleClient|null $client     Client to use; when null one is built from clientConfig().
     * @param LLMRetryQueue|null          $retryQueue Retry wrapper; when null a default instance is created.
     */
    public function __construct(?OpenAICompatibleClient $client = null, ?LLMRetryQueue $retryQueue = null)
    {
        $this->client = $client ?? new OpenAICompatibleClient($this->clientConfig());
        $this->retryQueue = $retryQueue ?? new LLMRetryQueue();
    }

    /**
     * Generates search keywords for the given query.
     *
     * On success `keywords` holds the cleaned keyword list and `query` is that
     * list joined with single spaces. On any failure the fallback() result is
     * returned instead; this method does not let exceptions from the model
     * call escape.
     *
     * @param string $query Free-text query as typed by the user.
     *
     * @return array{enabled: bool, attempted: bool, error: ?string, keywords: list<string>, query: string, model: mixed}
     */
    public function generate(string $query): array
    {
        if (!$this->client->isConfigured()) {
            return $this->fallback($query, 'LLM API is not configured.');
        }

        // A blank query gives the model nothing to ground keywords in, so skip the round trip.
        if (trim($query) === '') {
            return $this->fallback($query, 'Query is empty.');
        }

        // Resolved once so the model reported in the result is the one actually requested.
        $model = config('LLMapi.search_keywords.model', config('LLMapi.metadata.model'));

        try {
            $messages = $this->messages($query);
            $options = [
                'model' => $model,
                'temperature' => config('LLMapi.search_keywords.temperature', 0.1),
                'max_tokens' => config('LLMapi.search_keywords.max_tokens', 300),
                'stream' => false,
                'response_format' => config('LLMapi.search_keywords.response_format', ['type' => 'json_object']),
                'thinking' => config('LLMapi.search_keywords.thinking', ['type' => 'disabled']),
                // Derived from the query text, so identical queries share a request id.
                'request_id' => 'search-keywords-' . substr(hash('sha256', $query), 0, 32),
            ];

            $result = $this->retryQueue->run(
                fn (): array => $this->client->chatJson($messages, $options),
                config('LLMapi.search_keywords.retry', config('LLMapi.metadata.retry', []))
            );
        } catch (Throwable $exception) {
            return $this->fallback($query, $exception->getMessage());
        }

        $keywords = $this->keywords($result);
        if ($keywords === []) {
            return $this->fallback($query, 'LLM returned no usable keywords.');
        }

        return [
            'enabled' => true,
            'attempted' => true,
            'error' => null,
            'keywords' => $keywords,
            'query' => implode(' ', $keywords),
            'model' => $model,
        ];
    }

    /**
     * Builds the chat messages: a fixed system prompt plus the user query
     * wrapped in a JSON object.
     *
     * @return list<array{role: string, content: string}>
     */
    private function messages(string $query): array
    {
        return [
            [
                'role' => 'system',
                'content' => implode("\n", [
                    '你是历史档案检索关键词生成助手。',
                    '任务:把用户的自然语言问题改写为适合 BM25 全文检索的关键词。',
                    '优先输出档案中可能出现的英文专名、政策名、事件名、人物名、地点名、缩写和年份。',
                    '如果用户输入中文,请翻译或扩展为可能出现在英文档案中的关键词。',
                    '只返回 JSON 对象,不要 Markdown不要解释。',
                    'JSON 格式:{"keywords":["keyword1","keyword2"],"query":"keyword1 keyword2"}。',
                    'keywords 数量 3-12 个;不要编造过于具体而输入中没有依据的事实。',
                ]),
            ],
            [
                'role' => 'user',
                // JSON_INVALID_UTF8_SUBSTITUTE: without it json_encode() returns false for a
                // query containing malformed UTF-8 and the message content would not be a string.
                'content' => json_encode(
                    ['query' => $query],
                    JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
                ),
            ],
        ];
    }

    /**
     * Client settings: the `LLMapi.default` config with the search-keyword
     * specific timeouts written over it.
     */
    private function clientConfig(): array
    {
        $config = config('LLMapi.default', []);
        $config['timeout'] = config('LLMapi.search_keywords.timeout', 12);
        $config['connect_timeout'] = config('LLMapi.search_keywords.connect_timeout', 5);

        return $config;
    }

    /**
     * Extracts a clean keyword list from the decoded model reply.
     *
     * The `keywords` array is preferred. When it is missing, not an array, or
     * contains nothing usable, the `query` string is split on whitespace
     * instead.
     *
     * @param array $result Decoded JSON reply from the model.
     *
     * @return list<string> Trimmed, non-empty, de-duplicated keywords in first-seen order.
     */
    private function keywords(array $result): array
    {
        $keywords = [];
        if (isset($result['keywords']) && is_array($result['keywords'])) {
            $keywords = $this->cleanKeywords($result['keywords']);
        }

        if ($keywords === [] && isset($result['query']) && is_string($result['query'])) {
            $keywords = $this->cleanKeywords($this->splitOnWhitespace($result['query']));
        }

        return $keywords;
    }

    /**
     * Normalises raw keyword candidates into a list of unique strings.
     *
     * Only strings and numbers are accepted: casting a nested array to string
     * would yield the literal "Array" plus a warning, and booleans/null carry
     * no keyword text. Emptiness is checked against '' explicitly so that the
     * keyword "0" is kept.
     *
     * @param array $candidates Raw values taken from the model reply.
     *
     * @return list<string>
     */
    private function cleanKeywords(array $candidates): array
    {
        $unique = [];
        foreach ($candidates as $candidate) {
            if (!is_string($candidate) && !is_int($candidate) && !is_float($candidate)) {
                continue;
            }

            $keyword = trim((string) $candidate);
            if ($keyword !== '') {
                // Keyed by the keyword itself so later duplicates collapse onto the first occurrence.
                $unique[$keyword] = $keyword;
            }
        }

        return array_values($unique);
    }

    /**
     * Splits text on runs of whitespace, including Unicode whitespace such as
     * the ideographic space.
     *
     * @return list<string>
     */
    private function splitOnWhitespace(string $text): array
    {
        $parts = preg_split('/\s+/u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($parts === false) {
            // The /u pattern fails on malformed UTF-8; retry byte-wise rather than lose the text.
            $parts = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY);
        }

        return $parts === false ? [] : $parts;
    }

    /**
     * Result returned when no LLM keywords are available; the caller's query
     * is passed through untouched.
     *
     * NOTE(review): `attempted` is false here even when generate() did call the
     * model and it failed — confirm with callers whether that is intended.
     *
     * @return array{enabled: bool, attempted: bool, error: string, keywords: list<string>, query: string, model: null}
     */
    private function fallback(string $query, string $error): array
    {
        return [
            'enabled' => true,
            'attempted' => false,
            'error' => $error,
            'keywords' => [],
            'query' => $query,
            'model' => null,
        ];
    }
}