116 lines
4.4 KiB
PHP
116 lines
4.4 KiB
PHP
<?php
|
||
|
||
namespace app\service\Search;
|
||
|
||
use app\service\LLM\LLMRetryQueue;
|
||
use app\service\LLM\OpenAICompatibleClient;
|
||
use Throwable;
|
||
|
||
/**
 * Rewrites a natural-language search query into full-text search keywords by
 * asking an OpenAI-compatible chat endpoint for a JSON answer.
 *
 * Every failure path (client not configured, empty query, exception from the
 * call, unusable answer) degrades to a "fallback" result that carries the
 * caller's original query unchanged, so generate() never throws for those cases.
 */
class SearchKeywordService
{
    private OpenAICompatibleClient $client;

    private LLMRetryQueue $retryQueue;

    /**
     * @param OpenAICompatibleClient|null $client     Client to use; when null, one is built from clientConfig().
     * @param LLMRetryQueue|null          $retryQueue Retry wrapper to use; when null, a default instance is created.
     */
    public function __construct(?OpenAICompatibleClient $client = null, ?LLMRetryQueue $retryQueue = null)
    {
        $this->client = $client ?? new OpenAICompatibleClient($this->clientConfig());
        $this->retryQueue = $retryQueue ?? new LLMRetryQueue();
    }

    /**
     * Generate search keywords for a user query.
     *
     * @param string $query Raw user query.
     *
     * @return array{enabled: bool, attempted: bool, error: ?string, keywords: list<string>, query: string, model: mixed}
     *         On success `keywords` holds the cleaned keywords and `query` is those keywords joined by spaces.
     *         On any failure the fallback() shape is returned with the original query.
     */
    public function generate(string $query): array
    {
        if (!$this->client->isConfigured()) {
            return $this->fallback($query, 'LLM API is not configured.');
        }

        // Nothing to rewrite: skip the remote call for an empty or whitespace-only query.
        if (trim($query) === '') {
            return $this->fallback($query, 'Search query is empty.');
        }

        try {
            // Resolved once so the request and the returned result always report the same model.
            // Kept inside the try so a failing config lookup still degrades to the fallback.
            $model = config('LLMapi.search_keywords.model', config('LLMapi.metadata.model'));

            $result = $this->retryQueue->run(
                fn (): array => $this->client->chatJson($this->messages($query), [
                    'model' => $model,
                    'temperature' => config('LLMapi.search_keywords.temperature', 0.1),
                    'max_tokens' => config('LLMapi.search_keywords.max_tokens', 300),
                    'stream' => false,
                    'response_format' => config('LLMapi.search_keywords.response_format', ['type' => 'json_object']),
                    'thinking' => config('LLMapi.search_keywords.thinking', ['type' => 'disabled']),
                    // Derived from the query text, so the same query always yields the same request id.
                    'request_id' => 'search-keywords-' . substr(hash('sha256', $query), 0, 32),
                ]),
                config('LLMapi.search_keywords.retry', config('LLMapi.metadata.retry', []))
            );
        } catch (Throwable $exception) {
            return $this->fallback($query, $exception->getMessage());
        }

        $keywords = $this->keywords($result);
        if ($keywords === []) {
            return $this->fallback($query, 'LLM returned no usable keywords.');
        }

        return [
            'enabled' => true,
            'attempted' => true,
            'error' => null,
            'keywords' => $keywords,
            'query' => implode(' ', $keywords),
            'model' => $model,
        ];
    }

    /**
     * Build the chat messages: a fixed system prompt plus the user query wrapped in a JSON object.
     *
     * @param string $query Raw user query.
     *
     * @return list<array{role: string, content: string}>
     */
    private function messages(string $query): array
    {
        return [
            [
                'role' => 'system',
                'content' => implode("\n", [
                    '你是历史档案检索关键词生成助手。',
                    '任务:把用户的自然语言问题改写为适合 BM25 全文检索的关键词。',
                    '优先输出档案中可能出现的英文专名、政策名、事件名、人物名、地点名、缩写和年份。',
                    '如果用户输入中文,请翻译或扩展为可能出现在英文档案中的关键词。',
                    '只返回 JSON 对象,不要 Markdown,不要解释。',
                    'JSON 格式:{"keywords":["keyword1","keyword2"],"query":"keyword1 keyword2"}。',
                    'keywords 数量 3-12 个;不要编造过于具体而输入中没有依据的事实。',
                ]),
            ],
            [
                'role' => 'user',
                // JSON_INVALID_UTF8_SUBSTITUTE: without it json_encode() returns false for a query
                // containing malformed UTF-8, and the message content would be a boolean.
                'content' => json_encode(
                    ['query' => $query],
                    JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE
                ),
            ],
        ];
    }

    /**
     * Client settings: the `LLMapi.default` config with the search-keyword timeouts applied on top.
     *
     * @return array<string, mixed>
     */
    private function clientConfig(): array
    {
        $config = config('LLMapi.default', []);
        $config['timeout'] = config('LLMapi.search_keywords.timeout', 12);
        $config['connect_timeout'] = config('LLMapi.search_keywords.connect_timeout', 5);

        return $config;
    }

    /**
     * Extract a clean, de-duplicated keyword list from the decoded LLM answer.
     *
     * Uses `keywords` when it is an array; otherwise splits a string `query` on whitespace.
     * Only string and numeric entries are kept (nested arrays, objects, booleans and nulls
     * are skipped), each is trimmed, and empty strings are dropped. The keyword "0" is kept.
     *
     * @param array<string, mixed> $result Decoded JSON object returned by the LLM.
     *
     * @return list<string>
     */
    private function keywords(array $result): array
    {
        $candidates = [];
        if (isset($result['keywords']) && is_array($result['keywords'])) {
            $candidates = $result['keywords'];
        } elseif (isset($result['query']) && is_string($result['query'])) {
            $candidates = preg_split('/\s+/', $result['query']) ?: [];
        }

        $keywords = [];
        foreach ($candidates as $candidate) {
            // Casting an array to string warns and yields "Array"; objects may throw. Skip both.
            if (!is_string($candidate) && !is_int($candidate) && !is_float($candidate)) {
                continue;
            }

            $keyword = trim((string) $candidate);
            // Explicit comparison instead of a truthiness filter, which would discard "0".
            if ($keyword !== '') {
                $keywords[] = $keyword;
            }
        }

        return array_values(array_unique($keywords));
    }

    /**
     * Result returned when no LLM keywords are available: the original query with an error message.
     *
     * NOTE(review): `attempted` is false here even when the LLM call ran and then failed
     * (exception or empty answer) — confirm that is what callers expect before changing it.
     *
     * @param string $query Original user query, passed through unchanged.
     * @param string $error Human-readable reason for falling back.
     *
     * @return array{enabled: bool, attempted: bool, error: string, keywords: list<string>, query: string, model: null}
     */
    private function fallback(string $query, string $error): array
    {
        return [
            'enabled' => true,
            'attempted' => false,
            'error' => $error,
            'keywords' => [],
            'query' => $query,
            'model' => null,
        ];
    }
}
|