proofdb/app/service/ArchiveRepository.php
2026-05-01 23:40:14 +08:00

135 lines
4.9 KiB
PHP

<?php
namespace app\service;
use support\Db;
/**
 * Persistence layer for imported archives and their text chunks.
 *
 * Reads and writes the `archives` and `chunks` tables through the query
 * builder. The `tags`, `metadata` and `chunks` columns of `archives` hold
 * JSON-encoded values; this class encodes them on write and decodes them on read.
 */
class ArchiveRepository
{
    /**
     * Flags for every JSON column written by this class.
     *
     * Malformed UTF-8 is substituted instead of making json_encode() return
     * false, and any remaining encoding failure throws rather than silently
     * storing `false` in the column.
     */
    private const JSON_FLAGS = JSON_UNESCAPED_UNICODE
        | JSON_UNESCAPED_SLASHES
        | JSON_INVALID_UTF8_SUBSTITUTE
        | JSON_THROW_ON_ERROR;

    /** Rows per INSERT when writing chunks; keeps the bound-parameter count per statement small. */
    private const CHUNK_INSERT_BATCH = 50;

    /** Columns that updateMetadata() is allowed to overwrite from the caller's $fields. */
    private const UPDATABLE_FIELDS = ['title', 'summary', 'year', 'author', 'series', 'tags'];

    /**
     * Upserts an archive and replaces all of its chunks inside one transaction.
     *
     * Existing chunks of the archive are deleted and the supplied ones inserted
     * with their embedding columns reset (status 0, ref and model null).
     *
     * @param array $import Must contain `archive` (an array with at least `archive_uid`;
     *                      `title`, `summary`, `year`, `author`, `source`, `series`, `tags`,
     *                      `metadata`, `content` and `raw` are optional). `chunks` is a list
     *                      of arrays with `chunk_uid`, `chunk_index`, `page_start`,
     *                      `page_end`, `text` and `length`; a missing `chunks` key is
     *                      treated as "no chunks".
     *
     * @throws \JsonException When tags, metadata or the chunk uid list cannot be JSON-encoded.
     */
    public function saveImport(array $import): void
    {
        Db::transaction(static function () use ($import): void {
            $archive = $import['archive'];
            $archiveUid = $archive['archive_uid'];
            $chunks = $import['chunks'] ?? [];

            Db::table('archives')->updateOrInsert(
                ['archive_uid' => $archiveUid],
                [
                    'title' => $archive['title'] ?? null,
                    'summary' => $archive['summary'] ?? null,
                    'year' => $archive['year'] ?? null,
                    'author' => $archive['author'] ?? null,
                    'source' => $archive['source'] ?? null,
                    'series' => $archive['series'] ?? null,
                    'tags' => self::encodeJson($archive['tags'] ?? []),
                    'metadata' => self::encodeJson($archive['metadata'] ?? []),
                    'content' => $archive['content'] ?? null,
                    'raw' => $archive['raw'] ?? null,
                    // The archive row keeps the list of its chunk uids alongside the chunk rows.
                    'chunks' => self::encodeJson(array_column($chunks, 'chunk_uid')),
                ]
            );

            // Re-import replaces the chunk set wholesale.
            Db::table('chunks')->where('archive_uid', $archiveUid)->delete();

            $rows = array_map(
                static fn ($chunk): array => [
                    'chunk_uid' => $chunk['chunk_uid'],
                    'archive_uid' => $archiveUid,
                    'chunk_index' => $chunk['chunk_index'],
                    'page_start' => $chunk['page_start'],
                    'page_end' => $chunk['page_end'],
                    'text' => $chunk['text'],
                    'length' => $chunk['length'],
                    'embedding_status' => 0,
                    'embedding_ref' => null,
                    'embedding_model' => null,
                ],
                $chunks
            );

            // Multi-row inserts instead of one statement per chunk.
            foreach (array_chunk($rows, self::CHUNK_INSERT_BATCH) as $batch) {
                Db::table('chunks')->insert($batch);
            }
        });
    }

    /**
     * Loads one archive by uid.
     *
     * @return array|null The archive as an associative array with JSON columns decoded,
     *                    or null when no row matches.
     */
    public function findArchive(string $archiveUid): ?array
    {
        $archive = Db::table('archives')->where('archive_uid', $archiveUid)->first();

        return $archive ? $this->archiveToArray($archive) : null;
    }

    /**
     * Concatenates the text of an archive's chunks, ordered by `chunk_index`.
     *
     * @param int $limit Value passed to the query's limit() (default 20).
     *
     * @return string Chunk texts joined by a blank line; empty string when there are no chunks.
     */
    public function findChunksText(string $archiveUid, int $limit = 20): string
    {
        $chunks = Db::table('chunks')
            ->where('archive_uid', $archiveUid)
            ->orderBy('chunk_index')
            ->limit($limit)
            ->get(['text'])
            ->all();

        return implode("\n\n", array_map(static fn ($chunk): string => (string) $chunk->text, $chunks));
    }

    /**
     * Stores AI enrichment info under `metadata.ai_enrichment` and overwrites the
     * whitelisted columns present in $fields.
     *
     * Does nothing when the archive does not exist.
     *
     * @param array $fields Any subset of title, summary, year, author, series, tags.
     *                      Keys outside that list are ignored; a key that is present is
     *                      written even when its value is null (tags null is stored as `[]`).
     * @param array $aiMeta Stored verbatim as `metadata['ai_enrichment']`.
     *
     * @throws \JsonException When metadata or tags cannot be JSON-encoded.
     */
    public function updateMetadata(string $archiveUid, array $fields, array $aiMeta): void
    {
        // Only the metadata column is needed; avoid pulling content/raw for this update.
        $row = Db::table('archives')->where('archive_uid', $archiveUid)->first(['metadata']);
        if (!$row) {
            return;
        }

        $metadata = self::decodeJsonArray($row->metadata ?? null);
        $metadata['ai_enrichment'] = $aiMeta;

        $updates = ['metadata' => self::encodeJson($metadata)];
        foreach (self::UPDATABLE_FIELDS as $field) {
            // array_key_exists so that an explicit null still overwrites the column.
            if (!array_key_exists($field, $fields)) {
                continue;
            }
            $updates[$field] = $field === 'tags'
                ? self::encodeJson($fields[$field] ?? [])
                : $fields[$field];
        }

        Db::table('archives')->where('archive_uid', $archiveUid)->update($updates);
    }

    /**
     * Tells whether an archive array is missing descriptive metadata.
     *
     * Returns true when the title is marked as a fallback
     * (`metadata.title_source === 'fallback'`), or when any of title, year, author,
     * tags, summary is absent, null, an empty array, a blank string, or (for year)
     * falsy / not a positive integer.
     *
     * @param array $archive Archive in the shape produced by findArchive().
     */
    public function archiveNeedsMetadata(array $archive): bool
    {
        if (($archive['metadata']['title_source'] ?? null) === 'fallback') {
            return true;
        }

        foreach (['title', 'year', 'author', 'tags', 'summary'] as $field) {
            $value = $archive[$field] ?? null;

            if (is_array($value)) {
                if ($value === []) {
                    return true;
                }
                // A non-empty array counts as present for every field.
                continue;
            }

            if ($field === 'year' && (!$value || (int) $value <= 0)) {
                return true;
            }

            if ($value === null || trim((string) $value) === '') {
                return true;
            }
        }

        return false;
    }

    /**
     * Converts an `archives` row object into an associative array, decoding the
     * JSON columns (`tags`, `metadata`, `chunks`) to arrays.
     */
    private function archiveToArray(object $archive): array
    {
        return [
            'archive_uid' => $archive->archive_uid,
            'title' => $archive->title,
            'summary' => $archive->summary,
            'year' => $archive->year,
            'author' => $archive->author,
            'source' => $archive->source,
            'series' => $archive->series,
            'tags' => self::decodeJsonArray($archive->tags ?? null),
            'metadata' => self::decodeJsonArray($archive->metadata ?? null),
            'content' => $archive->content,
            'raw' => $archive->raw,
            'chunks' => self::decodeJsonArray($archive->chunks ?? null),
        ];
    }

    /**
     * JSON-encodes a value for storage in a JSON column.
     *
     * @param mixed $value
     *
     * @throws \JsonException When the value cannot be encoded.
     */
    private static function encodeJson($value): string
    {
        return json_encode($value, self::JSON_FLAGS);
    }

    /**
     * Decodes a JSON column to an array.
     *
     * Anything that does not decode to an array (null, empty string, invalid JSON,
     * or a scalar JSON value such as `"abc"` or `7`) yields an empty array, so
     * callers can always treat the result as an array.
     *
     * @param mixed $json Raw column value.
     */
    private static function decodeJsonArray($json): array
    {
        if (!is_string($json) || $json === '') {
            return [];
        }

        $decoded = json_decode($json, true);

        return is_array($decoded) ? $decoded : [];
    }
}