135 lines
4.9 KiB
PHP
135 lines
4.9 KiB
PHP
<?php
|
|
|
|
namespace app\service;
|
|
|
|
use support\Db;
|
|
|
|
/**
 * Persistence gateway for imported archives and their text chunks.
 *
 * Reads and writes the `archives` and `chunks` tables through the `Db` facade.
 * The `tags`, `metadata` and `chunks` columns of `archives` hold JSON text and
 * are encoded/decoded here so callers only ever see PHP arrays.
 */
class ArchiveRepository
{
    /**
     * Flags used for every JSON column write. JSON_THROW_ON_ERROR makes an
     * unencodable value (e.g. invalid UTF-8) raise instead of yielding `false`,
     * which would otherwise be written to the column.
     */
    private const JSON_FLAGS = JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_THROW_ON_ERROR;

    /**
     * Rows per multi-row INSERT into `chunks`. Each row binds 10 values, so 50
     * rows stays well below common bound-parameter limits.
     */
    private const CHUNK_INSERT_BATCH = 50;

    /**
     * Store (or replace) one imported archive together with all of its chunks.
     *
     * The archive row is upserted by `archive_uid`; the archive's existing chunk
     * rows are deleted and the supplied ones inserted with a reset embedding
     * state. All writes run inside a single `Db::transaction()` call.
     *
     * @param array $import Payload with an `archive` map (must contain a non-empty
     *                      `archive_uid`) and an optional `chunks` list. Each chunk
     *                      is read for `chunk_uid`, `chunk_index`, `page_start`,
     *                      `page_end`, `text` and `length`.
     *
     * @throws \InvalidArgumentException When `archive`, `archive.archive_uid` or
     *                                   `chunks` is missing or malformed.
     * @throws \JsonException            When tags/metadata cannot be JSON-encoded.
     */
    public function saveImport(array $import): void
    {
        $archive = $import['archive'] ?? null;
        $chunks = $import['chunks'] ?? [];

        // Without a uid the upsert and the chunk delete below would both match
        // on NULL, so refuse the payload before touching the database.
        if (!is_array($archive) || !isset($archive['archive_uid']) || $archive['archive_uid'] === '') {
            throw new \InvalidArgumentException('Import payload must contain a non-empty archive.archive_uid.');
        }

        if (!is_array($chunks)) {
            throw new \InvalidArgumentException('Import payload "chunks" must be an array.');
        }

        $archiveUid = $archive['archive_uid'];

        // Build both row sets up front so encoding problems surface before the
        // transaction is opened.
        $archiveRow = [
            'title' => $archive['title'] ?? null,
            'summary' => $archive['summary'] ?? null,
            'year' => $archive['year'] ?? null,
            'author' => $archive['author'] ?? null,
            'source' => $archive['source'] ?? null,
            'series' => $archive['series'] ?? null,
            'tags' => $this->encodeJson($archive['tags'] ?? []),
            'metadata' => $this->encodeJson($archive['metadata'] ?? []),
            'content' => $archive['content'] ?? null,
            'raw' => $archive['raw'] ?? null,
            // Denormalised list of the chunk uids belonging to this archive.
            'chunks' => $this->encodeJson(array_column($chunks, 'chunk_uid')),
        ];

        $chunkRows = [];
        foreach ($chunks as $chunk) {
            $chunkRows[] = [
                'chunk_uid' => $chunk['chunk_uid'],
                'archive_uid' => $archiveUid,
                'chunk_index' => $chunk['chunk_index'],
                'page_start' => $chunk['page_start'],
                'page_end' => $chunk['page_end'],
                'text' => $chunk['text'],
                'length' => $chunk['length'],
                // Re-imported text invalidates any previously stored embedding.
                'embedding_status' => 0,
                'embedding_ref' => null,
                'embedding_model' => null,
            ];
        }

        Db::transaction(static function () use ($archiveUid, $archiveRow, $chunkRows): void {
            Db::table('archives')->updateOrInsert(['archive_uid' => $archiveUid], $archiveRow);

            Db::table('chunks')->where('archive_uid', $archiveUid)->delete();

            // NOTE(review): assumes Db::table() returns an Illuminate-style query
            // builder whose insert() accepts a list of rows — confirm.
            foreach (array_chunk($chunkRows, self::CHUNK_INSERT_BATCH) as $batch) {
                Db::table('chunks')->insert($batch);
            }
        });
    }

    /**
     * Look up a single archive by its uid.
     *
     * @return array|null The archive as an associative array with its JSON
     *                    columns decoded, or null when no row matches.
     */
    public function findArchive(string $archiveUid): ?array
    {
        $row = Db::table('archives')->where('archive_uid', $archiveUid)->first();

        return $row ? $this->archiveToArray($row) : null;
    }

    /**
     * Concatenate the text of an archive's first chunks, ordered by `chunk_index`.
     *
     * @param string $archiveUid Archive whose chunks are read.
     * @param int    $limit      Maximum number of chunks to include.
     *
     * @return string Chunk texts joined by a blank line; empty string when the
     *                archive has no chunks.
     */
    public function findChunksText(string $archiveUid, int $limit = 20): string
    {
        $rows = Db::table('chunks')
            ->where('archive_uid', $archiveUid)
            ->orderBy('chunk_index')
            ->limit($limit)
            ->get(['text'])
            ->all();

        $texts = array_map(static fn ($row): string => (string) $row->text, $rows);

        return implode("\n\n", $texts);
    }

    /**
     * Apply enrichment results to an archive.
     *
     * `$aiMeta` is stored under the `ai_enrichment` key of the archive's metadata
     * (other metadata keys are kept). Of `$fields`, only `title`, `summary`,
     * `year`, `author`, `series` and `tags` are written, and only when the key is
     * present — a present key with a null value overwrites the column.
     *
     * Does nothing when the archive does not exist.
     *
     * @param string $archiveUid Archive to update.
     * @param array  $fields     Column values to overwrite, keyed by column name.
     * @param array  $aiMeta     Enrichment bookkeeping stored inside metadata.
     *
     * @throws \JsonException When metadata or tags cannot be JSON-encoded.
     */
    public function updateMetadata(string $archiveUid, array $fields, array $aiMeta): void
    {
        // Only the metadata column is needed; avoid loading content/raw.
        $row = Db::table('archives')->where('archive_uid', $archiveUid)->first(['metadata']);
        if (!$row) {
            return;
        }

        $metadata = $this->decodeJsonArray($row->metadata ?? null);
        $metadata['ai_enrichment'] = $aiMeta;

        $updates = ['metadata' => $this->encodeJson($metadata)];

        foreach (['title', 'summary', 'year', 'author', 'series', 'tags'] as $field) {
            // array_key_exists (not isset) so an explicit null can clear a column.
            if (!array_key_exists($field, $fields)) {
                continue;
            }

            $updates[$field] = $field === 'tags'
                ? $this->encodeJson($fields[$field] ?? [])
                : $fields[$field];
        }

        Db::table('archives')->where('archive_uid', $archiveUid)->update($updates);
    }

    /**
     * Tell whether an archive (as returned by findArchive()) still lacks metadata.
     *
     * Returns true when the title is flagged as a fallback
     * (`metadata.title_source === 'fallback'`), or when any of `title`, `year`,
     * `author`, `tags`, `summary` is missing, an empty array, or a blank string,
     * or when `year` is falsy or not a positive integer.
     */
    public function archiveNeedsMetadata(array $archive): bool
    {
        // A title that exists but was generated as a fallback still counts as missing.
        if (($archive['metadata']['title_source'] ?? null) === 'fallback') {
            return true;
        }

        foreach (['title', 'year', 'author', 'tags', 'summary'] as $field) {
            $value = $archive[$field] ?? null;

            if (is_array($value)) {
                if ($value === []) {
                    return true;
                }

                continue;
            }

            if ($field === 'year' && (!$value || (int) $value <= 0)) {
                return true;
            }

            if ($value === null || trim((string) $value) === '') {
                return true;
            }
        }

        return false;
    }

    /**
     * Convert a raw `archives` row object into the array shape exposed to callers,
     * decoding the JSON columns (`tags`, `metadata`, `chunks`) into arrays.
     */
    private function archiveToArray(object $archive): array
    {
        return [
            'archive_uid' => $archive->archive_uid,
            'title' => $archive->title,
            'summary' => $archive->summary,
            'year' => $archive->year,
            'author' => $archive->author,
            'source' => $archive->source,
            'series' => $archive->series,
            'tags' => $this->decodeJsonArray($archive->tags ?? null),
            'metadata' => $this->decodeJsonArray($archive->metadata ?? null),
            'content' => $archive->content,
            'raw' => $archive->raw,
            'chunks' => $this->decodeJsonArray($archive->chunks ?? null),
        ];
    }

    /**
     * JSON-encode a value for storage in a JSON text column.
     *
     * @param array|scalar|null $value
     *
     * @throws \JsonException When the value cannot be encoded.
     */
    private function encodeJson($value): string
    {
        return json_encode($value, self::JSON_FLAGS);
    }

    /**
     * Decode a stored JSON column into an array.
     *
     * Reading is deliberately lenient: null, empty, malformed, or non-array JSON
     * all yield an empty array, so callers can always index the result.
     *
     * @param string|null $json Raw column value.
     */
    private function decodeJsonArray($json): array
    {
        if (!is_string($json) || $json === '') {
            return [];
        }

        $decoded = json_decode($json, true);

        return is_array($decoded) ? $decoded : [];
    }
}
|