updateOrInsert(
                ['archive_uid' => $archive['archive_uid']],
                [
                    'title' => $archive['title'] ?? null,
                    'summary' => $archive['summary'] ?? null,
                    'year' => $archive['year'] ?? null,
                    'author' => $archive['author'] ?? null,
                    'source' => $archive['source'] ?? null,
                    'series' => $archive['series'] ?? null,
                    'tags' => json_encode($archive['tags'] ?? [], JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES),
                    'metadata' => json_encode($archive['metadata'] ?? [], JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES),
                    'content' => $archive['content'] ?? null,
                    'raw' => $archive['raw'] ?? null,
                    'chunks' => json_encode($chunkUids, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES),
                ]
            );

            // Replace the archive's chunk rows: remove the existing ones, then insert the
            // new set with embedding/search-index status 0 and the related fields cleared.
            Db::table('chunks')->where('archive_uid', $archive['archive_uid'])->delete();

            foreach ($chunks as $chunk) {
                Db::table('chunks')->insert([
                    'chunk_uid' => $chunk['chunk_uid'],
                    'archive_uid' => $archive['archive_uid'],
                    'chunk_index' => $chunk['chunk_index'],
                    'page_start' => $chunk['page_start'],
                    'page_end' => $chunk['page_end'],
                    'text' => $chunk['text'],
                    'length' => $chunk['length'],
                    'embedding_status' => 0,
                    'embedding_ref' => null,
                    'embedding_model' => null,
                    'embedding_error' => null,
                    'embedding_updated_at' => null,
                    'search_index_status' => 0,
                    'search_index_error' => null,
                    'search_index_updated_at' => null,
                ]);
            }
        });
    }

    /**
     * Loads a single archive row by its UID.
     *
     * @return array<string, mixed>|null The archive in the shape built by archiveToArray(),
     *                                   or null when no row matches.
     */
    public function findArchive(string $archiveUid): ?array
    {
        $archive = Db::table('archives')->where('archive_uid', $archiveUid)->first();

        return $archive ? $this->archiveToArray($archive) : null;
    }

    /**
     * Concatenates the text of an archive's chunks, ordered by chunk_index.
     *
     * @param int $limit Maximum number of chunks to read.
     *
     * @return string Chunk texts joined by a blank line; an empty string when there are no chunks.
     */
    public function findChunksText(string $archiveUid, int $limit = 20): string
    {
        $chunks = Db::table('chunks')
            ->where('archive_uid', $archiveUid)
            ->orderBy('chunk_index')
            ->limit($limit)
            ->get(['text'])
            ->all();

        $texts = array_map(static fn ($chunk): string => (string) $chunk->text, $chunks);

        return implode("\n\n", $texts);
    }

    /**
     * Loads one chunk joined with its parent archive's descriptive fields.
     *
     * @return array<string, mixed>|null The chunk in the shape built by chunkRowToArray(),
     *                                   or null when no row matches.
     */
    public function findChunk(string $chunkUid): ?array
    {
        $row = Db::table('chunks')
            ->join('archives', 'chunks.archive_uid', '=', 'archives.archive_uid')
            ->where('chunks.chunk_uid', $chunkUid)
            ->first($this->chunkWithArchiveColumns());

        // Same mapper as findArchiveChunks(), so both lookups always return the same shape.
        return $row ? $this->chunkRowToArray($row) : null;
    }

    /**
     * Loads every chunk of an archive (joined with the archive's descriptive fields),
     * ordered by chunk_index.
     *
     * @return array<int, array<string, mixed>> One chunkRowToArray() result per row.
     */
    public function findArchiveChunks(string $archiveUid): array
    {
        $rows = Db::table('chunks')
            ->join('archives', 'chunks.archive_uid', '=', 'archives.archive_uid')
            ->where('chunks.archive_uid', $archiveUid)
            ->orderBy('chunks.chunk_index')
            ->get($this->chunkWithArchiveColumns())
            ->all();

        return array_map(fn (object $row): array => $this->chunkRowToArray($row), $rows);
    }

    /**
     * Stores AI enrichment info under metadata['ai_enrichment'] and applies the provided
     * field overrides to an archive.
     *
     * Only the keys title, summary, year, author, series and tags are read from $fields;
     * a key that is present with a null value is still written. Nothing is written when
     * the archive does not exist.
     *
     * @param array<string, mixed> $fields Field overrides; 'tags' is stored JSON-encoded.
     * @param array<mixed>         $aiMeta Value stored under metadata['ai_enrichment'].
     *
     * @throws \JsonException When metadata or tags cannot be JSON-encoded; raised before any
     *                        write so the stored values are never replaced by `false`.
     */
    public function updateMetadata(string $archiveUid, array $fields, array $aiMeta): void
    {
        $archive = $this->findArchive($archiveUid);
        if ($archive === null) {
            // No such archive: the UPDATE below would match no rows anyway.
            return;
        }

        $jsonFlags = JSON_THROW_ON_ERROR | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES;

        $metadata = $archive['metadata'] ?? [];
        $metadata['ai_enrichment'] = $aiMeta;

        $updates = [
            'metadata' => json_encode($metadata, $jsonFlags),
        ];

        foreach (['title', 'summary', 'year', 'author', 'series', 'tags'] as $field) {
            if (!array_key_exists($field, $fields)) {
                continue;
            }

            $updates[$field] = $field === 'tags'
                ? json_encode($fields[$field] ?? [], $jsonFlags)
                : $fields[$field];
        }

        Db::table('archives')->where('archive_uid', $archiveUid)->update($updates);
    }

    /**
     * Reports whether an archive is missing descriptive metadata.
     *
     * Returns true when metadata['title_source'] is 'fallback', or when any of title, year,
     * author, tags or summary is absent, null, an empty array or a blank string; year must
     * additionally be a positive number.
     *
     * @param array<string, mixed> $archive Archive in the shape built by archiveToArray().
     */
    public function archiveNeedsMetadata(array $archive): bool
    {
        // A title that was only filled in by a fallback counts as missing.
        if (($archive['metadata']['title_source'] ?? null) === 'fallback') {
            return true;
        }

        foreach (['title', 'year', 'author', 'tags', 'summary'] as $field) {
            $value = $archive[$field] ?? null;

            if ($value === []) {
                return true;
            }

            if ($field === 'year' && (!$value || (int) $value <= 0)) {
                return true;
            }

            if (!is_array($value) && ($value === null || trim((string) $value) === '')) {
                return true;
            }
        }

        return false;
    }

    /**
     * Converts an archives row object into a plain array.
     *
     * The tags, metadata and chunks columns are JSON-decoded; a null column or a decoded
     * value that is falsy (including a failed decode) becomes an empty array.
     *
     * @return array<string, mixed>
     */
    private function archiveToArray(object $archive): array
    {
        return [
            'archive_uid' => $archive->archive_uid,
            'title' => $archive->title,
            'summary' => $archive->summary,
            'year' => $archive->year,
            'author' => $archive->author,
            'source' => $archive->source,
            'series' => $archive->series,
            'tags' => json_decode($archive->tags ?? '[]', true) ?: [],
            'metadata' => json_decode($archive->metadata ?? '{}', true) ?: [],
            'content' => $archive->content,
            'raw' => $archive->raw,
            'chunks' => json_decode($archive->chunks ?? '[]', true) ?: [],
        ];
    }

    /**
     * Columns selected by the chunk lookups that join chunks with archives; these are the
     * properties chunkRowToArray() reads from each row.
     *
     * @return array<int, string>
     */
    private function chunkWithArchiveColumns(): array
    {
        return [
            'chunks.chunk_uid',
            'chunks.archive_uid',
            'chunks.chunk_index',
            'chunks.page_start',
            'chunks.page_end',
            'chunks.text',
            'chunks.length',
            'chunks.embedding_status',
            'chunks.embedding_ref',
            'chunks.embedding_model',
            'chunks.embedding_error',
            'chunks.search_index_status',
            'chunks.search_index_error',
            'archives.title',
            'archives.summary',
            'archives.year',
            'archives.author',
            'archives.source',
            'archives.series',
            'archives.tags',
            'archives.metadata',
        ];
    }

    /**
     * Converts a joined chunks/archives row into a typed array, with the archive's
     * descriptive fields nested under the 'archive' key.
     *
     * @param object $row Row exposing the columns listed by chunkWithArchiveColumns().
     *
     * @return array<string, mixed>
     */
    private function chunkRowToArray(object $row): array
    {
        $archive = [
            'archive_uid' => (string) $row->archive_uid,
            'title' => $row->title,
            'summary' => $row->summary,
            'year' => $row->year === null ? null : (int) $row->year,
            'author' => $row->author,
            'source' => $row->source,
            'series' => $row->series,
            'tags' => $this->decodeJson($row->tags ?? null, []),
            'metadata' => $this->decodeJson($row->metadata ?? null, []),
        ];

        return [
            'chunk_uid' => (string) $row->chunk_uid,
            'archive_uid' => (string) $row->archive_uid,
            'chunk_index' => (int) $row->chunk_index,
            'page_start' => $row->page_start === null ? null : (int) $row->page_start,
            'page_end' => $row->page_end === null ? null : (int) $row->page_end,
            'pages' => $this->pages($row->page_start, $row->page_end),
            'text' => (string) $row->text,
            'length' => $row->length === null ? null : (int) $row->length,
            'embedding_status' => (int) $row->embedding_status,
            'embedding_ref' => $this->decodeJson($row->embedding_ref ?? null, null),
            'embedding_model' => $row->embedding_model,
            'embedding_error' => $row->embedding_error,
            'search_index_status' => (int) $row->search_index_status,
            'search_index_error' => $row->search_index_error,
            'archive' => $archive,
        ];
    }

    /**
     * Leniently decodes a JSON column value.
     *
     * Arrays are returned unchanged. Null, non-string values, blank strings and strings
     * that fail to decode yield $fallback. A string holding the valid JSON literal `null`
     * decodes to null rather than $fallback.
     */
    private function decodeJson(mixed $value, mixed $fallback): mixed
    {
        if ($value === null) {
            return $fallback;
        }

        if (is_array($value)) {
            return $value;
        }

        if (!is_string($value) || trim($value) === '') {
            return $fallback;
        }

        $decoded = json_decode($value, true);
        $decodeFailed = $decoded === null && json_last_error() !== JSON_ERROR_NONE;

        return $decodeFailed ? $fallback : $decoded;
    }

    /**
     * Expands a page span into the list of page numbers it covers.
     *
     * When both bounds are numeric the result is every integer from start to end
     * (an end below start is treated as equal to start). Otherwise the result is the
     * bounds that are neither null nor an empty string, unchanged and in order.
     *
     * @return array<int, mixed>
     */
    private function pages(mixed $pageStart, mixed $pageEnd): array
    {
        if (!is_numeric($pageStart) || !is_numeric($pageEnd)) {
            $present = array_filter(
                [$pageStart, $pageEnd],
                static fn ($value): bool => $value !== null && $value !== ''
            );

            return array_values($present);
        }

        $start = (int) $pageStart;
        $end = max($start, (int) $pageEnd);

        return range($start, $end);
    }
}