[ 'hosts' => $config['hosts'] ?? [], 'ssl_verify' => (bool) ($config['ssl_verify'] ?? true), 'index_name' => $indexName, ], 'database' => [ 'archives_total' => (int) Db::table('archives')->count(), 'chunks_total' => (int) Db::table('chunks')->count(), 'embedded_chunks' => (int) Db::table('chunks')->where('embedding_status', 3)->count(), 'indexed_chunks' => (int) Db::table('chunks')->where('search_index_status', 3)->count(), ], 'opensearch' => [ 'reachable' => false, 'index_exists' => false, 'cluster_name' => null, 'health' => null, 'docs_count' => 0, 'mapping_fields' => [], 'error' => null, ], ]; try { $client = (new OpenSearchClientFactory())->make(); $health = $client->cluster()->health(); $status['opensearch']['reachable'] = true; $status['opensearch']['cluster_name'] = $health['cluster_name'] ?? null; $status['opensearch']['health'] = $health['status'] ?? null; $exists = (bool) $client->indices()->exists(['index' => $indexName]); $status['opensearch']['index_exists'] = $exists; if ($exists) { $stats = $client->indices()->stats(['index' => $indexName]); $mapping = $client->indices()->getMapping(['index' => $indexName]); $status['opensearch']['docs_count'] = (int) (($stats['_all']['primaries']['docs']['count'] ?? 0)); $status['opensearch']['mapping_fields'] = array_keys($mapping[$indexName]['mappings']['properties'] ?? 
[]); } } catch (Throwable $exception) { $status['opensearch']['error'] = $exception->getMessage(); } return $status; }

    /**
     * Browse or search the documents stored in the chunk index.
     *
     * An empty (or whitespace-only) query lists documents with `match_all`;
     * any other query runs a boosted `multi_match` over the text and metadata
     * fields. Results are always ordered by `updated_time`, newest first.
     *
     * Errors raised by the OpenSearch client are not caught here and
     * propagate to the caller.
     *
     * @param string $query Free-text search phrase; trimmed before use.
     * @param int    $size  Maximum number of hits to return, clamped to 1..50.
     *
     * @return array<string, mixed> Keys: `index_name`, `total` (int) and
     *                              `items` (list of flattened hits). When the
     *                              index does not exist, `items` is empty and
     *                              `total` is 0.
     */
    public function documents(string $query = '', int $size = 20): array
    {
        $size = min(50, max(1, $size));
        $indexName = config('opensearch.indices.chunks', 'proofdb_chunks');
        $client = (new OpenSearchClientFactory())->make();

        // Searching a missing index would raise, so report an empty result instead.
        if (!(bool) $client->indices()->exists(['index' => $indexName])) {
            return [
                'index_name' => $indexName,
                'items' => [],
                'total' => 0,
            ];
        }

        $body = [
            '_source' => [
                'includes' => [
                    'chunk_uid',
                    'archive_uid',
                    'chunk_index',
                    'page_start',
                    'page_end',
                    'title',
                    'summary',
                    'source',
                    'author',
                    'year',
                    'series',
                    'tags',
                    'text',
                    'embedding_model',
                    'embedding_dimensions',
                    'created_time',
                    'updated_time',
                ],
            ],
            'size' => $size,
            'sort' => [
                ['updated_time' => ['order' => 'desc']],
            ],
        ];

        $query = trim($query);
        if ($query === '') {
            // (object) [] serialises to the JSON object `{}` that match_all requires.
            $body['query'] = ['match_all' => (object) []];
        } else {
            $body['query'] = [
                'multi_match' => [
                    'query' => $query,
                    'fields' => ['text^3', 'title^2', 'summary^2', 'source', 'author', 'tags'],
                    'type' => 'best_fields',
                ],
            ];
            // Sorting on a field makes the engine skip scoring, which would leave
            // every hit's `_score` null. Ask for scores explicitly so the `score`
            // entry of each item is meaningful for text queries.
            $body['track_scores'] = true;
        }

        $response = $client->search([
            'index' => $indexName,
            'body' => $body,
        ]);

        $hits = $response['hits']['hits'] ?? [];

        // `hits.total` is normally an object (`{"value": N, "relation": ...}`),
        // but accept a bare integer as well rather than silently reporting 0.
        $total = $response['hits']['total'] ?? 0;
        if (is_array($total)) {
            $total = $total['value'] ?? 0;
        }

        return [
            'index_name' => $indexName,
            'total' => (int) $total,
            'items' => array_map(static function (array $hit): array {
                $source = $hit['_source'] ?? [];
                $text = trim((string) ($source['text'] ?? ''));

                return [
                    'score' => $hit['_score'] ?? null,
                    // Fall back to the document id when the source carries no chunk_uid.
                    'chunk_uid' => $source['chunk_uid'] ?? ($hit['_id'] ?? null),
                    'archive_uid' => $source['archive_uid'] ?? null,
                    'chunk_index' => $source['chunk_index'] ?? null,
                    'page_start' => $source['page_start'] ?? null,
                    'page_end' => $source['page_end'] ?? null,
                    'title' => $source['title'] ?? null,
                    'summary' => $source['summary'] ?? null,
                    'source' => $source['source'] ?? null,
                    'author' => $source['author'] ?? null,
                    'year' => $source['year'] ?? null,
                    'series' => $source['series'] ?? null,
                    'tags' => $source['tags'] ?? [],
                    // Only a short, multibyte-safe excerpt of the chunk text is returned.
                    'text_preview' => mb_substr($text, 0, 320),
                    'embedding_model' => $source['embedding_model'] ?? null,
                    'embedding_dimensions' => $source['embedding_dimensions'] ?? null,
                    'created_time' => $source['created_time'] ?? null,
                    'updated_time' => $source['updated_time'] ?? null,
                ];
            }, $hits),
        ];
    }
}