client();
        $index = $this->indexName();

        if ($client->indices()->exists(['index' => $index])) {
            $this->ensureProperties($client, $index);

            return;
        }

        $client->indices()->create([
            'index' => $index,
            'body' => $this->mapping(),
        ]);
    }

    /**
     * Index a batch of documents with a single bulk request.
     *
     * Every document is written with an "index" action into the configured
     * index, using the document's 'chunk_uid' entry as its id.
     *
     * @param array $documents Documents to index; each one must provide a
     *                         'chunk_uid' entry.
     *
     * @return array The bulk response from the client, or
     *               ['items' => [], 'errors' => false] when $documents is
     *               empty (no request is sent in that case).
     */
    public function bulkIndex(array $documents): array
    {
        if ($documents === []) {
            return ['items' => [], 'errors' => false];
        }

        // The target index is the same for every action line, so resolve it
        // once instead of once per document.
        $indexName = $this->indexName();

        $body = [];

        foreach ($documents as $document) {
            // Bulk bodies alternate: action metadata first, then the source.
            $body[] = [
                'index' => [
                    '_index' => $indexName,
                    '_id' => $document['chunk_uid'],
                ],
            ];
            $body[] = $document;
        }

        return $this->client()->bulk([
            'refresh' => config('opensearch.bulk.refresh', 'false'),
            'body' => $body,
        ]);
    }

    /**
     * Build the index definition: settings with k-NN enabled plus the field
     * mappings for chunk documents.
     *
     * The 'embedding' vector's dimension, space type and engine are read from
     * the 'opensearch.vector.*' config keys, defaulting to 2048,
     * 'cosinesimil' and 'lucene' respectively.
     *
     * @return array Array with 'settings' and 'mappings' keys, used as the
     *               body when the index is created.
     */
    public function mapping(): array
    {
        return [
            'settings' => [
                'index' => [
                    'knn' => true,
                ],
            ],
            'mappings' => [
                'properties' => [
                    'chunk_uid' => ['type' => 'keyword'],
                    'archive_uid' => ['type' => 'keyword'],
                    'chunk_index' => ['type' => 'integer'],
                    'page_start' => ['type' => 'integer'],
                    'page_end' => ['type' => 'integer'],
                    'title' => $this->textWithKeyword(),
                    'summary' => ['type' => 'text'],
                    'source' => $this->textWithKeyword(),
                    'author' => $this->textWithKeyword(),
                    'year' => ['type' => 'integer'],
                    'series' => $this->textWithKeyword(),
                    'tags' => ['type' => 'keyword'],
                    'text' => ['type' => 'text'],
                    'embedding' => [
                        'type' => 'knn_vector',
                        'dimension' => (int) config('opensearch.vector.dimensions', 2048),
                        'method' => [
                            'name' => 'hnsw',
                            'space_type' => config('opensearch.vector.space_type', 'cosinesimil'),
                            'engine' => config('opensearch.vector.engine', 'lucene'),
                        ],
                    ],
                    'embedding_model' => ['type' => 'keyword'],
                    'embedding_dimensions' => ['type' => 'integer'],
                    'created_time' => ['type' => 'date'],
                    'updated_time' => ['type' => 'date'],
                ],
            ],
        ];
    }

    /**
     * Return the client held in $this->client when it is non-null, otherwise
     * a client built by OpenSearchClientFactory.
     *
     * NOTE(review): the factory-built client is not stored, so every call
     * made while $this->client is null builds a new instance. Confirm that is
     * intended before relying on connection reuse.
     */
    private function client(): Client
    {
        return $this->client ?? (new OpenSearchClientFactory())->make();
    }

    /**
     * Add to an existing index every top-level field from mapping() that the
     * index's current mapping does not contain yet.
     *
     * Only field names are compared: a field that already exists is left
     * untouched even if its definition differs from the desired one. No
     * request is sent when nothing is missing.
     *
     * @param Client $client Client used to read and update the mapping.
     * @param string $index  Name of the index to inspect and update.
     */
    private function ensureProperties(Client $client, string $index): void
    {
        $mapping = $client->indices()->getMapping(['index' => $index]);

        // NOTE(review): this assumes the response is keyed by $index. If the
        // configured name is an alias, the response is presumably keyed by the
        // concrete index name, so $existing would be empty and every field
        // would be re-sent. Confirm against the deployment.
        $existing = $mapping[$index]['mappings']['properties'] ?? [];
        $desired = $this->mapping()['mappings']['properties'] ?? [];

        // Desired fields whose name is absent from the live mapping.
        $missing = array_diff_key($desired, $existing);

        if ($missing === []) {
            return;
        }

        $client->indices()->putMapping([
            'index' => $index,
            'body' => [
                'properties' => $missing,
            ],
        ]);
    }

    /**
     * Name of the chunk index, read from the 'opensearch.indices.chunks'
     * config key with 'proofdb_chunks' as the default.
     */
    private function indexName(): string
    {
        return config('opensearch.indices.chunks', 'proofdb_chunks');
    }

    /**
     * Field definition for a 'text' field that also carries a 'keyword'
     * sub-field with 'ignore_above' set to 512.
     *
     * @return array Mapping definition for one field.
     */
    private function textWithKeyword(): array
    {
        return [
            'type' => 'text',
            'fields' => [
                'keyword' => [
                    'type' => 'keyword',
                    'ignore_above' => 512,
                ],
            ],
        ];
    }
}