129 lines
4.6 KiB
PHP
129 lines
4.6 KiB
PHP
#!/usr/bin/env php
|
|
<?php
|
|
|
|
use app\service\Search\ChunkSearchIndexHandler;
|
|
use app\service\Search\ChunkSearchIndexRepository;
|
|
use app\service\Search\OpenSearchClientFactory;
|
|
use app\service\Search\OpenSearchChunkIndex;
|
|
|
|
require __DIR__ . '/../vendor/autoload.php';
|
|
require __DIR__ . '/../support/bootstrap.php';
|
|
|
|
// ---------------------------------------------------------------------------
// CLI entry point: pushes embedded chunks through the search-index handler and
// prints a progress bar plus a final summary.
//
// Recognised arguments:
//   --archive_uid=<uid>  value handed to the repository's count/reset calls
//   --reset              forces "reset" mode even if the index already exists
//
// Exit status: 0 on success (including "nothing to do"), 1 on any Throwable.
// ---------------------------------------------------------------------------

$archiveUid = null;
$forceReset = false;

foreach (array_slice($argv, 1) as $argument) {
    if (str_starts_with($argument, '--archive_uid=')) {
        $value = substr($argument, strlen('--archive_uid='));
        // An empty value is reported as "all embedded archives" in the summary,
        // so hand the repository null as well instead of an empty-string filter.
        $archiveUid = $value === '' ? null : $value;
        continue;
    }

    if ($argument === '--reset') {
        $forceReset = true;
        continue;
    }

    // Still ignored, but no longer silently: a typo such as --archive-uid=...
    // would otherwise run against every archive without any hint.
    fwrite(STDERR, 'Warning: ignoring unrecognized argument: ' . $argument . PHP_EOL);
}

// Null check rather than truthiness, so a filter value of "0" is reported as-is.
$archiveLabel = $archiveUid ?? '(all embedded archives)';

$repository = new ChunkSearchIndexRepository();
$handler = new ChunkSearchIndexHandler();
$index = new OpenSearchChunkIndex();
$clientFactory = new OpenSearchClientFactory();
// Only echoed in the output below; this script does not pass it to the handler.
$bulkSize = max(1, (int) config('opensearch.bulk.chunk_size', 500));

try {
    $client = $clientFactory->make();
    $indexName = config('opensearch.indices.chunks', 'proofdb_chunks');
    // Recorded before ensureExists() runs: the answer selects the mode below.
    $indexExists = (bool) $client->indices()->exists(['index' => $indexName]);

    $index->ensureExists();

    $totalChunks = $repository->countEmbeddedChunks($archiveUid);
    if ($totalChunks === 0) {
        echo 'OpenSearch reindex completed.' . PHP_EOL;
        echo 'Index: ' . $indexName . PHP_EOL;
        echo 'Archive filter: ' . $archiveLabel . PHP_EOL;
        echo 'Mode: nothing-to-do' . PHP_EOL;
        echo 'Eligible embedded chunks: 0' . PHP_EOL;
        exit(0);
    }

    // "reset" when requested explicitly or when the index did not exist beforehand;
    // otherwise "resume", which only resets the chunks the repository deems recoverable.
    $mode = ($forceReset || !$indexExists) ? 'reset' : 'resume';
    $resetCount = $mode === 'reset'
        ? $repository->resetEmbeddedChunksToPending($archiveUid)
        : $repository->resetRecoverableChunksToPending($archiveUid);

    $batchCount = 0;
    $indexedArchives = [];
    $progress = $repository->countIndexedChunks($archiveUid);

    echo 'Progress granularity: OpenSearch bulk batches (up to ' . $bulkSize . ' chunks each)' . PHP_EOL;
    renderProgress($progress, $totalChunks, 'Reindexing');

    // NOTE(review): queuePendingArchiveTasks() is called without $archiveUid, so the
    // archive filter does not reach it from here; confirm that is intended.
    // NOTE(review): the loop ends only when the queue call returns an empty list;
    // confirm it cannot keep returning archives for which handle() yields <= 0.
    while (true) {
        $archiveUids = $repository->queuePendingArchiveTasks(100);
        if ($archiveUids === []) {
            break;
        }

        foreach ($archiveUids as $uid) {
            $processedChunkCount = $handler->handle([
                'task_type' => 'search_index',
                'target_type' => 'archive',
                'target_uid' => $uid,
                'attempt' => 1,
            ]);
            if ($processedChunkCount <= 0) {
                // Nothing reported for this archive: not counted as a batch.
                continue;
            }

            $batchCount++;
            // May contain the same uid more than once; de-duplicated for the count below.
            $indexedArchives[] = $uid;
            $progress = $repository->countIndexedChunks($archiveUid);
            renderProgress($progress, $totalChunks, 'Reindexing');
            fwrite(STDOUT, PHP_EOL . sprintf(
                'Batch #%d archive=%s chunks=%d progress=%d/%d%s',
                $batchCount,
                $uid,
                $processedChunkCount,
                $progress,
                $totalChunks,
                PHP_EOL
            ));
        }
    }

    $indexedChunks = $repository->countIndexedChunks($archiveUid);
    renderProgress($indexedChunks, $totalChunks, 'Reindexing', true);

    echo 'OpenSearch reindex completed.' . PHP_EOL;
    echo 'Index: ' . $indexName . PHP_EOL;
    echo 'Archive filter: ' . $archiveLabel . PHP_EOL;
    echo 'Mode: ' . $mode . ($forceReset ? ' (--reset)' : (!$indexExists ? ' (index was missing)' : '')) . PHP_EOL;
    echo 'Eligible embedded chunks: ' . $totalChunks . PHP_EOL;
    echo 'OpenSearch bulk size: ' . $bulkSize . PHP_EOL;
    echo 'Reset chunks: ' . $resetCount . PHP_EOL;
    echo 'Indexed archives: ' . count(array_unique($indexedArchives)) . PHP_EOL;
    echo 'Processed batches: ' . $batchCount . PHP_EOL;
    echo 'Indexed chunk rows now marked indexed: ' . $indexedChunks . PHP_EOL;
    if ($indexedArchives !== []) {
        echo 'Archives: ' . implode(', ', $indexedArchives) . PHP_EOL;
    }
} catch (Throwable $exception) {
    // Any failure is reported as "<class>: <message>" on STDERR with exit status 1.
    fwrite(STDERR, $exception::class . ': ' . $exception->getMessage() . PHP_EOL);
    exit(1);
}
|
|
|
|
/**
 * Redraws a one-line text progress bar on STDOUT.
 *
 * The line starts with a carriage return so it overwrites the previous
 * rendering. A total below 1 is treated as 1 and the done count is clamped
 * into [0, total] before anything is computed or printed.
 *
 * @param int    $done  Units completed so far.
 * @param int    $total Units expected overall.
 * @param string $label Text printed in front of the bar.
 * @param bool   $final When true, the line is always terminated with a newline;
 *                      otherwise only once the clamped count reaches the total.
 */
function renderProgress(int $done, int $total, string $label, bool $final = false): void
{
    $barWidth = 32;

    $safeTotal = $total < 1 ? 1 : $total;
    $clamped = $done > $safeTotal ? $safeTotal : $done;
    if ($clamped < 0) {
        $clamped = 0;
    }

    $ratio = $clamped / $safeTotal;

    // Filled cells first, then right-pad with spaces up to the fixed bar width.
    $cells = str_pad(str_repeat('=', (int) floor($ratio * $barWidth)), $barWidth, ' ');
    $percentText = str_pad(number_format($ratio * 100, 1), 5, ' ', STR_PAD_LEFT);

    $output = sprintf("\r%s [%s] %s%% (%d/%d)", $label, $cells, $percentText, $clamped, $safeTotal);

    // Finish the line once the bar is complete or the caller marks this as the last draw.
    if ($final || $clamped >= $safeTotal) {
        $output .= PHP_EOL;
    }

    fwrite(STDOUT, $output);
}
|