Files
documenter-mono/app/Services/DocxVariableExtractor.php
2025-10-31 16:48:05 +09:00

250 lines
8.7 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace App\Services;
use Illuminate\Support\Str;
use PhpOffice\PhpWord\IOFactory;
class DocxVariableExtractor
{
public function extractVariables($docxPath): array
{
$phpWord = IOFactory::load($docxPath);
$variables = [];
$textBuffer = '';
foreach ($phpWord->getSections() as $section) {
$this->extractFromSection($section, $variables, $textBuffer);
}
// Проверяем остаток в буфере
$this->extractFromText($textBuffer, $variables);
// Убираем дубликаты по полю 'name'
$uniqueVariables = [];
$seenNames = [];
foreach ($variables as $variable) {
if (!in_array($variable['name'], $seenNames)) {
$uniqueVariables[] = $variable;
$seenNames[] = $variable['name'];
}
}
return $uniqueVariables;
}
private function extractFromSection($section, array &$variables, string &$textBuffer): void
{
foreach ($section->getElements() as $element) {
$this->extractFromElement($element, $variables, $textBuffer);
}
}
private function extractFromElement($element, array &$variables, string &$textBuffer): void
{
$elementType = get_class($element);
switch ($elementType) {
case 'PhpOffice\PhpWord\Element\TextRun':
$this->extractFromTextRun($element, $variables, $textBuffer);
break;
case 'PhpOffice\PhpWord\Element\Text':
$textBuffer .= $element->getText();
$this->extractFromText($textBuffer, $variables);
break;
case 'PhpOffice\PhpWord\Element\Table':
$this->extractFromTable($element, $variables, $textBuffer);
break;
case 'PhpOffice\PhpWord\Element\Header':
case 'PhpOffice\PhpWord\Element\Footer':
$this->extractFromSection($element, $variables, $textBuffer);
break;
default:
// Сбрасываем буфер при смене типа элемента
$textBuffer = '';
break;
}
}
private function extractFromTextRun($textRun, array &$variables, string &$textBuffer): void
{
foreach ($textRun->getElements() as $element) {
if ($element instanceof \PhpOffice\PhpWord\Element\Text) {
$textBuffer .= $element->getText();
}
}
// Проверяем буфер после каждого TextRun
$this->extractFromText($textBuffer, $variables);
$textBuffer = ''; // Сбрасываем буфер после TextRun
}
private function extractFromTable($table, array &$variables, string &$textBuffer): void
{
foreach ($table->getRows() as $row) {
foreach ($row->getCells() as $cell) {
$cellBuffer = '';
foreach ($cell->getElements() as $element) {
if ($element instanceof \PhpOffice\PhpWord\Element\Text) {
$cellBuffer .= $element->getText();
} elseif ($element instanceof \PhpOffice\PhpWord\Element\TextRun) {
foreach ($element->getElements() as $textElement) {
if ($textElement instanceof \PhpOffice\PhpWord\Element\Text) {
$cellBuffer .= $textElement->getText();
}
}
}
}
$this->extractFromText($cellBuffer, $variables);
}
}
}
private function extractFromText(string $text, array &$variables): void
{
// Ищем переменные в формате ${variable_name}
preg_match_all('/\$\{\s*([a-zA-Zа-яА-ЯёЁ0-9_]+)\s*\}/u', $text, $matches);
if (!empty($matches[1])) {
foreach ($matches[0] as $index => $fullMatch) {
$variables[] = [
'name' => $fullMatch, // Полное выражение с ${}
'label' => $matches[1][$index], // Только содержимое внутри {}
'type' => 'text'
];
}
}
}
// Альтернативный метод: чтение напрямую из XML
public function extractVariablesFromXml($docxPath): array
{
$variables = [];
// Временная распаковка DOCX
$zip = new \ZipArchive();
if ($zip->open($docxPath) === TRUE) {
// Читаем document.xml
$documentXml = $zip->getFromName('word/document.xml');
if ($documentXml) {
// Ищем все текстовые узлы
preg_match_all('/(\{\{\s*[a-zA-Zа-яА-ЯёЁ0-9_]+\s*\}\})/u', $documentXml, $matches);
foreach ($matches[0] as $match) {
// Извлекаем имя переменной
preg_match('/\{\{\s*([a-zA-Zа-яА-ЯёЁ0-9_]+)\s*\}\}/u', $match, $varMatches);
if (isset($varMatches[1])) {
$variables[] = $varMatches[1];
}
}
}
// Также проверяем headers и footers
for ($i = 0; $i < $zip->numFiles; $i++) {
$filename = $zip->getNameIndex($i);
if (preg_match('/word\/header\d+\.xml/', $filename) ||
preg_match('/word\/footer\d+\.xml/', $filename)) {
$content = $zip->getFromName($filename);
preg_match_all('/(\{\{\s*[a-zA-Zа-яА-ЯёЁ0-9_]+\s*\}\})/u', $content, $matches);
foreach ($matches[0] as $match) {
preg_match('/\{\{\s*([a-zA-Zа-яА-ЯёЁ0-9_]+)\s*\}\}/u', $match, $varMatches);
if (isset($varMatches[1])) {
$variables[] = $varMatches[1];
}
}
}
}
$zip->close();
}
return array_unique($variables);
}
// Комбинированный метод
public function extractVariablesCombined($docxPath): array
{
$variables1 = $this->extractVariables($docxPath);
$variables2 = $this->extractVariablesFromXml($docxPath);
return array_unique(array_merge($variables1, $variables2));
}
public function validateVariables(array $variables): array
{
$validated = [];
foreach ($variables as $variable) {
if (preg_match('/^[a-zA-Zа-яА-ЯёЁ0-9_]+$/u', $variable)) {
$validated[] = $variable;
}
}
return array_unique($validated);
}
public function getVariablesWithExamples($docxPath): array
{
$variables = $this->extractVariablesCombined($docxPath);
$result = [];
foreach ($variables as $variable) {
$result[$variable] = [
'name' => $variable,
'human_name' => $this->makeHumanReadable($variable),
'example' => $this->generateExample($variable),
'type' => $this->detectType($variable)
];
}
return $result;
}
private function makeHumanReadable(string $variable): string
{
$readable = str_replace('_', ' ', $variable);
$readable = mb_strtolower($readable, 'UTF-8');
$readable = mb_convert_case($readable, MB_CASE_TITLE, 'UTF-8');
return $readable;
}
private function generateExample(string $variable): string
{
$examples = [
'name' => 'Иван Иванов',
'date' => '15.01.2024',
'number' => '123-2024',
'company' => 'ООО "Рога и копыта"',
'address' => 'г. Москва, ул. Примерная, д. 1',
'amount' => '100 000 руб.',
'quantity' => '5',
'price' => '20 000 руб.'
];
foreach ($examples as $key => $example) {
if (stripos($variable, $key) !== false) {
return $example;
}
}
return 'Пример значения';
}
private function detectType(string $variable): string
{
if (stripos($variable, 'date') !== false) return 'date';
if (stripos($variable, 'amount') !== false || stripos($variable, 'price') !== false) return 'money';
if (stripos($variable, 'quantity') !== false || stripos($variable, 'number') !== false) return 'number';
if (stripos($variable, 'list') !== false || stripos($variable, 'items') !== false) return 'array';
return 'text';
}
}