From 11ad2ccfa61fcde494c324dcb13ad28c0529f757 Mon Sep 17 00:00:00 2001 From: Mohamed Alsharaf Date: Wed, 13 Nov 2024 10:36:54 +1300 Subject: [PATCH] Add memory timeout fix Fix copied from https://github.com/smalot/pdfparser/issues/735 This is a temporary fork until the dependency is fixed --- src/Smalot/PdfParser/Font.php | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/Smalot/PdfParser/Font.php b/src/Smalot/PdfParser/Font.php index 56dee588..d7e7d1a7 100644 --- a/src/Smalot/PdfParser/Font.php +++ b/src/Smalot/PdfParser/Font.php @@ -216,21 +216,6 @@ public function loadTranslateTable(): array // Support for multiple bfrange sections if (preg_match_all('/beginbfrange(?P.*?)endbfrange/s', $content, $matches)) { foreach ($matches['sections'] as $section) { - // Support for : - $regexp = '/<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)>[ \r\n]+/is'; - - preg_match_all($regexp, $section, $matches); - - foreach ($matches['from'] as $key => $from) { - $char_from = hexdec($from); - $char_to = hexdec($matches['to'][$key]); - $offset = hexdec($matches['offset'][$key]); - - for ($char = $char_from; $char <= $char_to; ++$char) { - $this->table[$char] = self::uchr($char - $char_from + $offset); - } - } - // Support for : [ ... 
] // Some PDF file has 2-byte Unicode values on new lines > added \r\n $regexp = '/<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)> *\[(?P[\r\n<>0-9A-F ]+)\][ \r\n]+/is'; @@ -256,6 +241,25 @@ public function loadTranslateTable(): array } $this->table[$char_from + $position] = $text; } + + // Remove these found matches from the bfrange section + // This prevents the regexp below from finding false matches + $section = str_replace($matches[0][$key], '', $section); + } + + // Support for : + $regexp = '/<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)> *<(?P[0-9A-F]+)>[ \r\n]+/is'; + + preg_match_all($regexp, $section, $matches); + + foreach ($matches['from'] as $key => $from) { + $char_from = hexdec($from); + $char_to = hexdec($matches['to'][$key]); + $offset = hexdec($matches['offset'][$key]); + + for ($char = $char_from; $char <= $char_to; ++$char) { + $this->table[$char] = self::uchr($char - $char_from + $offset); + } } } }