From b41a2ec5318ac3d0073db33c2f36bab1d32728a9 Mon Sep 17 00:00:00 2001 From: remorhaz Date: Sat, 28 Apr 2018 14:23:00 +0300 Subject: [PATCH] IO: bug fixed in string buffer --- src/IO/StringBuffer.php | 2 +- src/Unicode/CharBuffer.php | 31 ++++++++- tests/CharBufferTest.php | 56 +++++++++++++++-- tests/StringBufferTest.php | 56 +++++++++++++++-- tests/Unicode/CharBufferTest.php | 105 +++++++++++++++++++++++++++++++ 5 files changed, 239 insertions(+), 11 deletions(-) diff --git a/src/IO/StringBuffer.php b/src/IO/StringBuffer.php index ce79ea2..92648d1 100644 --- a/src/IO/StringBuffer.php +++ b/src/IO/StringBuffer.php @@ -72,7 +72,7 @@ public function getTokenPosition(): TokenPosition public function getTokenAsString(): string { - return substr($this->data, $this->startOffset, $this->previewOffset); + return substr($this->data, $this->startOffset, $this->previewOffset - $this->startOffset); } public function getTokenAsArray(): array diff --git a/src/Unicode/CharBuffer.php b/src/Unicode/CharBuffer.php index c2d5371..640cd25 100644 --- a/src/Unicode/CharBuffer.php +++ b/src/Unicode/CharBuffer.php @@ -4,6 +4,7 @@ use Remorhaz\UniLex\Exception; use Remorhaz\UniLex\IO\CharBufferInterface; +use Remorhaz\UniLex\IO\TokenExtractInterface; use Remorhaz\UniLex\Lexer\Token; use Remorhaz\UniLex\Lexer\TokenFactoryInterface; use Remorhaz\UniLex\Lexer\TokenMatcherInterface; @@ -13,7 +14,7 @@ use Remorhaz\UniLex\Unicode\Grammar\TokenType; use Remorhaz\UniLex\Unicode\Grammar\Utf8TokenMatcher; -class CharBuffer implements CharBufferInterface +class CharBuffer implements CharBufferInterface, TokenExtractInterface { private $source; @@ -28,6 +29,8 @@ class CharBuffer implements CharBufferInterface private $previewOffset = 0; + private $buffer = []; + public function __construct(CharBufferInterface $source) { $this->source = $source; @@ -81,7 +84,9 @@ private function getMatchedChar(): int if ($token->getType() != TokenType::SYMBOL) { throw new Exception("Invalid Unicode char token"); } - return $token->getAttribute(TokenAttribute::UNICODE_CHAR); + $char = $token->getAttribute(TokenAttribute::UNICODE_CHAR); + $this->buffer[] = $char; + return $char; } /** @@ -109,12 +114,14 @@ public function finishToken(Token $token): void $token->setAttribute(TokenAttribute::UNICODE_CHAR_OFFSET_START, $this->startOffset); $token->setAttribute(TokenAttribute::UNICODE_CHAR_OFFSET_FINISH, $this->previewOffset); $this->startOffset = $this->previewOffset; + $this->buffer = []; } public function resetToken(): void { $this->previewOffset = $this->startOffset; $this->source->resetToken(); + $this->buffer = []; unset($this->char); } @@ -127,6 +134,26 @@ public function getTokenPosition(): TokenPosition return new TokenPosition($this->startOffset, $this->previewOffset); } + /** + * @return string + * @throws Exception + */ + public function getTokenAsString(): string + { + if ($this->source instanceof TokenExtractInterface) { + return $this->source->getTokenAsString(); + } + throw new Exception("Source buffer doesn't support extracting strings"); + } + + /** + * @return array + */ + public function getTokenAsArray(): array + { + return $this->buffer; + } + private function getMatcher(): TokenMatcherInterface { if (!isset($this->matcher)) { diff --git a/tests/CharBufferTest.php b/tests/CharBufferTest.php index d9fc553..29551eb 100644 --- a/tests/CharBufferTest.php +++ b/tests/CharBufferTest.php @@ -124,7 +124,7 @@ public function testGetTokenPosition_NextSymbolAndFinishTokenCalled_ReturnsMatch self::assertSame(1, $position->getFinishOffset()); } - public function testGetTokenAsArray_EmptyPosition_ReturnsEmptyArray(): void + public function testGetTokenAsArray_NextSymbolNotCalled_ReturnsEmptyArray(): void { $actualValue = (new CharBuffer(0x61))->getTokenAsArray(); self::assertSame([], $actualValue); @@ -133,7 +133,7 @@ public function testGetTokenAsArray_EmptyPosition_ReturnsEmptyArray(): void /** * @throws \Remorhaz\UniLex\Exception */ - public function testGetTokenAsArray_NotEmptyPosition_ReturnsMatchingArray(): void + public function testGetTokenAsArray_NextSymbolCalledTwice_ReturnsMatchingArray(): void { $buffer = new CharBuffer(0x61, 0x62); $buffer->nextSymbol(); @@ -145,7 +145,31 @@ public function testGetTokenAsArray_NotEmptyPosition_ReturnsMatchingArray(): voi /** * @throws \Remorhaz\UniLex\Exception */ - public function testGetTokenAsString_EmptyPosition_ReturnsEmptyString(): void + public function testGetTokenAsArray_NextSymbolAndResetTokenCalled_ReturnsEmptyArray(): void + { + $buffer = new CharBuffer(0x61, 0x62); + $buffer->nextSymbol(); + $buffer->resetToken(); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([], $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsArray_NextSymbolAndFinishTokenCalled_ReturnsEmptyArray(): void + { + $buffer = new CharBuffer(0x61, 0x62); + $buffer->nextSymbol(); + $buffer->finishToken(new Token(0, false)); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([], $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolNotCalled_ReturnsEmptyString(): void { $actualValue = (new CharBuffer(0x61))->getTokenAsString(); self::assertSame('', $actualValue); @@ -154,7 +178,7 @@ public function testGetTokenAsString_EmptyPosition_ReturnsEmptyString(): void /** * @throws \Remorhaz\UniLex\Exception */ - public function testGetTokenAsString_NotEmptyPosition_ReturnsMatchingArray(): void + public function testGetTokenAsString_NextSymbolCalledTwice_ReturnsMatchingString(): void { $buffer = new CharBuffer(0x61, 0x62); $buffer->nextSymbol(); @@ -163,6 +187,30 @@ public function testGetTokenAsString_NotEmptyPosition_ReturnsMatchingArray(): vo self::assertSame('ab', $actualValue); } + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolAndResetTokenCalled_ReturnsEmptyString(): void + { + $buffer = new CharBuffer(0x61, 0x62); + $buffer->nextSymbol(); + $buffer->resetToken(); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('', $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolAndFinishTokenCalled_ReturnsEmptyString(): void + { + $buffer = new CharBuffer(0x61, 0x62); + $buffer->nextSymbol(); + $buffer->finishToken(new Token(0, false)); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('', $actualValue); + } + /** * @throws \Remorhaz\UniLex\Exception * @expectedException \Remorhaz\UniLex\Exception diff --git a/tests/StringBufferTest.php b/tests/StringBufferTest.php index 5515544..42d9373 100644 --- a/tests/StringBufferTest.php +++ b/tests/StringBufferTest.php @@ -124,7 +124,7 @@ public function testGetTokenPosition_NextSymbolAndFinishTokenCalled_ReturnsMatch self::assertSame(1, $position->getFinishOffset()); } - public function testGetTokenAsArray_EmptyPosition_ReturnsEmptyArray(): void + public function testGetTokenAsArray_NextSymbolNotCalled_ReturnsEmptyArray(): void { $actualValue = (new StringBuffer('a'))->getTokenAsArray(); self::assertSame([], $actualValue); @@ -133,7 +133,7 @@ public function testGetTokenAsArray_EmptyPosition_ReturnsEmptyArray(): void /** * @throws \Remorhaz\UniLex\Exception */ - public function testGetTokenAsArray_NotEmptyPosition_ReturnsMatchingArray(): void + public function testGetTokenAsArray_NextSymbolCalledTwice_ReturnsMatchingArray(): void { $buffer = new StringBuffer('ab'); $buffer->nextSymbol(); @@ -142,7 +142,31 @@ public function testGetTokenAsArray_NotEmptyPosition_ReturnsMatchingArray(): voi self::assertSame([0x61, 0x62], $actualValue); } - public function testGetTokenAsString_EmptyPosition_ReturnsEmptyString(): void + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsArray_NextSymbolAndResetTokenCalled_ReturnsEmptyArray(): void + { + $buffer = new StringBuffer('ab'); + $buffer->nextSymbol(); + $buffer->resetToken(); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([], $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsArray_NextSymbolAndFinishTokenCalled_ReturnsEmptyArray(): void + { + $buffer = new StringBuffer('ab'); + $buffer->nextSymbol(); + $buffer->finishToken(new Token(0, false)); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([], $actualValue); + } + + public function testGetTokenAsString_NextSymbolNotCalled_ReturnsEmptyString(): void { $actualValue = (new StringBuffer('a'))->getTokenAsString(); self::assertSame('', $actualValue); @@ -152,7 +176,7 @@ public function testGetTokenAsString_EmptyPosition_ReturnsEmptyString(): void /** * @throws \Remorhaz\UniLex\Exception */ - public function testGetTokenAsString_NotEmptyPosition_ReturnsMatchingArray(): void + public function testGetTokenAsString_NextSymbolCalledTwice_ReturnsMatchingString(): void { $buffer = new StringBuffer('ab'); $buffer->nextSymbol(); @@ -160,4 +184,28 @@ public function testGetTokenAsString_NotEmptyPosition_ReturnsMatchingArray(): vo $actualValue = $buffer->getTokenAsString(); self::assertSame('ab', $actualValue); } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolAndResetTokenCalled_ReturnsEmptyString(): void + { + $buffer = new StringBuffer('ab'); + $buffer->nextSymbol(); + $buffer->resetToken(); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('', $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolAndFinishTokenCalled_ReturnsEmptyString(): void + { + $buffer = new StringBuffer('ab'); + $buffer->nextSymbol(); + $buffer->finishToken(new Token(0, false)); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('', $actualValue); + } } diff --git a/tests/Unicode/CharBufferTest.php b/tests/Unicode/CharBufferTest.php index 4fee788..24e57a8 100644 --- a/tests/Unicode/CharBufferTest.php +++ b/tests/Unicode/CharBufferTest.php @@ -211,6 +211,103 @@ public function testResetToken_NextSymbolCalled_FinishTokenSetsZeroByteOffsetsIn self::assertSame(0, $token->getAttribute(TokenAttribute::UNICODE_BYTE_OFFSET_FINISH)); } + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolNotCalled_ReturnsEmptyString(): void + { + $source = new StringBuffer('a'); + $buffer = new CharBuffer($source); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('', $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolCalledTwice_ReturnsMatchingString(): void + { + $source = new StringBuffer('ab'); + $buffer = new CharBuffer($source); + $buffer->nextSymbol(); + $buffer->nextSymbol(); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('ab', $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolAndResetTokenCalled_ReturnsEmptyString(): void + { + $source = new StringBuffer('ab'); + $buffer = new CharBuffer($source); + $buffer->nextSymbol(); + $buffer->resetToken(); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('', $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsString_NextSymbolAndFinishTokenCalled_ReturnsEmptyString(): void + { + $source = new StringBuffer('ab'); + $buffer = new CharBuffer($source); + $buffer->nextSymbol(); + $buffer->finishToken(new Token(0, false)); + $actualValue = $buffer->getTokenAsString(); + self::assertSame('', $actualValue); + } + + public function testGetTokenAsArray_NextSymbolNotCalled_ReturnsEmptyArray(): void + { + $source = new StringBuffer('ab'); + $buffer = new CharBuffer($source); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([], $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsArray_NextSymbolCalledTwice_ReturnsMatchingArray(): void + { + $source = new StringBuffer('ab'); + $buffer = new CharBuffer($source); + $buffer->nextSymbol(); + $buffer->nextSymbol(); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([0x61, 0x62], $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsArray_NextSymbolAndResetTokenCalled_ReturnsEmptyArray(): void + { + $source = new StringBuffer('ab'); + $buffer = new CharBuffer($source); + $buffer->nextSymbol(); + $buffer->resetToken(); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([], $actualValue); + } + + /** + * @throws \Remorhaz\UniLex\Exception + */ + public function testGetTokenAsArray_NextSymbolAndFinishTokenCalled_ReturnsEmptyArray(): void + { + $source = new StringBuffer('ab'); + $buffer = new CharBuffer($source); + $buffer->nextSymbol(); + $buffer->finishToken(new Token(0, false)); + $actualValue = $buffer->getTokenAsArray(); + self::assertSame([], $actualValue); + } + private function createTokenMatcherThatNeverMatches(): TokenMatcherInterface { return new class implements TokenMatcherInterface @@ -221,6 +318,10 @@ public function match(CharBufferInterface $buffer, TokenFactoryInterface $tokenF return false; } + /** + * @return Token + * @throws Exception + */ public function getToken(): Token { throw new Exception("Not implemented"); @@ -238,6 +339,10 @@ public function createToken(int $tokenId): Token return new Token(TokenType::INVALID_BYTES, false); } + /** + * @return Token + * @throws Exception + */ public function createEoiToken(): Token { throw new Exception("Not implemented");