/*
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
# include <fstream>
# include <iostream>
# include <vector>
# include <map>
# include <stack>
# include <string.h>
# include "Parser.h"
# include "Object.h"
# include "Exception.h"
# include "Utils.h"
using namespace merge_lib ;
using namespace std ;
// Character sets used by the tokenizing helpers of Parser.
// NOTE(review): each literal below starts and ends with a blank, so the blank
// is a member of every set - this may be an extraction artifact; confirm
// against the upstream sources before relying on it.
// Characters skipped as whitespace between tokens.
const std : : string Parser : : WHITESPACES ( " \t \f \v \n \r " ) ;
// Characters that may follow a keyword for findToken() to accept a match.
const std : : string Parser : : DELIMETERS ( " ()<>{}/%][ " ) ;
// Characters accepted while scanning object numbers and offsets.
const std : : string Parser : : NUMBERS ( " 0123456789 " ) ;
// Concatenation of the two sets above; it is defined after them, so both
// operands are already initialized when this initializer runs.
const std : : string Parser : : WHITESPACES_AND_DELIMETERS = Parser : : WHITESPACES + Parser : : DELIMETERS ;
/**
 * Parses the file `fileName` and returns the Document built from it.
 *
 * A new Document is allocated first, then the object tree and the document
 * structure are built. If either step throws a std::exception the parser
 * state is reset, the half-built Document is deleted, `_document` is set to
 * NULL and the exception is rethrown.
 *
 * @param fileName path of the file to parse.
 * @return pointer to the Document allocated with `new` by this call.
 */
Document* Parser::parseDocument(const char* fileName)
{
    _document = new Document(fileName);
    try
    {
        _createObjectTree(fileName);
        _createDocument(fileName);
        return _document;
    }
    catch (std::exception&)
    {
        // Undo everything done so far before passing the error on.
        _clearParser();
        delete _document;
        _document = NULL;
        throw;
    }
}
void Parser : : _retrieveAllPages ( Object * objectWithKids )
{
std : : string & objectContent = objectWithKids - > getObjectContent ( ) ;
unsigned int startOfKids = objectContent . find ( " /Kids " ) ;
unsigned int endOfKids = objectContent . find ( " ] " , startOfKids ) ;
if (
( startOfKids = = std : : string : : npos ) & &
( objectContent . find ( " /Page " ) ! = std : : string : : npos )
)
{
unsigned int numberOfPages = _document - > _pages . size ( ) + 1 ;
Page * newPage = new Page ( numberOfPages ) ;
newPage - > _root = objectWithKids ;
_document - > _pages . insert ( std : : pair < unsigned int , Page * > ( numberOfPages , newPage ) ) ;
return ;
}
const std : : vector < Object * > & kids = objectWithKids - > getSortedByPositionChildren ( startOfKids , endOfKids ) ;
for ( size_t i ( 0 ) ; i < kids . size ( ) ; + + i )
{
_retrieveAllPages ( kids [ i ] ) ;
}
}
void Parser : : _createDocument ( const char * docName )
{
_document - > _root = _root ;
Object * objectWithPages = 0 ;
std : : string & rootContent = _root - > getObjectContent ( ) ;
unsigned int startOfPages = rootContent . find ( " /Pages " ) ;
if ( startOfPages = = std : : string : : npos )
throw Exception ( " Some document is wrong " ) ;
unsigned int endOfPages = rootContent . find ( " R " , startOfPages ) ;
std : : vector < Object * > objectWithKids = _root - > getChildrenByBounds ( startOfPages , endOfPages ) ;
if ( objectWithKids . size ( ) ! = 1 )
throw Exception ( " Some document is wrong " ) ;
_retrieveAllPages ( objectWithKids [ 0 ] ) ;
_root - > retrieveMaxObjectNumber ( _document - > _maxObjectNumber ) ;
_clearParser ( ) ;
}
void Parser : : _clearParser ( )
{
_root = 0 ;
_fileContent . clear ( ) ;
_fileContent . reserve ( ) ;
_objects . clear ( ) ;
}
void Parser : : _getFileContent ( const char * fileName )
{
ifstream pdfFile ;
pdfFile . open ( fileName , ios : : binary ) ;
if ( pdfFile . fail ( ) )
{
stringstream errorMessage ( " File " ) ;
errorMessage < < fileName < < " is absent " < < " \0 " ;
throw Exception ( errorMessage ) ;
}
// get length of file:
pdfFile . seekg ( 0 , ios : : end ) ;
int length = pdfFile . tellg ( ) ;
pdfFile . seekg ( 0 , ios : : beg ) ;
_fileContent . resize ( length ) ;
pdfFile . read ( & _fileContent [ 0 ] , length ) ;
// check version
const char * header = " %PDF-1. " ;
size_t verPos = _fileContent . find ( header ) ;
if ( verPos = = 0 )
{
verPos + = strlen ( header ) ;
char ver = _fileContent [ verPos ] ;
if ( ver < ' 0 ' | | ver > ' 4 ' )
{
stringstream errorMsg ;
errorMsg < < " File with verion 1. " < < ver < < " is not currently supported by merge library \n " ;
throw Exception ( errorMsg ) ;
}
}
else
{
throw Exception ( " Unrecognized header of PDF file " ) ;
}
pdfFile . close ( ) ;
}
void Parser : : _createObjectTree ( const char * fileName )
{
unsigned int rootObjectNumber = 0 ;
try
{
_getFileContent ( fileName ) ;
_readXRefAndCreateObjects ( ) ;
rootObjectNumber = _readTrailerAndReturnRoot ( ) ;
}
catch ( std : : exception & )
{
std : : map < unsigned int , Object * > : : const_iterator it ( _objects . begin ( ) ) ;
for ( ; it ! = _objects . end ( ) ; it + + )
{
delete ( * it ) . second ;
}
_objects . clear ( ) ;
throw ;
}
std : : map < unsigned int , Object * > : : iterator objectsIterator ;
for ( objectsIterator = _objects . begin ( ) ; objectsIterator ! = _objects . end ( ) ; objectsIterator + + )
{
Object * currentObject = ( * objectsIterator ) . second ;
_document - > _allObjects . push_back ( currentObject ) ;
//key - object number : value - positions in object content of this reference
const std : : map < unsigned int , Object : : ReferencePositionsInContent > & refs =
_getReferences ( currentObject - > getObjectContent ( ) ) ;
std : : map < unsigned int , Object : : ReferencePositionsInContent > : : const_iterator refsIterator = refs . begin ( ) ;
for ( ; refsIterator ! = refs . end ( ) ; + + refsIterator )
{
if ( _objects . count ( ( * refsIterator ) . first ) )
currentObject - > addChild ( _objects [ ( * refsIterator ) . first ] , ( * refsIterator ) . second ) ;
}
}
_root = _objects [ rootObjectNumber ] ;
}
const std : : map < unsigned int , Object : : ReferencePositionsInContent > & Parser : : _getReferences ( const std : : string & objectContent )
{
unsigned int currentPosition ( 0 ) , startOfNextSearch ( 0 ) ;
static std : : map < unsigned int , std : : vector < unsigned int > > searchResult ;
searchResult . clear ( ) ;
unsigned int streamStart = objectContent . find ( " stream " ) ;
if ( streamStart = = string : : npos )
streamStart = objectContent . size ( ) ;
while ( startOfNextSearch < streamStart )
{
//try to find reference. reference example is 15 0 R
startOfNextSearch = objectContent . find ( " R " , startOfNextSearch ) ;
currentPosition = startOfNextSearch ;
if ( currentPosition ! = std : : string : : npos )
{
//check that next character of " R" is WHITESPACE.
if ( ( WHITESPACES . find ( objectContent [ currentPosition + 2 ] ) = = string : : npos ) & &
( DELIMETERS . find ( objectContent [ currentPosition + 2 ] ) = = string : : npos )
)
{
//this is not reference. this is something looks like "0 0 0 RG"
+ + startOfNextSearch ;
continue ;
}
//get previos symbol and check that it is a number
unsigned int numberSearchCounter = _skipNumber ( objectContent , - - currentPosition ) ;
//previos symbol is not a number
if ( numberSearchCounter = = currentPosition )
{
+ + startOfNextSearch ;
continue ;
}
else
{
currentPosition = numberSearchCounter ;
}
bool isFound ( false ) ;
//previos symbols should be whitespaces
while ( ( objectContent [ currentPosition ] = = ' ' ) & & - - currentPosition )
{
isFound = true ;
}
//previos symbol is not a whitespace
if ( ! isFound )
{
+ + startOfNextSearch ;
continue ;
}
//check that this and may be previos symbols are a numbers
numberSearchCounter = _skipNumber ( objectContent , currentPosition ) ;
if ( numberSearchCounter = = currentPosition )
{
+ + startOfNextSearch ;
continue ;
}
unsigned int objectNumber = Utils : : stringToInt ( objectContent . substr ( numberSearchCounter + 1 , currentPosition - numberSearchCounter ) ) ;
searchResult [ objectNumber ] . push_back ( numberSearchCounter + 1 ) ;
+ + startOfNextSearch ;
}
else
break ;
}
return searchResult ;
}
unsigned int Parser : : _skipNumber ( const std : : string & str , unsigned int currentPosition )
{
unsigned int numberSearchCounter = currentPosition ;
while ( ( NUMBERS . find ( str [ numberSearchCounter ] ) ! = string : : npos ) & & - - numberSearchCounter )
{ }
return numberSearchCounter ;
}
// Reads the cross-reference data starting at the offset returned by
// _getStartOfXrefWithRoot() and creates an Object in _objects for every entry
// flagged " n " whose object number is not in the map yet. After each section
// _readTrailerAndRterievePrev() is asked for a previous section; the outer
// loop repeats while it returns non-zero.
// Throws Exception when the section does not begin with the " xref " token or
// when the line that follows does not count exactly two tokens.
// NOTE(review): positions are unsigned int while the std::string searches used
// by the helpers return size_t - confirm the behaviour at end of file.
void Parser : : _readXRefAndCreateObjects ( )
{
unsigned int currentPostion = _getStartOfXrefWithRoot ( ) ;
do
{
// _getNextToken() returns a reference to its static buffer, so the token has
// to be inspected before the next call overwrites it.
const std : : string & currentToken = _getNextToken ( currentPostion ) ;
if ( currentToken ! = " xref " )
{
throw Exception ( " Wrong xref in some document " ) ;
}
unsigned int endOfLine = _getEndOfLineFromContent ( currentPostion ) ;
if ( _countTokens ( currentPostion , endOfLine ) ! = 2 )
{
throw Exception ( " Wrong xref in some document " ) ;
}
// Now read the subsections: each one starts with two numbers, the second of
// which is the number of entries that follow.
while ( 1 )
{
// firstObjectNumber is parsed to consume its token but is not used afterwards.
unsigned int firstObjectNumber = Utils : : stringToInt ( _getNextToken ( currentPostion ) ) ;
unsigned int objectCount = Utils : : stringToInt ( _getNextToken ( currentPostion ) ) ;
for ( unsigned int i ( 0 ) ; i < objectCount ; i + + )
{
unsigned long first ;
unsigned long second ;
// Only lines that count exactly three tokens are treated as entries.
if ( _countTokens ( currentPostion , _getEndOfLineFromContent ( currentPostion ) ) = = 3 )
{
// `first` is later passed to _getObjectContent() as the object position;
// `second` is parsed but not used.
first = Utils : : stringToInt ( _getNextToken ( currentPostion ) ) ;
second = Utils : : stringToInt ( _getNextToken ( currentPostion ) ) ;
const string & use = _getNextToken ( currentPostion ) ;
if ( ! use . compare ( " n " ) )
{
// objectNumber is an output of _getObjectContent().
unsigned int objectNumber ;
try
{
std : : pair < unsigned int , unsigned int > streamBounds ;
bool hasObjectStream ;
unsigned int generationNumber ;
// The content is copied because _getObjectContent() returns a reference to a static string.
const std : : string content = _getObjectContent ( first , objectNumber , generationNumber , streamBounds , hasObjectStream ) ;
// An object number that is already present keeps its existing Object.
if ( ! _objects . count ( objectNumber ) )
{
Object * newObject = new Object ( objectNumber , generationNumber , content , _document - > _documentName , streamBounds , hasObjectStream ) ;
_objects [ objectNumber ] = newObject ;
}
}
catch ( std : : exception & )
{
// Any std::exception raised while reading this entry is swallowed; the entry is skipped.
}
}
}
else
{
;
}
// Step one character forward after every entry.
+ + currentPostion ;
}
// Peek at the next token: " trailer " ends the section, in which case the
// position is moved back by the length of that literal; otherwise the
// position is restored and another subsection is read.
unsigned int previosPostion = currentPostion ;
const std : : string & isTrailer = _getNextToken ( currentPostion ) ;
std : : string trailer ( " trailer " ) ;
if ( isTrailer = = trailer )
{
currentPostion - = trailer . size ( ) ;
break ;
}
else
currentPostion = previosPostion ;
}
}
// The same variable is passed as the search start (by value) and as the
// output that receives the position of the previous section.
while ( _readTrailerAndRterievePrev ( currentPostion , currentPostion ) ) ;
}
unsigned int Parser : : _getStartOfXrefWithRoot ( )
{
unsigned int leftBoundOfStartOfXref = _fileContent . rfind ( " startxref " ) ;
leftBoundOfStartOfXref = _fileContent . find_first_of ( NUMBERS , leftBoundOfStartOfXref ) ;
unsigned int rightBoundOfStartOfXref = _fileContent . find_first_not_of ( NUMBERS , leftBoundOfStartOfXref + 1 ) ;
std : : string startOfXref = _fileContent . substr ( leftBoundOfStartOfXref , rightBoundOfStartOfXref - leftBoundOfStartOfXref ) ;
int integerStartOfXref = Utils : : stringToInt ( startOfXref ) ;
return integerStartOfXref ;
}
unsigned int Parser : : _getEndOfLineFromContent ( unsigned int fromPosition )
{
fromPosition = _skipWhiteSpacesFromContent ( fromPosition ) ;
unsigned int endOfLine = _fileContent . find_first_of ( " \n \r " , fromPosition ) ;
endOfLine = _fileContent . find_last_of ( " \n \r " , endOfLine ) ;
return endOfLine ;
}
const std : : pair < unsigned int , unsigned int > & Parser : : _getLineBounds ( const std : : string & str , unsigned int fromPosition )
{
static std : : pair < unsigned int , unsigned int > bounds ;
bounds . first = str . rfind ( ' \n ' , fromPosition ) ;
if ( bounds . first = = string : : npos )
bounds . first = 0 ;
bounds . second = str . find ( ' \n ' , fromPosition ) ;
if ( bounds . second = = string : : npos )
bounds . second = str . size ( ) ;
return bounds ;
}
const std : : string & Parser : : _getNextToken ( unsigned int & fromPosition )
{
fromPosition = _skipWhiteSpacesFromContent ( fromPosition ) ;
unsigned int position = _fileContent . find_first_of ( WHITESPACES , fromPosition ) ;
static std : : string token ;
if ( position > fromPosition )
{
unsigned int tokenSize = position - fromPosition ;
token . resize ( tokenSize ) ;
memcpy ( & token [ 0 ] , & _fileContent [ fromPosition ] , tokenSize ) ;
fromPosition = position ;
return token ;
}
else
{
//TODO throw exception
}
token = " " ;
return token ;
}
unsigned int Parser : : _countTokens ( unsigned int leftBound , unsigned int rightBount )
{
unsigned int position = _skipWhiteSpacesFromContent ( leftBound ) ;
unsigned int tokensCount = 0 ;
while ( position < rightBount )
{
position = _fileContent . find_first_of ( WHITESPACES , position ) ;
if ( position ! = string : : npos )
+ + tokensCount ;
//start search from next symbol
+ + position ;
}
return tokensCount ;
}
unsigned int Parser : : _skipWhiteSpaces ( const std : : string & str , unsigned int fromPosition )
{
unsigned int position = fromPosition ;
if ( WHITESPACES . find ( str [ 0 ] ) ! = string : : npos )
position = str . find_first_not_of ( WHITESPACES , position ) ;
return position ;
}
unsigned int Parser : : _skipWhiteSpacesFromContent ( unsigned int fromPosition )
{
unsigned int position = fromPosition ;
if ( WHITESPACES . find ( _fileContent [ position ] ) ! = string : : npos )
position = _fileContent . find_first_not_of ( WHITESPACES , position ) ; // + 1;
return position ;
}
const std : : string & Parser : : _getObjectContent ( unsigned int objectPosition , unsigned int & objectNumber , unsigned int & generationNumber , std : : pair < unsigned int , unsigned int > & streamBounds , bool & hasObjectStream )
{
hasObjectStream = false ;
unsigned int currentPosition = objectPosition ;
std : : string token = _getNextToken ( currentPosition ) ; // number of object
objectNumber = Utils : : stringToInt ( token ) ;
token = _getNextToken ( currentPosition ) ; // generation number - not interesting
generationNumber = Utils : : stringToInt ( token ) ;
token = Parser : : getNextToken ( _fileContent , currentPosition ) ;
if ( token ! = " obj " )
{
std : : stringstream strOut ;
strOut < < " Wrong object in PDF, in position " < < currentPosition < < " cannot continue! \n " ;
throw Exception ( strOut . str ( ) ) ;
}
static std : : string objectContent ;
size_t contentStart = _fileContent . find_first_not_of ( Parser : : WHITESPACES , currentPosition ) ;
if ( contentStart = = std : : string : : npos )
{
std : : stringstream strOut ;
strOut < < " Wrong object " < < objectNumber < < " in PDF, cannot find content for it \n " ;
throw Exception ( strOut . str ( ) ) ;
}
currentPosition = contentStart ;
unsigned int endOfContent = _fileContent . find ( " endobj " , contentStart ) ;
if ( endOfContent = = std : : string : : npos )
{
stringstream errorMessage ( " Corrupted PDF file, obj does not have matching endobj " ) ;
throw Exception ( errorMessage ) ;
}
unsigned int endOfStream = _fileContent . find ( " endstream " , currentPosition ) ;
if ( ( endOfStream ! = std : : string : : npos ) & & ( endOfStream < endOfContent ) )
{
std : : string stream ( " stream " ) ;
unsigned int beginOfStream = _fileContent . find ( stream , currentPosition ) + stream . size ( ) ;
while ( _fileContent [ beginOfStream ] = = ' \r ' )
{
+ + beginOfStream ;
}
if ( _fileContent [ beginOfStream ] = = ' \n ' )
{
+ + beginOfStream ;
}
streamBounds . first = beginOfStream ;
// try to use Length field to determine end of stream.
std : : string lengthToken = " /Length " ;
size_t lengthBegin = Parser : : findTokenName ( _fileContent , lengthToken , contentStart ) ;
if ( lengthBegin ! = std : : string : : npos )
{
std : : string lengthStr ;
size_t lenPos = lengthBegin + lengthToken . size ( ) ;
bool useContentLength = false ;
if ( Parser : : getNextWord ( lengthStr , _fileContent , lenPos ) )
{
useContentLength = true ;
std : : string refStr ;
if ( Parser : : getNextWord ( refStr , _fileContent , lenPos ) )
{
if ( Parser : : getNextWord ( refStr , _fileContent , lenPos ) )
{
if ( refStr = = " R " )
{
useContentLength = false ;
//it is reference
}
}
}
}
if ( useContentLength )
{
std : : stringstream strin ( lengthStr ) ;
unsigned int streamEnd = 0 ;
strin > > streamEnd ;
streamEnd + = beginOfStream ;
unsigned int streamEndBegin = _fileContent . find ( " endstream " , streamEnd ) ;
if ( streamEndBegin ! = std : : string : : npos )
{
endOfStream = streamEndBegin ;
}
}
}
streamBounds . second = endOfStream ;
endOfContent = beginOfStream ;
hasObjectStream = true ;
}
unsigned int contentSize = endOfContent - currentPosition ;
objectContent . resize ( contentSize ) ;
memcpy ( & objectContent [ 0 ] , & _fileContent [ currentPosition ] , contentSize ) ;
return objectContent ;
}
unsigned int Parser : : _readTrailerAndReturnRoot ( )
{
unsigned int startOfTrailer = Parser : : findToken ( _fileContent , " trailer " , _getStartOfXrefWithRoot ( ) ) ;
std : : string rootStr ( " /Root " ) ;
unsigned int startOfRoot = Parser : : findToken ( _fileContent , rootStr . data ( ) , startOfTrailer ) ;
if ( startOfRoot = = std : : string : : npos )
{
throw Exception ( " Cannot find Root object ! " ) ;
}
std : : string encryptStr ( " /Encrypt " ) ;
if ( Parser : : findToken ( _fileContent , encryptStr , startOfTrailer ) ! = std : : string : : npos )
{
throw Exception ( " Encrypted PDF is not supported! " ) ;
}
startOfRoot + = rootStr . size ( ) + 1 ; //"/Root + ' '
unsigned int endOfRoot = startOfRoot ;
while ( NUMBERS . find ( _fileContent [ endOfRoot + + ] ) ! = string : : npos )
{ }
- - endOfRoot ;
return Utils : : stringToInt ( _fileContent . substr ( startOfRoot , endOfRoot - startOfRoot ) ) ;
}
unsigned int Parser : : _readTrailerAndRterievePrev ( const unsigned int startPositionForSearch , unsigned int & previosXref )
{
unsigned int startOfTrailer = Parser : : findToken ( _fileContent , " trailer " , startPositionForSearch ) ;
if ( startOfTrailer = = std : : string : : npos )
{
throw Exception ( " Cannot find trailer! " ) ;
}
unsigned int startOfPrev = _fileContent . find ( " Prev " , startOfTrailer ) ;
unsigned int startxref = _fileContent . find ( " startxref " , startOfTrailer ) ;
if ( startOfPrev = = string : : npos | | ( startOfPrev > startxref ) )
return false ;
//"Prev "s length = 5
else
startOfPrev + = 5 ;
unsigned int endOfPrev = startOfPrev ;
while ( NUMBERS . find ( _fileContent [ endOfPrev + + ] ) ! = string : : npos )
{ }
- - endOfPrev ;
previosXref = Utils : : stringToInt ( _fileContent . substr ( startOfPrev , endOfPrev - startOfPrev ) ) ;
return true ;
}
//Method finds the token from current position from string
// It uses PDF whitespaces and delimeters to recognize
// Returned string without begin/end spaces
std : : string Parser : : getNextToken ( const std : : string & str , unsigned int & position )
{
if ( position > = str . size ( ) )
{
return " " ;
}
//skip first spaces
size_t beg_pos = str . find_first_not_of ( Parser : : WHITESPACES , position ) ;
if ( beg_pos = = std : : string : : npos )
{
// it is empty string!
return " " ;
}
size_t end_pos = str . find_first_of ( Parser : : WHITESPACES_AND_DELIMETERS , beg_pos ) ;
if ( end_pos = = std : : string : : npos )
{
end_pos = str . size ( ) ;
}
position = end_pos ;
std : : string out = str . substr ( beg_pos , end_pos - beg_pos ) ;
Parser : : trim ( out ) ;
return out ;
}
/** @brief getNextWord
*
* method finds and returns next word from the string
* For example : " 1 0 R \n " will return " 1 " , then " 0 " then " R "
*/
bool Parser : : getNextWord ( std : : string & out , const std : : string & str , size_t & nextPosition , size_t * found )
{
if ( found )
{
* found = std : : string : : npos ;
}
//trace("position = %d",position);
if ( nextPosition > = str . size ( ) )
{
return false ;
}
//skip first spaces
size_t beg_pos = str . find_first_not_of ( Parser : : WHITESPACES , nextPosition ) ;
if ( beg_pos = = std : : string : : npos )
{
// it is empty string!
return false ;
}
if ( found )
{
* found = beg_pos ;
}
size_t end_pos = str . find_first_of ( Parser : : WHITESPACES , beg_pos ) ;
if ( end_pos = = std : : string : : npos )
{
end_pos = str . size ( ) ;
}
nextPosition = end_pos ;
out = str . substr ( beg_pos , end_pos - beg_pos ) ;
Parser : : trim ( out ) ;
if ( out . empty ( ) )
{
return false ;
}
return true ;
}
/** @brief trim
*
* @ todo : document this function
*/
void Parser : : trim ( std : : string & str )
{
std : : string : : size_type pos1 = str . find_first_not_of ( WHITESPACES ) ;
std : : string : : size_type pos2 = str . find_last_not_of ( WHITESPACES ) ;
str = str . substr ( pos1 = = std : : string : : npos ? 0 : pos1 ,
pos2 = = std : : string : : npos ? str . length ( ) - 1 : pos2 - pos1 + 1 ) ;
}
// Method tries to find the PDF token from the content
// The token is "/L 12 0R" or /Length 123
std : : string Parser : : findTokenStr ( const std : : string & content , const std : : string & pattern , size_t start , size_t & foundStart , size_t & foundEnd )
{
size_t cur_pos = Parser : : findToken ( content , pattern , start ) ;
if ( cur_pos = = std : : string : : npos )
{
return " " ;
}
foundStart = cur_pos ;
cur_pos + = pattern . size ( ) ;
// then lets parse the content of remaining part
size_t end_pos = content . find_first_of ( Parser : : DELIMETERS , cur_pos ) ;
if ( end_pos = = std : : string : : npos )
{
end_pos = content . size ( ) ;
}
std : : string token = content . substr ( cur_pos , end_pos - cur_pos ) ;
foundEnd = end_pos - 1 ;
return token ;
}
// Method tries to find token in the string from specified position,
// returns position of first occurent or npos if not found
// It properly handles cases when content contains strings which
// contains token but not euqal to it
// Example: content "/Transparency/ ..." pattern "/Trans
// will return npos.
size_t Parser : : findToken ( const std : : string & content , const std : : string & keyword , size_t start )
{
size_t cur_pos = start ;
// lets find pattern first
size_t foundStart = std : : string : : npos ;
size_t savedPos = 0 ;
while ( 1 )
{
cur_pos = content . find ( keyword , cur_pos ) ;
if ( cur_pos = = std : : string : : npos )
{
break ;
}
savedPos = cur_pos ;
cur_pos + = keyword . size ( ) ;
if ( cur_pos < content . size ( ) )
{
if ( Parser : : WHITESPACES . find ( content [ cur_pos ] ) ! = std : : string : : npos | |
Parser : : DELIMETERS . find ( content [ cur_pos ] ) ! = std : : string : : npos )
{
foundStart = savedPos ;
break ;
}
}
else
{
foundStart = savedPos ;
// end of line is reached
break ;
}
}
return foundStart ;
}
// Method checks if token at current position can be a Name or it is not name but value
// Example
// /H /P /P 12 0 R
// the tag /P can be a name (and a value also), while 12 cannot
// start defines the position of token content
bool Parser : : tokenIsAName ( const std : : string & content , size_t start )
{
std : : string openBraces = " <[({ " ;
bool found = false ;
while ( 1 )
{
size_t foundNonWhite = content . find_first_not_of ( Parser : : WHITESPACES , start ) ;
size_t foundDelim = content . find_first_of ( Parser : : DELIMETERS , start ) ;
if ( foundNonWhite ! = std : : string : : npos & &
foundDelim ! = std : : string : : npos )
{
if ( ( foundNonWhite < foundDelim ) | | ( openBraces . find ( content [ foundDelim ] ) ! = std : : string : : npos ) )
{
if ( found )
{
return false ;
}
else
{
return true ;
}
}
else
{
if ( found )
{
return true ;
}
else
{
found = true ;
start = content . find_first_of ( Parser : : WHITESPACES_AND_DELIMETERS , foundDelim + 1 ) ;
}
}
}
else
{
return true ;
}
}
}
// Method tries to find token name in the string from specified position,
// For example, the string contains /H /P /P 12 0 R.
// If search for /P then it will return position of /P 12 0 R, not value of
// /H /P
size_t Parser : : findTokenName ( const std : : string & content , const std : : string & keyword , size_t start )
{
size_t cur_pos = start ;
// lets find pattern first
size_t foundStart = std : : string : : npos ;
size_t savedPos = 0 ;
std : : string braces = " <[({ " ;
while ( 1 )
{
cur_pos = content . find ( keyword , cur_pos ) ;
if ( cur_pos = = std : : string : : npos )
{
break ;
}
savedPos = cur_pos ;
cur_pos + = keyword . size ( ) ;
if ( cur_pos < content . size ( ) )
{
if ( Parser : : WHITESPACES_AND_DELIMETERS . find ( content [ cur_pos ] ) ! = std : : string : : npos )
{
if ( tokenIsAName ( content , cur_pos ) )
{
foundStart = savedPos ;
break ;
}
}
}
else
{
foundStart = savedPos ;
// end of line is reached
break ;
}
}
return foundStart ;
}
// Scans `content` from `startOfPageElement` and returns the position where the
// element that starts there ends. Occurrences of the " [ " and " < " patterns
// push a closing-delimiter string on a stack; the scan finishes once the stack
// is empty. The result is the next position of the current delimiter set at
// or after the scan position, or the scan position itself when there is none.
// NOTE(review): all positions are unsigned int while std::string searches
// return size_t, so the "== std::string::npos" tests only behave as written
// where both types have the same width - confirm on the target platforms.
unsigned int Parser : : findEndOfElementContent ( const std : : string & content , unsigned int startOfPageElement )
{
unsigned int foundEnd = std : : string : : npos ;
std : : stack < std : : string > delimStack ;
// Set of characters that can end the outermost level.
std : : string endDelim = " /]>)} " ;
unsigned int curPos = startOfPageElement ;
// Only the sizes of these two strings are used, to step past an opener.
std : : string openDict ( " < " ) ;
std : : string openArray ( " [ " ) ;
// At the start of every iteration `delimeter` holds the top of the stack.
std : : string delimeter = endDelim ;
delimStack . push ( delimeter ) ; //initial delimeter
// While true, a ' / ' found directly at the next non-whitespace position is
// stepped over once instead of ending the scan there.
bool compensation = true ;
while ( 1 )
{
unsigned int nonWhiteSpace = content . find_first_not_of ( Parser : : WHITESPACES , curPos ) ;
unsigned int foundDelimeter = content . find_first_of ( delimeter , curPos ) ;
unsigned int foundOpenBrace = content . find ( " [ " , curPos ) ;
unsigned int foundOpenDict = content . find ( " < " , curPos ) ;
// Nothing left to find: drop one nesting level.
if ( foundDelimeter = = std : : string : : npos & & foundOpenBrace = = std : : string : : npos & & foundOpenDict = = std : : string : : npos )
{
if ( ! delimStack . empty ( ) )
{
delimStack . pop ( ) ;
}
}
// The closing delimiter comes first: leave the current nesting level.
else if ( ( foundDelimeter < = foundOpenBrace & & foundDelimeter < = foundOpenDict ) )
{
if ( ! delimStack . empty ( ) )
{
delimStack . pop ( ) ;
}
// The delimiter is the very next non-whitespace character of the outermost level.
if ( nonWhiteSpace = = foundDelimeter & & delimeter = = endDelim )
{
curPos = foundDelimeter ;
if ( content [ foundDelimeter ] = = ' / ' & & compensation )
{
curPos + + ;
compensation = false ;
}
}
else
{
compensation = false ;
// On the outermost level the position stays on the delimiter; on a nested
// level it moves past the closing delimiter.
if ( delimeter = = endDelim )
{
curPos = foundDelimeter ;
}
else
{
curPos = foundDelimeter + delimeter . size ( ) ;
}
}
}
// An array opener comes first: enter a level that is closed by " ] ".
else if ( foundOpenBrace < = foundDelimeter & & foundOpenBrace < = foundOpenDict )
{
compensation = false ;
delimStack . push ( " ] " ) ;
curPos = foundOpenBrace + openArray . size ( ) ;
}
// A dictionary opener comes first: enter a level that is closed by " > ".
else if ( foundOpenDict < = foundDelimeter & & foundOpenDict < = foundOpenBrace )
{
compensation = false ;
delimStack . push ( " > " ) ;
curPos = foundOpenDict + openDict . size ( ) ;
}
// All levels are closed: the end is the next character of the delimiter set
// used in this iteration, or the current position when there is none.
if ( delimStack . empty ( ) )
{
foundEnd = content . find_first_of ( delimeter , curPos ) ;
if ( foundEnd = = std : : string : : npos )
{
foundEnd = curPos ;
}
break ;
}
delimeter = delimStack . top ( ) ;
}
return foundEnd ;
}