/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
# if !defined Parser_h
# define Parser_h
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>
#include "Object.h"
#include "Document.h"
#include "Page.h"
namespace merge_lib
{
class Document ;
//This class parsed the pdf document and creates
//an Document object
class Parser
{
public :
Parser ( ) : _root ( 0 ) , _fileContent ( ) , _objects ( ) , _document ( 0 ) { } ;
Document * parseDocument ( const char * fileName ) ;
static const std : : string WHITESPACES ;
static const std : : string DELIMETERS ;
static const std : : string NUMBERS ;
static const std : : string WHITESPACES_AND_DELIMETERS ;
static bool getNextWord ( std : : string & out , const std : : string & in , size_t & nextPosition , size_t * found = NULL ) ;
static std : : string getNextToken ( const std : : string & in , unsigned & position ) ;
static void trim ( std : : string & str ) ;
static std : : string findTokenStr ( const std : : string & content , const std : : string & pattern , size_t start , size_t & foundStart , size_t & foundEnd ) ;
static size_t findToken ( const std : : string & content , const std : : string & keyword , size_t start = 0 ) ;
static size_t findTokenName ( const std : : string & content , const std : : string & keyword , size_t start = 0 ) ;
static unsigned int findEndOfElementContent ( const std : : string & content , unsigned int startOfPageElement ) ;
static bool tokenIsAName ( const std : : string & content , size_t start ) ;
protected :
const std : : string & _getObjectContent ( unsigned int objectPosition , unsigned int & objectNumber , unsigned int & generationNumber , std : : pair < unsigned int , unsigned int > & , bool & ) ;
virtual unsigned int _readTrailerAndReturnRoot ( ) ;
private :
//methods
virtual void _getFileContent ( const char * fileName ) ;
bool _getNextObject ( Object * object ) ;
void _callObserver ( std : : string objectContent ) ;
void _createObjectTree ( const char * fileName ) ;
void _retrieveAllPages ( Object * objectWithKids ) ;
void _fillOutObjects ( ) ;
virtual void _readXRefAndCreateObjects ( ) ;
unsigned int _getEndOfLineFromContent ( unsigned int fromPosition ) ;
const std : : pair < unsigned int , unsigned int > & _getLineBounds ( const std : : string & str , unsigned int fromPosition ) ;
const std : : string & _getNextToken ( unsigned int & fromPosition ) ;
unsigned int _countTokens ( unsigned int leftBound , unsigned int rightBount ) ;
unsigned int _skipWhiteSpaces ( const std : : string & str ) ;
unsigned int _skipWhiteSpacesFromContent ( unsigned int fromPosition ) ;
const std : : map < unsigned int , Object : : ReferencePositionsInContent > & _getReferences ( const std : : string & objectContent ) ;
unsigned int _skipNumber ( const std : : string & str , unsigned int currentPosition ) ;
unsigned int _skipWhiteSpaces ( const std : : string & str , unsigned int fromPosition ) ;
void _createDocument ( const char * docName ) ;
virtual unsigned int _getStartOfXrefWithRoot ( ) ;
unsigned int _readTrailerAndRterievePrev ( const unsigned int startPositionForSearch , unsigned int & previosXref ) ;
void _clearParser ( ) ;
protected :
//members
Object * _root ;
std : : string _fileContent ;
std : : map < unsigned int , Object * > _objects ;
Document * _document ;
} ;
}
# endif