public class PdfTokenizer extends Object implements Closeable, Serializable
Modifier and Type | Class and Description |
---|---|
protected static class |
PdfTokenizer.ReusableRandomAccessSource
Deprecated.
Will be removed in 7.2. This inner class is not used anywhere
|
static class |
PdfTokenizer.TokenType |
Modifier and Type | Field and Description |
---|---|
static boolean[] |
delims |
static byte[] |
F |
static byte[] |
False |
protected int |
generation |
protected boolean |
hexString |
static byte[] |
N |
static byte[] |
Null |
static byte[] |
Obj |
protected ByteBuffer |
outBuf |
static byte[] |
R |
protected int |
reference |
static byte[] |
Startxref |
static byte[] |
Stream |
static byte[] |
Trailer |
static byte[] |
True |
protected PdfTokenizer.TokenType |
type |
static byte[] |
Xref |
Constructor and Description |
---|
PdfTokenizer(RandomAccessFileOrArray file)
Creates a PdfTokenizer for the specified RandomAccessFileOrArray .
|
Modifier and Type | Method and Description |
---|---|
void |
backOnePosition(int ch) |
void |
checkFdfHeader() |
static int[] |
checkObjectStart(PdfTokenizer lineTokenizer)
Check whether line starts with object declaration.
|
String |
checkPdfHeader() |
static boolean |
checkTrailer(ByteBuffer line)
Checks whether the line is equal to 'trailer'.
|
void |
close() |
static byte[] |
decodeStringContent(byte[] content, boolean hexWriting)
Resolve escape symbols or hexadecimal symbols.
|
protected static byte[] |
decodeStringContent(byte[] content, int from, int to, boolean hexWriting)
Resolve escape symbols or hexadecimal symbols.
|
byte[] |
getByteContent() |
byte[] |
getDecodedStringContent() |
int |
getGenNr() |
int |
getHeaderOffset() |
int |
getIntValue() |
long |
getLongValue() |
int |
getObjNr() |
long |
getPosition() |
RandomAccessFileOrArray |
getSafeFile() |
long |
getStartxref() |
String |
getStringValue() |
PdfTokenizer.TokenType |
getTokenType() |
boolean |
isCloseStream() |
protected static boolean |
isDelimiter(int ch) |
protected static boolean |
isDelimiterWhitespace(int ch) |
boolean |
isHexString() |
static boolean |
isWhitespace(int ch)
Checks whether a character is whitespace. Currently the following character codes are treated as whitespace: 0, 9, 10, 12, 13, 32.
|
protected static boolean |
isWhitespace(int ch, boolean isWhitespace)
Checks whether a character is a whitespace.
|
long |
length() |
boolean |
nextToken() |
void |
nextValidToken() |
int |
read() |
void |
readFully(byte[] bytes) |
boolean |
readLineSegment(ByteBuffer buffer)
Reads data into the provided ByteBuffer.
|
boolean |
readLineSegment(ByteBuffer buffer, boolean isNullWhitespace)
Reads data into the provided ByteBuffer.
|
String |
readString(int size) |
void |
seek(long pos) |
void |
setCloseStream(boolean closeStream) |
void |
throwError(String error, Object... messageParams)
Helper method to handle content errors.
|
boolean |
tokenValueEqualsTo(byte[] cmp) |
public static final boolean[] delims
public static final byte[] Obj
public static final byte[] R
public static final byte[] Xref
public static final byte[] Startxref
public static final byte[] Stream
public static final byte[] Trailer
public static final byte[] N
public static final byte[] F
public static final byte[] Null
public static final byte[] True
public static final byte[] False
protected PdfTokenizer.TokenType type
protected int reference
protected int generation
protected boolean hexString
protected ByteBuffer outBuf
public PdfTokenizer(RandomAccessFileOrArray file)
Creates a PdfTokenizer for the specified RandomAccessFileOrArray. The beginning of the file is read to determine the location of the header, and the data source is adjusted as necessary to account for any junk that occurs in the byte source before the header.
file
- the source
public void seek(long pos) throws IOException
IOException
public void readFully(byte[] bytes) throws IOException
IOException
public long getPosition() throws IOException
IOException
public void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
IOException
public long length() throws IOException
IOException
public int read() throws IOException
IOException
public String readString(int size) throws IOException
IOException
public PdfTokenizer.TokenType getTokenType()
public byte[] getByteContent()
public String getStringValue()
public byte[] getDecodedStringContent()
public boolean tokenValueEqualsTo(byte[] cmp)
public int getObjNr()
public int getGenNr()
public void backOnePosition(int ch)
public int getHeaderOffset() throws IOException
IOException
public String checkPdfHeader() throws IOException
IOException
public void checkFdfHeader() throws IOException
IOException
public long getStartxref() throws IOException
IOException
public void nextValidToken() throws IOException
IOException
public boolean nextToken() throws IOException
IOException
public long getLongValue()
public int getIntValue()
public boolean isHexString()
public boolean isCloseStream()
public void setCloseStream(boolean closeStream)
public RandomAccessFileOrArray getSafeFile()
protected static byte[] decodeStringContent(byte[] content, int from, int to, boolean hexWriting)
NOTE: According to PDF Reference 1.7, part 3.2.3, a String value contains ASCII characters, so we can convert it directly to a byte array.
content
- string bytes to be decoded
from
- given start index
to
- given end index
hexWriting
- true if given string is hex-encoded, e.g. '<69546578…>'. False otherwise, e.g. '((iText( some version)…)'
String
.
public static byte[] decodeStringContent(byte[] content, boolean hexWriting)
content
- string bytes to be decoded
hexWriting
- true if given string is hex-encoded, e.g. '<69546578…>'. False otherwise, e.g. '((iText( some version)…)'
String
.
public static boolean isWhitespace(int ch)
Equivalent to calling isWhitespace(ch, true).
ch
- int
protected static boolean isWhitespace(int ch, boolean isWhitespace)
ch
- int
isWhitespace
- boolean
protected static boolean isDelimiter(int ch)
protected static boolean isDelimiterWhitespace(int ch)
public void throwError(String error, Object... messageParams)
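Helper method to handle content errors: wraps the error message into a PdfRuntimeException and adds the position in the file.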
error
- message.
messageParams
- error params.
IOException
- wrap error message into PdfRuntimeException
and add position in file.
public static boolean checkTrailer(ByteBuffer line)
Checks whether line is equal to 'trailer'.
line
- for check.
public boolean readLineSegment(ByteBuffer buffer) throws IOException
Reads data into the provided ByteBuffer. See isWhitespace(int) or isWhitespace(int, boolean) for a list of whitespace characters.
Equivalent to calling readLineSegment(buffer, true).
buffer
- a ByteBuffer
to which the result of reading will be saved
IOException
- in case of any reading error
public boolean readLineSegment(ByteBuffer buffer, boolean isNullWhitespace) throws IOException
Reads data into the provided ByteBuffer. See isWhitespace(int) or isWhitespace(int, boolean) for a list of whitespace characters.
buffer
- a ByteBuffer
to which the result of reading will be saved
isNullWhitespace
- boolean indicating whether the character with code 0 is treated as whitespace. If in doubt, use true or the overloaded method readLineSegment(buffer)
IOException
- in case of any reading error
public static int[] checkObjectStart(PdfTokenizer lineTokenizer)
lineTokenizer
- tokenizer, built by single line.
Copyright © 1998–2023 iText Group NV. All rights reserved.