1 Star 0 Fork 0

蔡风华 / pdf

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
lex.go 9.57 KB
一键复制 编辑 原始数据 按行查看 历史
nenormalka 提交于 2020-01-16 13:35 . fix infinity loop
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Reading of PDF tokens and objects from a raw byte stream.
package pdf
import (
	"fmt"
	"io"
	"strconv"
)

// A token is a PDF token in the input stream, one of the following Go types:
//
//	bool, a PDF boolean
//	int64, a PDF integer
//	float64, a PDF real
//	string, a PDF string literal
//	keyword, a PDF keyword
//	name, a PDF name without the leading slash
type token interface{}

// A name is a PDF name, without the leading slash.
type name string

// A keyword is a PDF keyword.
// Delimiter tokens used in higher-level syntax,
// such as "<<", ">>", "[", "]", "{", "}", are also treated as keywords.
type keyword string
// A buffer holds buffered input bytes from the PDF file.
type buffer struct {
r io.Reader // source of data
buf []byte // buffered data
pos int // read index in buf
offset int64 // offset at end of buf; aka offset of next read
tmp []byte // scratch space for accumulating token
unread []token // queue of read but then unread tokens
allowEOF bool
allowObjptr bool
allowStream bool
eof bool
key []byte
useAES bool
objptr objptr
// newBuffer returns a new buffer reading from r at the given offset.
func newBuffer(r io.Reader, offset int64) *buffer {
return &buffer{
r: r,
offset: offset,
buf: make([]byte, 0, 4096),
allowObjptr: true,
allowStream: true,
func (b *buffer) seek(offset int64) {
b.offset = offset
b.buf = b.buf[:0]
b.pos = 0
b.unread = b.unread[:0]
func (b *buffer) readByte() byte {
if b.pos >= len(b.buf) {
if b.pos >= len(b.buf) {
return '\n'
c := b.buf[b.pos]
return c
func (b *buffer) errorf(format string, args ...interface{}) {
panic(fmt.Errorf(format, args...))
func (b *buffer) reload() bool {
n := cap(b.buf) - int(b.offset%int64(cap(b.buf)))
n, err := b.r.Read(b.buf[:n])
if n == 0 && err != nil {
b.buf = b.buf[:0]
b.pos = 0
if b.allowEOF && err == io.EOF {
b.eof = true
return false
b.errorf("malformed PDF: reading at offset %d: %v", b.offset, err)
return false
b.offset += int64(n)
b.buf = b.buf[:n]
b.pos = 0
return true
func (b *buffer) seekForward(offset int64) {
for b.offset < offset {
if !b.reload() {
b.pos = len(b.buf) - int(b.offset-offset)
func (b *buffer) readOffset() int64 {
return b.offset - int64(len(b.buf)) + int64(b.pos)
func (b *buffer) unreadByte() {
if b.pos > 0 {
func (b *buffer) unreadToken(t token) {
b.unread = append(b.unread, t)
func (b *buffer) readToken() token {
if n := len(b.unread); n > 0 {
t := b.unread[n-1]
b.unread = b.unread[:n-1]
return t
// Find first non-space, non-comment byte.
c := b.readByte()
for {
if isSpace(c) {
if b.eof {
return io.EOF
c = b.readByte()
} else if c == '%' {
for c != '\r' && c != '\n' {
c = b.readByte()
} else {
switch c {
case '<':
if b.readByte() == '<' {
return keyword("<<")
return b.readHexString()
case '(':
return b.readLiteralString()
case '[', ']', '{', '}':
return keyword(string(c))
case '/':
return b.readName()
case '>':
if b.readByte() == '>' {
return keyword(">>")
if isDelim(c) {
b.errorf("unexpected delimiter %#q", rune(c))
return nil
return b.readKeyword()
func (b *buffer) readHexString() token {
tmp := b.tmp[:0]
for {
c := b.readByte()
if c == '>' {
if isSpace(c) {
goto Loop
c2 := b.readByte()
if isSpace(c2) {
goto Loop2
x := unhex(c)<<4 | unhex(c2)
if x < 0 {
b.errorf("malformed hex string %c %c %s", c, c2, b.buf[b.pos:])
tmp = append(tmp, byte(x))
b.tmp = tmp
return string(tmp)
// unhex returns the value (0-15) of the hexadecimal digit b,
// accepting both upper and lower case, or -1 if b is not a hex digit.
func unhex(b byte) int {
	switch {
	case '0' <= b && b <= '9':
		return int(b) - '0'
	case 'a' <= b && b <= 'f':
		return int(b) - 'a' + 10
	case 'A' <= b && b <= 'F':
		return int(b) - 'A' + 10
	}
	return -1
}
func (b *buffer) readLiteralString() token {
tmp := b.tmp[:0]
depth := 1
for !b.eof {
c := b.readByte()
switch c {
tmp = append(tmp, c)
case '(':
tmp = append(tmp, c)
case ')':
if depth--; depth == 0 {
break Loop
tmp = append(tmp, c)
case '\\':
switch c = b.readByte(); c {
b.errorf("invalid escape sequence \\%c", c)
tmp = append(tmp, '\\', c)
case 'n':
tmp = append(tmp, '\n')
case 'r':
tmp = append(tmp, '\r')
case 'b':
tmp = append(tmp, '\b')
case 't':
tmp = append(tmp, '\t')
case 'f':
tmp = append(tmp, '\f')
case '(', ')', '\\':
tmp = append(tmp, c)
case '\r':
if b.readByte() != '\n' {
case '\n':
// no append
case '0', '1', '2', '3', '4', '5', '6', '7':
x := int(c - '0')
for i := 0; i < 2; i++ {
c = b.readByte()
if c < '0' || c > '7' {
x = x*8 + int(c-'0')
if x > 255 {
b.errorf("invalid octal escape \\%03o", x)
tmp = append(tmp, byte(x))
b.tmp = tmp
return string(tmp)
func (b *buffer) readName() token {
tmp := b.tmp[:0]
for {
c := b.readByte()
if isDelim(c) || isSpace(c) {
if c == '#' {
x := unhex(b.readByte())<<4 | unhex(b.readByte())
if x < 0 {
b.errorf("malformed name")
tmp = append(tmp, byte(x))
tmp = append(tmp, c)
b.tmp = tmp
return name(string(tmp))
func (b *buffer) readKeyword() token {
tmp := b.tmp[:0]
for {
c := b.readByte()
if isDelim(c) || isSpace(c) {
tmp = append(tmp, c)
b.tmp = tmp
s := string(tmp)
switch {
case s == "true":
return true
case s == "false":
return false
case isInteger(s):
x, err := strconv.ParseInt(s, 10, 64)
if err != nil {
b.errorf("invalid integer %s", s)
return x
case isReal(s):
x, err := strconv.ParseFloat(s, 64)
if err != nil {
b.errorf("invalid real %s", s)
return x
return keyword(string(tmp))
// isInteger reports whether s is an optional '+' or '-' sign followed by
// one or more decimal digits.
func isInteger(s string) bool {
	if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
		s = s[1:]
	}
	if len(s) == 0 {
		return false
	}
	for _, c := range s {
		if c < '0' || '9' < c {
			return false
		}
	}
	return true
}
// isReal reports whether s is an optional '+' or '-' sign followed by a
// non-empty run of decimal digits and dots containing exactly one dot.
// Digits are not required on either side of the dot.
func isReal(s string) bool {
	if len(s) > 0 && (s[0] == '+' || s[0] == '-') {
		s = s[1:]
	}
	if len(s) == 0 {
		return false
	}
	ndot := 0
	for _, c := range s {
		if c == '.' {
			ndot++
			continue
		}
		if c < '0' || '9' < c {
			return false
		}
	}
	return ndot == 1
}
// An object is a PDF syntax object, one of the following Go types:
//
//	bool, a PDF boolean
//	int64, a PDF integer
//	float64, a PDF real
//	string, a PDF string literal
//	name, a PDF name without the leading slash
//	dict, a PDF dictionary
//	array, a PDF array
//	stream, a PDF stream
//	objptr, a PDF object reference
//	objdef, a PDF object definition
//
// An object may also be nil, to represent the PDF null.
type object interface{}

// A dict is a PDF dictionary, mapping names to objects.
type dict map[name]object

// An array is a PDF array, an ordered list of objects.
type array []object
type stream struct {
hdr dict
ptr objptr
offset int64
// An objptr is a PDF object reference: the two integers of an
// "id gen R" reference or an "id gen obj" definition.
type objptr struct {
	id  uint32 // first integer
	gen uint16 // second integer
}
type objdef struct {
ptr objptr
obj object
func (b *buffer) readObject() object {
tok := b.readToken()
if kw, ok := tok.(keyword); ok {
switch kw {
case "null":
return nil
case "<<":
return b.readDict()
case "[":
return b.readArray()
b.errorf("unexpected keyword %q parsing object", kw)
return nil
if str, ok := tok.(string); ok && b.key != nil && b.objptr.id != 0 {
tok = decryptString(b.key, b.useAES, b.objptr, str)
if !b.allowObjptr {
return tok
if t1, ok := tok.(int64); ok && int64(uint32(t1)) == t1 {
tok2 := b.readToken()
if t2, ok := tok2.(int64); ok && int64(uint16(t2)) == t2 {
tok3 := b.readToken()
switch tok3 {
case keyword("R"):
return objptr{uint32(t1), uint16(t2)}
case keyword("obj"):
old := b.objptr
b.objptr = objptr{uint32(t1), uint16(t2)}
obj := b.readObject()
if _, ok := obj.(stream); !ok {
tok4 := b.readToken()
if tok4 != keyword("endobj") {
b.errorf("missing endobj after indirect object definition")
b.objptr = old
return objdef{objptr{uint32(t1), uint16(t2)}, obj}
return tok
func (b *buffer) readArray() object {
var x array
for {
tok := b.readToken()
if tok == nil || tok == keyword("]") {
x = append(x, b.readObject())
return x
func (b *buffer) readDict() object {
x := make(dict)
for {
tok := b.readToken()
if tok == nil || tok == keyword(">>") {
n, ok := tok.(name)
if !ok {
b.errorf("unexpected non-name key %T(%v) parsing dictionary", tok, tok)
x[n] = b.readObject()
if !b.allowStream {
return x
tok := b.readToken()
if tok != keyword("stream") {
return x
switch b.readByte() {
case '\r':
if b.readByte() != '\n' {
case '\n':
// ok
b.errorf("stream keyword not followed by newline")
return stream{x, b.objptr, b.readOffset()}
// isSpace reports whether b is one of the white-space bytes recognized by
// the lexer: NUL, tab, line feed, form feed, carriage return, or space.
func isSpace(b byte) bool {
	switch b {
	case '\x00', '\t', '\n', '\f', '\r', ' ':
		return true
	}
	return false
}
// isDelim reports whether b is one of the delimiter bytes
// < > ( ) [ ] { } / % that end a name or keyword.
func isDelim(b byte) bool {
	switch b {
	case '<', '>', '(', ')', '[', ']', '{', '}', '/', '%':
		return true
	}
	return false
}
马建仓 AI 助手


344bd9b3 5694891 D2dac590 5694891