1 Star 0 Fork 0

蔡风华 / pdf

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
read.go 27.25 KB
一键复制 编辑 原始数据 按行查看 历史
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pdf implements reading of PDF files.
//
// Overview
//
// PDF is Adobe's Portable Document Format, ubiquitous on the internet.
// A PDF document is a complex data format built on a fairly simple structure.
// This package exposes the simple structure along with some wrappers to
// extract basic information. If more complex information is needed, it is
// possible to extract that information by interpreting the structure exposed
// by this package.
//
// Specifically, a PDF is a data structure built from Values, each of which has
// one of the following Kinds:
//
// Null, for the null object.
// Integer, for an integer.
// Real, for a floating-point number.
// Bool, for a boolean value.
// Name, for a name constant (as in /Helvetica).
// String, for a string constant.
// Dict, for a dictionary of name-value pairs.
// Array, for an array of values.
// Stream, for an opaque data stream and associated header dictionary.
//
// The accessors on Value—Int64, Float64, Bool, Name, and so on—return
// a view of the data as the given type. When there is no appropriate view,
// the accessor returns a zero result. For example, the Name accessor returns
// the empty string if called on a Value v for which v.Kind() != Name.
// Returning zero values this way, especially from the Dict and Array accessors,
// which themselves return Values, makes it possible to traverse a PDF quickly
// without writing any error checking. On the other hand, it means that mistakes
// can go unreported.
//
// The basic structure of the PDF file is exposed as the graph of Values.
//
// Most richer data structures in a PDF file are dictionaries with specific interpretations
// of the name-value pairs. The Font and Page wrappers make the interpretation
// of a specific Value as the corresponding type easier. They are only helpers, though:
// they are implemented only in terms of the Value API and could be moved outside
// the package. Equally important, traversal of other PDF data structures can be implemented
// in other packages as needed.
//
package pdf
// BUG(rsc): The package is incomplete, although it has been used successfully on some
// large real-world PDF files.
// BUG(rsc): There is no support for closing open PDF files. If you drop all references to a Reader,
// the underlying reader will eventually be garbage collected.
// BUG(rsc): The library makes no attempt at efficiency. A value cache maintained in the Reader
// would probably help significantly.
// BUG(rsc): The support for reading encrypted files is weak.
// BUG(rsc): The Value API does not support error reporting. The intent is to allow users to
// set an error reporting callback in Reader, but that code has not been implemented.
import (
"bytes"
"compress/zlib"
"crypto/aes"
"crypto/cipher"
"crypto/md5"
"crypto/rc4"
"encoding/ascii85"
"fmt"
"io"
"io/ioutil"
"os"
"sort"
"strconv"
)
// DebugOn enables diagnostic messages printed to standard output when the
// reader meets data it does not understand, such as an invalid xref stream
// entry type or an unknown predictor. Set it to true when troubleshooting
// problems that arise while reading a file.
var DebugOn = false
// A Reader is a single PDF file open for reading.
type Reader struct {
// f is the underlying random-access data source.
f io.ReaderAt
// end is the total size of the data in f, as given to NewReaderEncrypted.
end int64
// xref is the cross-reference table, indexed by object id.
xref []xref
// trailer is the trailer dictionary, or the header dictionary of the
// cross-reference stream when the file uses one.
trailer dict
// trailerptr identifies the cross-reference stream object; it is the zero
// objptr when the file uses a classic xref table.
trailerptr objptr
// key is the file encryption key set by initEncrypt; nil means no decryption.
key []byte
// useAES is set by initEncrypt when the Encrypt dictionary has V == 4.
useAES bool
}
// An xref is one entry of the cross-reference table: it records where the
// object identified by ptr can be found.
type xref struct {
// ptr is the object id and generation described by this entry.
ptr objptr
// inStream reports whether the object is stored inside an object stream.
inStream bool
// stream identifies the containing object stream when inStream is true.
stream objptr
// offset is the byte offset of the object in the file. For in-stream entries
// it holds the third field of the xref stream record (presumably the index
// within the object stream; resolve does not consult it).
offset int64
}
// errorf builds an error from format and args in the manner of fmt.Errorf
// and panics with it. It never returns normally.
func (r *Reader) errorf(format string, args ...interface{}) {
	err := fmt.Errorf(format, args...)
	panic(err)
}
// Open opens the named file and parses it as a PDF.
//
// On success it returns the open file together with a Reader that reads from
// it; the caller is responsible for closing the file once the Reader is no
// longer needed. On failure it returns a nil file, a nil Reader and the error;
// any file that had been opened is closed before returning, so nothing leaks.
func Open(file string) (*os.File, *Reader, error) {
	f, err := os.Open(file)
	if err != nil {
		// os.Open returns a nil *os.File on failure: there is nothing to close.
		return nil, nil, err
	}
	fi, err := f.Stat()
	if err != nil {
		f.Close()
		return nil, nil, err
	}
	reader, err := NewReader(f, fi.Size())
	if err != nil {
		// Do not hand back an open descriptor alongside an error; callers
		// that return early on err would otherwise leak it.
		f.Close()
		return nil, nil, err
	}
	return f, reader, nil
}
// NewReader opens a file for reading, using the data in f with the given total size.
// It is equivalent to NewReaderEncrypted with no password callback.
func NewReader(f io.ReaderAt, size int64) (*Reader, error) {
	var noPasswords func() string // nil: no passwords to offer
	return NewReaderEncrypted(f, size, noPasswords)
}
// NewReaderEncrypted opens a file for reading, using the data in f with the given total size.
// If the PDF is encrypted, NewReaderEncrypted calls pw repeatedly to obtain passwords
// to try. If pw returns the empty string, NewReaderEncrypted stops trying to decrypt
// the file and returns an error.
//
// The empty password is always tried first. pw may be nil, in which case the
// error from trying the empty password is returned.
func NewReaderEncrypted(f io.ReaderAt, size int64, pw func() string) (*Reader, error) {
	// Read errors are deliberately not inspected here and below: a short or
	// failed read leaves zero bytes in buf, which the checks then reject.
	buf := make([]byte, 10)
	f.ReadAt(buf, 0)
	if !bytes.HasPrefix(buf, []byte("%PDF-1.")) || buf[7] < '0' || buf[7] > '7' || buf[8] != '\r' && buf[8] != '\n' {
		return nil, fmt.Errorf("not a PDF file: invalid header")
	}

	// Inspect the tail of the file, where %%EOF and startxref live. The chunk
	// is clamped so that files shorter than endChunk are read from offset 0
	// instead of from a negative offset.
	end := size
	const endChunk = 100
	chunk := int64(endChunk)
	if chunk > end {
		chunk = end
	}
	if chunk < 0 {
		chunk = 0
	}
	buf = make([]byte, chunk)
	f.ReadAt(buf, end-chunk)
	// TrimRight copes with a tail made only of whitespace; indexing the last
	// byte by hand would run off the front of the slice once it is empty.
	buf = bytes.TrimRight(buf, "\r\n\t ")
	if !bytes.HasSuffix(buf, []byte("%%EOF")) {
		return nil, fmt.Errorf("not a PDF file: missing %%%%EOF")
	}
	i := findLastLine(buf, "startxref")
	if i < 0 {
		return nil, fmt.Errorf("malformed PDF file: missing final startxref")
	}

	r := &Reader{
		f:   f,
		end: end,
	}

	// Re-read from the startxref keyword to obtain the xref offset.
	pos := end - chunk + int64(i)
	b := newBuffer(io.NewSectionReader(f, pos, end-pos), pos)
	if b.readToken() != keyword("startxref") {
		return nil, fmt.Errorf("malformed PDF file: missing startxref")
	}
	startxref, ok := b.readToken().(int64)
	if !ok {
		return nil, fmt.Errorf("malformed PDF file: startxref not followed by integer")
	}

	b = newBuffer(io.NewSectionReader(r.f, startxref, r.end-startxref), startxref)
	xref, trailerptr, trailer, err := readXref(r, b)
	if err != nil {
		return nil, err
	}
	r.xref = xref
	r.trailer = trailer
	r.trailerptr = trailerptr
	if trailer["Encrypt"] == nil {
		return r, nil
	}

	// Encrypted file: try the empty password first, then whatever pw offers.
	err = r.initEncrypt("")
	if err == nil {
		return r, nil
	}
	if pw == nil || err != ErrInvalidPassword {
		return nil, err
	}
	for {
		next := pw()
		if next == "" {
			break
		}
		if r.initEncrypt(next) == nil {
			return r, nil
		}
	}
	// err is still ErrInvalidPassword from the empty-password attempt.
	return nil, err
}
// Trailer returns the file's Trailer value, wrapping the trailer dictionary
// recorded when the cross-reference data was read.
func (r *Reader) Trailer() Value {
	return Value{
		r:    r,
		ptr:  r.trailerptr,
		data: r.trailer,
	}
}
// readXref reads the cross-reference section that b is positioned at.
// A leading "xref" keyword selects the classic table format; a leading
// integer is taken to start an indirect object holding a cross-reference
// stream. Anything else is reported as an error.
func readXref(r *Reader, b *buffer) ([]xref, objptr, dict, error) {
	first := b.readToken()
	switch t := first.(type) {
	case keyword:
		if t == keyword("xref") {
			return readXrefTable(r, b)
		}
	case int64:
		// Push the object number back so readXrefStream sees the whole object.
		b.unreadToken(first)
		return readXrefStream(r, b)
	}
	return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", first)
}
// readXrefStream reads a cross-reference stream object from b and then follows
// its /Prev chain, merging older cross-reference streams into the same table.
// Entries already filled in by a newer stream are kept.
//
// It returns the table, the pointer of the newest xref stream object, and that
// stream's header dictionary, which serves as the trailer.
// A negative /Size or a /Prev chain that revisits an offset is reported as a
// malformed file rather than causing a panic or an endless loop.
func readXrefStream(r *Reader, b *buffer) ([]xref, objptr, dict, error) {
	obj1 := b.readObject()
	obj, ok := obj1.(objdef)
	if !ok {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", objfmt(obj1))
	}
	strmptr := obj.ptr
	strm, ok := obj.obj.(stream)
	if !ok {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: cross-reference table not found: %v", objfmt(obj))
	}
	if strm.hdr["Type"] != name("XRef") {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref stream does not have type XRef")
	}
	size, ok := strm.hdr["Size"].(int64)
	if !ok {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref stream missing Size")
	}
	if size < 0 {
		// make would panic on a negative length.
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref stream has negative Size %d", size)
	}
	table := make([]xref, size)

	table, err := readXrefStreamData(r, strm, table, size)
	if err != nil {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err)
	}

	// visited records the /Prev offsets already processed so that a chain
	// pointing back at itself terminates with an error.
	visited := make(map[int64]bool)
	for prevoff := strm.hdr["Prev"]; prevoff != nil; {
		off, ok := prevoff.(int64)
		if !ok {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev is not integer: %v", prevoff)
		}
		if visited[off] {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev chain loops at offset %d", off)
		}
		visited[off] = true

		b := newBuffer(io.NewSectionReader(r.f, off, r.end-off), off)
		obj1 := b.readObject()
		obj, ok := obj1.(objdef)
		if !ok {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream not found: %v", objfmt(obj1))
		}
		prevstrm, ok := obj.obj.(stream)
		if !ok {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream not found: %v", objfmt(obj))
		}
		prevoff = prevstrm.hdr["Prev"]
		prev := Value{r, objptr{}, prevstrm}
		if prev.Key("Type").Name() != "XRef" {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream does not have type XRef")
		}
		psize := prev.Key("Size").Int64()
		if psize > size {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref prev stream larger than last stream")
		}
		if table, err = readXrefStreamData(r, prevstrm, table, psize); err != nil {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: reading xref prev stream: %v", err)
		}
	}

	return table, strmptr, strm.hdr, nil
}
// readXrefStreamData decodes the records of the cross-reference stream strm
// into table and returns the (possibly grown) table.
//
// The /W array gives the byte width of the three fields of every record, and
// /Index lists (first object number, count) pairs; when /Index is absent the
// single pair (0, size) is used. Slots of table that already hold an entry are
// left alone, so newer cross-reference data must be read first.
//
// Negative field widths and negative starting object numbers are rejected, and
// the table is extended whenever a record addresses a slot beyond its length.
func readXrefStreamData(r *Reader, strm stream, table []xref, size int64) ([]xref, error) {
	index, _ := strm.hdr["Index"].(array)
	if index == nil {
		index = array{int64(0), size}
	}
	if len(index)%2 != 0 {
		return nil, fmt.Errorf("invalid Index array %v", objfmt(index))
	}
	ww, ok := strm.hdr["W"].(array)
	if !ok {
		return nil, fmt.Errorf("xref stream missing W array")
	}

	var w []int
	for _, x := range ww {
		i, ok := x.(int64)
		if !ok || i < 0 || int64(int(i)) != i {
			return nil, fmt.Errorf("invalid W array %v", objfmt(ww))
		}
		w = append(w, int(i))
	}
	if len(w) < 3 {
		return nil, fmt.Errorf("invalid W array %v", objfmt(ww))
	}

	v := Value{r, objptr{}, strm}
	wtotal := 0
	for _, wid := range w {
		wtotal += wid
		if wtotal < 0 {
			// The sum of non-negative widths only goes negative on overflow.
			return nil, fmt.Errorf("invalid W array %v", objfmt(ww))
		}
	}
	buf := make([]byte, wtotal)
	data := v.Reader()
	for len(index) > 0 {
		start, ok1 := index[0].(int64)
		n, ok2 := index[1].(int64)
		if !ok1 || !ok2 {
			return nil, fmt.Errorf("malformed Index pair %v %v %T %T", objfmt(index[0]), objfmt(index[1]), index[0], index[1])
		}
		if n > 0 && start < 0 {
			// A negative object number would index the table out of range.
			return nil, fmt.Errorf("invalid Index array %v", objfmt(strm.hdr["Index"]))
		}
		index = index[2:]
		for i := 0; i < int(n); i++ {
			_, err := io.ReadFull(data, buf)
			if err != nil {
				return nil, fmt.Errorf("error reading xref stream: %v", err)
			}
			v1 := decodeInt(buf[0:w[0]])
			if w[0] == 0 {
				// An omitted type field defaults to type 1.
				v1 = 1
			}
			v2 := decodeInt(buf[w[0] : w[0]+w[1]])
			v3 := decodeInt(buf[w[0]+w[1] : w[0]+w[1]+w[2]])
			x := int(start) + i
			if x >= len(table) {
				// Grow the length, not merely the capacity, so that table[x]
				// below is in range.
				table = append(table, make([]xref, x+1-len(table))...)
			}
			if table[x].ptr != (objptr{}) {
				// Already defined by newer cross-reference data.
				continue
			}
			switch v1 {
			case 0:
				// Free object.
				table[x] = xref{ptr: objptr{0, 65535}}
			case 1:
				// Object stored at byte offset v2 with generation v3.
				table[x] = xref{ptr: objptr{uint32(x), uint16(v3)}, offset: int64(v2)}
			case 2:
				// Object stored inside object stream v2.
				table[x] = xref{ptr: objptr{uint32(x), 0}, inStream: true, stream: objptr{uint32(v2), 0}, offset: int64(v3)}
			default:
				if DebugOn {
					fmt.Printf("invalid xref stream type %d: %x\n", v1, buf)
				}
			}
		}
	}
	return table, nil
}
// decodeInt interprets b as a big-endian unsigned integer and returns it as
// an int. An empty slice decodes to 0.
func decodeInt(b []byte) int {
	var val int
	for i := 0; i < len(b); i++ {
		val = (val << 8) | int(b[i])
	}
	return val
}
// readXrefTable reads a classic cross-reference table (b is positioned just
// after the "xref" keyword) and its trailer dictionary, then follows the
// /Prev chain to merge older tables. The table is truncated to the newest
// trailer's /Size.
//
// The returned dictionary is always the newest trailer; trailers of older
// sections are used only to find the next /Prev offset. The returned objptr
// is zero because a classic table is not an indirect object.
// A /Prev chain that revisits an offset, or a negative /Size, is reported as
// a malformed file.
func readXrefTable(r *Reader, b *buffer) ([]xref, objptr, dict, error) {
	var table []xref

	table, err := readXrefTableData(b, table)
	if err != nil {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err)
	}

	trailer, ok := b.readObject().(dict)
	if !ok {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref table not followed by trailer dictionary")
	}

	// visited records the /Prev offsets already processed so that a chain
	// pointing back at itself terminates with an error.
	visited := make(map[int64]bool)
	for prevoff := trailer["Prev"]; prevoff != nil; {
		off, ok := prevoff.(int64)
		if !ok {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev is not integer: %v", prevoff)
		}
		if visited[off] {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev chain loops at offset %d", off)
		}
		visited[off] = true

		b := newBuffer(io.NewSectionReader(r.f, off, r.end-off), off)
		tok := b.readToken()
		if tok != keyword("xref") {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev does not point to xref")
		}
		table, err = readXrefTableData(b, table)
		if err != nil {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: %v", err)
		}

		// Deliberately a separate variable: the newest trailer must survive.
		prevTrailer, ok := b.readObject().(dict)
		if !ok {
			return nil, objptr{}, nil, fmt.Errorf("malformed PDF: xref Prev table not followed by trailer dictionary")
		}
		prevoff = prevTrailer["Prev"]
	}

	size, ok := trailer[name("Size")].(int64)
	if !ok {
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: trailer missing /Size entry")
	}
	if size < 0 {
		// Slicing with a negative bound would panic.
		return nil, objptr{}, nil, fmt.Errorf("malformed PDF: trailer has negative /Size %d", size)
	}

	if size < int64(len(table)) {
		table = table[:size]
	}

	return table, objptr{}, trailer, nil
}
// readXrefTableData reads the subsections of a classic cross-reference table
// from b, up to and including the "trailer" keyword, and merges them into
// table, which is grown as needed and returned.
//
// Each subsection starts with the first object number and an entry count,
// followed by that many "offset generation f|n" entries. Only in-use ("n")
// entries are recorded, and only in slots whose offset is still zero, so data
// read earlier takes precedence. A subsection with a negative starting object
// number is rejected instead of indexing the table out of range.
func readXrefTableData(b *buffer, table []xref) ([]xref, error) {
	for {
		tok := b.readToken()
		if tok == keyword("trailer") {
			break
		}
		start, ok1 := tok.(int64)
		n, ok2 := b.readToken().(int64)
		if !ok1 || !ok2 {
			return nil, fmt.Errorf("malformed xref table")
		}
		if n > 0 && start < 0 {
			return nil, fmt.Errorf("malformed xref table")
		}
		for i := 0; i < int(n); i++ {
			off, ok1 := b.readToken().(int64)
			gen, ok2 := b.readToken().(int64)
			alloc, ok3 := b.readToken().(keyword)
			if !ok1 || !ok2 || !ok3 || alloc != keyword("f") && alloc != keyword("n") {
				return nil, fmt.Errorf("malformed xref table")
			}
			x := int(start) + i
			if x >= len(table) {
				// Extend with zero entries so that slot x exists.
				table = append(table, make([]xref, x+1-len(table))...)
			}
			if alloc == keyword("n") && table[x].offset == 0 {
				table[x] = xref{ptr: objptr{uint32(x), uint16(gen)}, offset: int64(off)}
			}
		}
	}
	return table, nil
}
// findLastLine returns the index in buf of the last occurrence of s that
// stands on a line of its own, that is, one immediately preceded and
// immediately followed by a '\n' or '\r' byte. It returns -1 when there is no
// such occurrence or when s is empty.
//
// An occurrence that touches the start or the end of buf cannot qualify; the
// search then simply continues with earlier occurrences.
func findLastLine(buf []byte, s string) int {
	bs := []byte(s)
	if len(bs) == 0 {
		return -1
	}
	isEOL := func(c byte) bool { return c == '\n' || c == '\r' }
	limit := len(buf)
	for {
		i := bytes.LastIndex(buf[:limit], bs)
		if i <= 0 {
			// Not found, or found at index 0 where no byte can precede it.
			return -1
		}
		if end := i + len(bs); end < len(buf) && isEOL(buf[i-1]) && isEOL(buf[end]) {
			return i
		}
		// Keep looking strictly before this occurrence.
		limit = i
	}
}
// A Value is a single PDF value, such as an integer, dictionary, or array.
// The zero Value is a PDF null (Kind() == Null, IsNull() == true).
type Value struct {
// r is the Reader the value came from; it is used to resolve indirect references.
r *Reader
// ptr identifies the indirect object from which the value was loaded, as
// tracked by resolve.
ptr objptr
// data is the underlying representation: nil, bool, int64, float64, string,
// name, dict, array, or stream.
data interface{}
}
// IsNull reports whether the value is a null, meaning that it carries no
// underlying data. It is equivalent to Kind() == Null.
func (v Value) IsNull() bool {
	if v.data != nil {
		return false
	}
	return true
}
// A ValueKind specifies the kind of data underlying a Value.
type ValueKind int
// The PDF value kinds. Null is the zero ValueKind, matching the zero Value.
const (
Null ValueKind = iota
Bool
Integer
Real
String
Name
Dict
Array
Stream
)
// Kind reports the kind of value underlying v. Data of any type other than
// the eight recognized representations is reported as Null.
func (v Value) Kind() ValueKind {
	switch v.data.(type) {
	case bool:
		return Bool
	case int64:
		return Integer
	case float64:
		return Real
	case string:
		return String
	case name:
		return Name
	case dict:
		return Dict
	case array:
		return Array
	case stream:
		return Stream
	}
	return Null
}
// String returns a textual representation of the value v, as produced by objfmt.
// Note that String is not the accessor for values with Kind() == String.
// To access such values, see RawString, Text, and TextFromUTF16.
func (v Value) String() string {
	text := objfmt(v.data)
	return text
}
// objfmt formats a parsed PDF object in a PDF-like textual syntax.
//
// Strings are decoded (PDFDocEncoding or UTF-16 when detected) and quoted,
// names get a leading slash, dictionaries are printed with their keys in
// sorted order, and arrays, streams, references and object definitions are
// formatted recursively. Any other value is formatted with fmt.Sprint.
func objfmt(x interface{}) string {
	switch val := x.(type) {
	case string:
		switch {
		case isPDFDocEncoded(val):
			return strconv.Quote(pdfDocDecode(val))
		case isUTF16(val):
			// Skip the two-byte marker in front of the UTF-16 data.
			return strconv.Quote(utf16Decode(val[2:]))
		}
		return strconv.Quote(val)

	case name:
		return "/" + string(val)

	case dict:
		var keys []string
		for k := range val {
			keys = append(keys, string(k))
		}
		sort.Strings(keys)

		var out bytes.Buffer
		out.WriteString("<<")
		sep := ""
		for _, k := range keys {
			out.WriteString(sep)
			sep = " "
			out.WriteString("/")
			out.WriteString(k)
			out.WriteString(" ")
			out.WriteString(objfmt(val[name(k)]))
		}
		out.WriteString(">>")
		return out.String()

	case array:
		var out bytes.Buffer
		out.WriteString("[")
		sep := ""
		for _, elem := range val {
			out.WriteString(sep)
			sep = " "
			out.WriteString(objfmt(elem))
		}
		out.WriteString("]")
		return out.String()

	case stream:
		return fmt.Sprintf("%v@%d", objfmt(val.hdr), val.offset)

	case objptr:
		return fmt.Sprintf("%d %d R", val.id, val.gen)

	case objdef:
		return fmt.Sprintf("{%d %d obj}%v", val.ptr.id, val.ptr.gen, objfmt(val.obj))
	}
	return fmt.Sprint(x)
}
// Bool returns v's boolean value.
// If v.Kind() != Bool, Bool returns false.
func (v Value) Bool() bool {
	// A failed assertion yields the zero value, which is the documented fallback.
	b, _ := v.data.(bool)
	return b
}
// Int64 returns v's int64 value.
// If v.Kind() != Integer, Int64 returns 0.
func (v Value) Int64() int64 {
	// A failed assertion yields the zero value, which is the documented fallback.
	n, _ := v.data.(int64)
	return n
}
// Float64 returns v's float64 value, converting from integer if necessary.
// If v.Kind() != Real and v.Kind() != Integer, Float64 returns 0.
func (v Value) Float64() float64 {
	switch num := v.data.(type) {
	case float64:
		return num
	case int64:
		return float64(num)
	}
	return 0
}
// RawString returns v's string value without any decoding.
// If v.Kind() != String, RawString returns the empty string.
func (v Value) RawString() string {
	// A failed assertion yields "", which is the documented fallback.
	s, _ := v.data.(string)
	return s
}
// Text returns v's string value interpreted as a "text string" (defined in
// the PDF spec) and converted to UTF-8: PDFDocEncoding is tried first, then
// UTF-16; a string matching neither is returned unchanged.
// If v.Kind() != String, Text returns the empty string.
func (v Value) Text() string {
	s, isString := v.data.(string)
	switch {
	case !isString:
		return ""
	case isPDFDocEncoded(s):
		return pdfDocDecode(s)
	case isUTF16(s):
		// Skip the two-byte marker in front of the UTF-16 data.
		return utf16Decode(s[2:])
	}
	return s
}
// TextFromUTF16 returns v's string value interpreted as big-endian UTF-16
// and then converted to UTF-8.
// If v.Kind() != String, or if the data is empty or has an odd number of
// bytes, TextFromUTF16 returns the empty string.
func (v Value) TextFromUTF16() string {
	s, ok := v.data.(string)
	if !ok || len(s)%2 != 0 || s == "" {
		return ""
	}
	return utf16Decode(s)
}
// Name returns v's name value.
// If v.Kind() != Name, Name returns the empty string.
// The returned name does not include the leading slash:
// if v corresponds to the name written using the syntax /Helvetica,
// Name() == "Helvetica".
func (v Value) Name() string {
	// A failed assertion yields the empty name, which converts to "".
	n, _ := v.data.(name)
	return string(n)
}
// Key returns the value associated with the given name key in the dictionary v.
// Like the result of the Name method, the key should not include a leading slash.
// If v is a stream, Key applies to the stream's header dictionary.
// If v.Kind() != Dict and v.Kind() != Stream, Key returns a null Value.
// An indirect reference stored under the key is resolved before returning.
func (v Value) Key(key string) Value {
	var entries dict
	switch d := v.data.(type) {
	case dict:
		entries = d
	case stream:
		entries = d.hdr
	default:
		return Value{}
	}
	return v.r.resolve(v.ptr, entries[name(key)])
}
// Keys returns a sorted list of the keys in the dictionary v.
// If v is a stream, Keys applies to the stream's header dictionary.
// If v.Kind() != Dict and v.Kind() != Stream, Keys returns nil; for an empty
// dictionary it returns an empty, non-nil slice.
func (v Value) Keys() []string {
	var entries dict
	switch d := v.data.(type) {
	case dict:
		entries = d
	case stream:
		entries = d.hdr
	default:
		return nil
	}
	names := make([]string, 0, len(entries)) // non-nil even when empty
	for k := range entries {
		names = append(names, string(k))
	}
	sort.Strings(names)
	return names
}
// Index returns the i'th element in the array v, resolving it if it is an
// indirect reference.
// If v.Kind() != Array or if i is outside the array bounds,
// Index returns a null Value.
func (v Value) Index(i int) Value {
	elems, isArray := v.data.(array)
	if !isArray {
		return Value{}
	}
	if i < 0 || i >= len(elems) {
		return Value{}
	}
	return v.r.resolve(v.ptr, elems[i])
}
// Len returns the length of the array v.
// If v.Kind() != Array, Len returns 0.
func (v Value) Len() int {
	// A failed assertion yields a nil array, whose length is 0.
	elems, _ := v.data.(array)
	return len(elems)
}
// resolve turns the parsed object x into a Value.
//
// If x is an indirect reference (objptr), the referenced object is loaded via
// the cross-reference table: either from the object stream that contains it
// (following /Extends links when the object is not listed in the first
// stream) or from its byte offset in the file. The resulting Value records
// the reference as its ptr. For any other x, the Value records parent.
//
// resolve returns a null Value when the reference is outside the table, when
// its generation does not match the table entry, or when the entry has no
// location. It panics on structural errors such as a missing object stream,
// an /Extends chain that loops, or an object definition whose number does not
// match the reference.
func (r *Reader) resolve(parent objptr, x interface{}) Value {
	if ptr, ok := x.(objptr); ok {
		if ptr.id >= uint32(len(r.xref)) {
			return Value{}
		}
		xref := r.xref[ptr.id]
		if xref.ptr != ptr || !xref.inStream && xref.offset == 0 {
			return Value{}
		}
		if xref.inStream {
			strm := r.resolve(parent, xref.stream)
			// visited guards against an /Extends chain that loops back on itself.
			visited := make(map[objptr]bool)
		Search:
			for {
				if strm.Kind() != Stream {
					panic("not a stream")
				}
				if visited[strm.ptr] {
					panic("object stream Extends chain contains a loop")
				}
				visited[strm.ptr] = true
				if strm.Key("Type").Name() != "ObjStm" {
					panic("not an object stream")
				}
				n := int(strm.Key("N").Int64())
				first := strm.Key("First").Int64()
				if first == 0 {
					panic("missing First")
				}
				b := newBuffer(strm.Reader(), 0)
				b.allowEOF = true
				// The stream starts with n pairs of (object number, offset
				// relative to First).
				for i := 0; i < n; i++ {
					id, _ := b.readToken().(int64)
					off, _ := b.readToken().(int64)
					if uint32(id) == ptr.id {
						b.seekForward(first + off)
						x = b.readObject()
						break Search
					}
				}
				ext := strm.Key("Extends")
				if ext.Kind() != Stream {
					panic("cannot find object in stream")
				}
				strm = ext
			}
		} else {
			b := newBuffer(io.NewSectionReader(r.f, xref.offset, r.end-xref.offset), xref.offset)
			b.key = r.key
			b.useAES = r.useAES
			obj := b.readObject()
			def, ok := obj.(objdef)
			if !ok {
				panic(fmt.Errorf("loading %v: found %T instead of objdef", ptr, obj))
			}
			if def.ptr != ptr {
				panic(fmt.Errorf("loading %v: found %v", ptr, def.ptr))
			}
			x = def.obj
		}
		parent = ptr
	}

	switch x := x.(type) {
	case nil, bool, int64, float64, name, dict, array, stream, string:
		return Value{r, parent, x}
	default:
		panic(fmt.Errorf("unexpected value type %T in resolve", x))
	}
}
// An errorReadCloser is an io.ReadCloser on which every operation fails with
// the stored error.
type errorReadCloser struct {
	err error
}

// Read never produces data; it reports the stored error.
func (rc *errorReadCloser) Read(_ []byte) (n int, err error) {
	return 0, rc.err
}

// Close reports the stored error.
func (rc *errorReadCloser) Close() error {
	return rc.err
}
// Reader returns the data contained in the stream v, decrypted when the file
// has an encryption key and decoded according to the stream's /Filter entry
// (a single name or an array of names, each paired with its /DecodeParms).
// If v.Kind() != Stream, Reader returns a ReadCloser that
// responds to all reads with a "stream not present" error.
// Reader panics if /Filter is neither absent, a name, nor an array.
func (v Value) Reader() io.ReadCloser {
	strm, isStream := v.data.(stream)
	if !isStream {
		return &errorReadCloser{fmt.Errorf("stream not present")}
	}

	// Raw bytes of the stream body, as delimited by /Length.
	var src io.Reader = io.NewSectionReader(v.r.f, strm.offset, v.Key("Length").Int64())
	if v.r.key != nil {
		src = decryptStream(v.r.key, v.r.useAES, strm.ptr, src)
	}

	filter := v.Key("Filter")
	param := v.Key("DecodeParms")
	switch filter.Kind() {
	case Null:
		// No filter: the data is returned as stored.
	case Name:
		src = applyFilter(src, filter.Name(), param)
	case Array:
		// Filters are applied in the order listed.
		for i := 0; i < filter.Len(); i++ {
			src = applyFilter(src, filter.Index(i).Name(), param.Index(i))
		}
	default:
		panic(fmt.Errorf("unsupported filter %v", filter))
	}

	return ioutil.NopCloser(src)
}
// applyFilter wraps rd in a decoder for the named stream filter, configured
// by the decode parameters in param.
//
// Supported filters are FlateDecode (optionally with predictor 12) and
// ASCII85Decode (without decode parameters). applyFilter panics on an
// unknown filter, an unsupported predictor, a zlib header error, or
// unexpected parameters for ASCII85Decode.
func applyFilter(rd io.Reader, name string, param Value) io.Reader {
	if name == "FlateDecode" {
		zr, err := zlib.NewReader(rd)
		if err != nil {
			panic(err)
		}
		pred := param.Key("Predictor")
		if pred.Kind() == Null {
			return zr
		}
		columns := param.Key("Columns").Int64()
		if pred.Int64() == 12 {
			// Each row carries one leading filter-type byte plus columns data bytes.
			return &pngUpReader{r: zr, hist: make([]byte, 1+columns), tmp: make([]byte, 1+columns)}
		}
		if DebugOn {
			fmt.Println("unknown predictor", pred)
		}
		panic("pred")
	}

	if name == "ASCII85Decode" {
		cleanASCII85 := newAlphaReader(rd)
		decoder := ascii85.NewDecoder(cleanASCII85)
		// Keys is nil only when param is neither a dictionary nor a stream.
		if param.Keys() == nil {
			return decoder
		}
		if DebugOn {
			fmt.Println("param=", param)
		}
		panic("not expected DecodeParms for ascii85")
	}

	panic("unknown filter " + name)
}
// A pngUpReader undoes the PNG "Up" predictor: every input row consists of a
// filter-type byte, which must be 2, followed by the byte-wise differences
// from the row above.
type pngUpReader struct {
	r    io.Reader // source of encoded rows
	hist []byte    // running sum of all rows so far; hist[1:] is the current decoded row
	tmp  []byte    // scratch space for one encoded row
	pend []byte    // decoded bytes not yet handed to the caller
}

// Read fills b with decoded data, reading as many rows from the source as
// needed. It returns early, with the number of bytes delivered so far, when
// the source fails or a row does not use the Up filter.
func (r *pngUpReader) Read(b []byte) (int, error) {
	total := 0
	for len(b) > 0 {
		if len(r.pend) == 0 {
			// Decode the next row into hist.
			if _, err := io.ReadFull(r.r, r.tmp); err != nil {
				return total, err
			}
			if r.tmp[0] != 2 {
				return total, fmt.Errorf("malformed PNG-Up encoding")
			}
			for i := range r.tmp {
				r.hist[i] += r.tmp[i]
			}
			r.pend = r.hist[1:]
			continue
		}
		m := copy(b, r.pend)
		total += m
		b = b[m:]
		r.pend = r.pend[m:]
	}
	return total, nil
}
// passwordPad is the fixed 32-byte padding string of the standard security
// handler. initEncrypt appends a prefix of it to passwords shorter than 32
// bytes and also uses it when computing the expected /U value.
var passwordPad = []byte{
0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A,
}
// initEncrypt validates the trailer's Encrypt dictionary, derives the file
// encryption key from password, and checks the key against the dictionary's
// /U entry. On success it stores the key in r.key, sets r.useAES when V == 4,
// and returns nil. It returns ErrInvalidPassword when the password does not
// match, and a descriptive error when the encryption parameters are malformed
// or unsupported.
func (r *Reader) initEncrypt(password string) error {
// See PDF 32000-1:2008, §7.6.
encrypt, _ := r.resolve(objptr{}, r.trailer["Encrypt"]).data.(dict)
// Only the Standard security handler is supported.
if encrypt["Filter"] != name("Standard") {
return fmt.Errorf("unsupported PDF: encryption filter %v", objfmt(encrypt["Filter"]))
}
// n is the key length in bits; a missing (or zero) Length means 40.
n, _ := encrypt["Length"].(int64)
if n == 0 {
n = 40
}
if n%8 != 0 || n > 128 || n < 40 {
return fmt.Errorf("malformed PDF: %d-bit encryption key", n)
}
// Accepted versions: 1, 2, and 4 when okayV4 approves the crypt filters.
V, _ := encrypt["V"].(int64)
if V != 1 && V != 2 && (V != 4 || !okayV4(encrypt)) {
return fmt.Errorf("unsupported PDF: encryption version V=%d; %v", V, objfmt(encrypt))
}
// The first element of the trailer's ID array takes part in key derivation.
ids, ok := r.trailer["ID"].(array)
if !ok || len(ids) < 1 {
return fmt.Errorf("malformed PDF: missing ID in trailer")
}
idstr, ok := ids[0].(string)
if !ok {
return fmt.Errorf("malformed PDF: missing ID in trailer")
}
ID := []byte(idstr)
// Accepted revisions: 2 through 4.
R, _ := encrypt["R"].(int64)
if R < 2 {
return fmt.Errorf("malformed PDF: encryption revision R=%d", R)
}
if R > 4 {
return fmt.Errorf("unsupported PDF: encryption revision R=%d", R)
}
O, _ := encrypt["O"].(string)
U, _ := encrypt["U"].(string)
if len(O) != 32 || len(U) != 32 {
return fmt.Errorf("malformed PDF: missing O= or U= encryption parameters")
}
p, _ := encrypt["P"].(int64)
P := uint32(p)
// TODO: Password should be converted to Latin-1.
pw := []byte(password)
// Key = MD5(password padded or truncated to 32 bytes, O, P little-endian, ID).
h := md5.New()
if len(pw) >= 32 {
h.Write(pw[:32])
} else {
h.Write(pw)
h.Write(passwordPad[:32-len(pw)])
}
h.Write([]byte(O))
h.Write([]byte{byte(P), byte(P >> 8), byte(P >> 16), byte(P >> 24)})
h.Write([]byte(ID))
key := h.Sum(nil)
if R >= 3 {
// Revision 3 and later: re-hash the first n/8 bytes 50 more times.
for i := 0; i < 50; i++ {
h.Reset()
h.Write(key[:n/8])
key = h.Sum(key[:0])
}
key = key[:n/8]
} else {
// Revision 2 always uses a 40-bit key.
key = key[:40/8]
}
c, err := rc4.NewCipher(key)
if err != nil {
return fmt.Errorf("malformed PDF: invalid RC4 key: %v", err)
}
// u is the value that /U must begin with if the password is right.
var u []byte
if R == 2 {
// Revision 2: the padding string encrypted with the key.
u = make([]byte, 32)
copy(u, passwordPad)
c.XORKeyStream(u, u)
} else {
// Revision 3 and later: MD5(padding, ID) encrypted with the key, then 19
// more RC4 passes using the key XORed with the pass number.
h.Reset()
h.Write(passwordPad)
h.Write([]byte(ID))
u = h.Sum(nil)
c.XORKeyStream(u, u)
for i := 1; i <= 19; i++ {
key1 := make([]byte, len(key))
copy(key1, key)
for j := range key1 {
key1[j] ^= byte(i)
}
c, _ = rc4.NewCipher(key1)
c.XORKeyStream(u, u)
}
}
// A prefix comparison is used because u is 16 bytes long for revision 3 and
// later while U is 32 bytes long.
if !bytes.HasPrefix([]byte(U), u) {
return ErrInvalidPassword
}
r.key = key
r.useAES = V == 4
return nil
}
// ErrInvalidPassword is the error reported when a password does not match an
// encrypted PDF. NewReaderEncrypted returns it once every candidate password
// has been rejected.
var ErrInvalidPassword = fmt.Errorf("encrypted PDF: invalid password")
// okayV4 reports whether a V=4 Encrypt dictionary uses a configuration this
// package can handle: streams and strings share one crypt filter (StmF ==
// StrF), that filter is defined in CF as a dictionary, its AuthEvent, if
// present, is DocOpen, its Length, if present, is 16, and its CFM is AESV2.
func okayV4(encrypt dict) bool {
	cf, ok := encrypt["CF"].(dict)
	if !ok {
		return false
	}
	stmf, okStm := encrypt["StmF"].(name)
	if !okStm {
		return false
	}
	strf, okStr := encrypt["StrF"].(name)
	if !okStr || stmf != strf {
		return false
	}

	// A missing or non-dictionary filter entry cannot name AESV2.
	cfparam, ok := cf[stmf].(dict)
	if !ok {
		return false
	}
	if event := cfparam["AuthEvent"]; event != nil && event != name("DocOpen") {
		return false
	}
	if length := cfparam["Length"]; length != nil && length != int64(16) {
		return false
	}
	return cfparam["CFM"] == name("AESV2")
}
// cryptKey derives the per-object key used to decrypt the strings and streams
// of the object identified by ptr (PDF 32000-1:2008, §7.6.2, Algorithm 1).
//
// The file key is extended with the low three bytes of the object number and
// the low two bytes of the generation number, plus the constant "sAlT" for
// AES, and hashed with MD5. For RC4 the key is the first len(key)+5 bytes of
// the digest, up to a maximum of 16, so a 40-bit file key yields an 80-bit
// object key. For AES the full 16-byte digest is always returned, because
// AESV2 requires a 128-bit key.
func cryptKey(key []byte, useAES bool, ptr objptr) []byte {
	h := md5.New()
	h.Write(key)
	h.Write([]byte{byte(ptr.id), byte(ptr.id >> 8), byte(ptr.id >> 16), byte(ptr.gen), byte(ptr.gen >> 8)})
	if useAES {
		h.Write([]byte("sAlT"))
	}
	sum := h.Sum(nil)
	if useAES {
		return sum
	}
	if n := len(key) + 5; n < len(sum) {
		sum = sum[:n]
	}
	return sum
}
// decryptString decrypts the string x, which belongs to the object identified
// by ptr, using the per-object key derived from the file key.
//
// With AES, x consists of a 16-byte initialization vector followed by
// CBC-encrypted data; the PKCS#5 padding that the PDF specification requires
// on AES-encrypted strings is removed from the result when it is well formed.
// Without AES, x is decrypted with RC4.
//
// decryptString panics if an AES string is shorter than one block or not a
// whole number of blocks, or if a cipher cannot be created from the key.
func decryptString(key []byte, useAES bool, ptr objptr, x string) string {
	key = cryptKey(key, useAES, ptr)
	if useAES {
		s := []byte(x)
		if len(s) < aes.BlockSize {
			panic("Encrypted text shorter that AES block size")
		}
		block, err := aes.NewCipher(key)
		if err != nil {
			panic("AES: " + err.Error())
		}
		iv := s[:aes.BlockSize]
		s = s[aes.BlockSize:]
		stream := cipher.NewCBCDecrypter(block, iv)
		stream.CryptBlocks(s, s)

		// Strip the PKCS#5 padding: the last byte gives the pad length, and
		// every pad byte carries that same value. Data without valid padding
		// is returned as is.
		if n := len(s); n > 0 {
			pad := int(s[n-1])
			valid := pad >= 1 && pad <= aes.BlockSize && pad <= n
			for i := n - pad; valid && i < n; i++ {
				valid = s[i] == byte(pad)
			}
			if valid {
				s = s[:n-pad]
			}
		}
		return string(s)
	}

	c, err := rc4.NewCipher(key)
	if err != nil {
		panic("RC4: " + err.Error())
	}
	data := []byte(x)
	c.XORKeyStream(data, data)
	return string(data)
}
// decryptStream returns a reader that decrypts the stream data read from rd,
// which belongs to the object identified by ptr, using the per-object key
// derived from the file key.
//
// With AES, the first 16 bytes of rd are consumed immediately as the
// initialization vector and the remainder is decrypted in CBC mode; an
// unusable key causes a panic. Without AES, the data is decrypted with RC4.
func decryptStream(key []byte, useAES bool, ptr objptr, rd io.Reader) io.Reader {
	objKey := cryptKey(key, useAES, ptr)
	if !useAES {
		c, _ := rc4.NewCipher(objKey)
		return &cipher.StreamReader{S: c, R: rd}
	}

	block, err := aes.NewCipher(objKey)
	if err != nil {
		panic("AES: " + err.Error())
	}
	iv := make([]byte, 16)
	io.ReadFull(rd, iv)
	return &cbcReader{
		cbc: cipher.NewCBCDecrypter(block, iv),
		rd:  rd,
		buf: make([]byte, 16),
	}
}
// A cbcReader decrypts data from rd one buffer at a time using the block
// mode cbc.
type cbcReader struct {
	cbc  cipher.BlockMode // decrypter applied to each full buffer
	rd   io.Reader        // source of encrypted data
	buf  []byte           // holds one buffer of data, decrypted in place
	pend []byte           // decrypted bytes not yet handed to the caller
}

// Read copies pending decrypted bytes into b. When nothing is pending it
// first reads and decrypts one full buffer from the source; if that read
// fails, Read returns 0 and the error.
func (r *cbcReader) Read(b []byte) (n int, err error) {
	if len(r.pend) > 0 {
		n = copy(b, r.pend)
		r.pend = r.pend[n:]
		return n, nil
	}
	if _, err = io.ReadFull(r.rd, r.buf); err != nil {
		return 0, err
	}
	r.cbc.CryptBlocks(r.buf, r.buf)
	n = copy(b, r.buf)
	r.pend = r.buf[n:]
	return n, nil
}
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Go
1
https://gitee.com/cfh008/pdf.git
git@gitee.com:cfh008/pdf.git
cfh008
pdf
pdf
master

搜索帮助

344bd9b3 5694891 D2dac590 5694891