# vim: sw=4:expandtab:foldmethod=marker
#
# Copyright (c) 2006, Mathieu Fenniak
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# * The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
\r
"""
Implementation of stream filters for PDF.
"""
__author__ = "Mathieu Fenniak"
__author_email__ = "biziqe@mathieu.fenniak.net"

from utils import PdfReadError

# Two StringIO imports can only coexist as a preference/fallback pair:
# take the C implementation when it is available, otherwise the
# pure-Python module of the same name.
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
\r
# Pick a deflate backend once, at import time.  Both branches define the
# same two module-level functions: decompress(data) and compress(data).
try:
    import zlib

    def decompress(data):
        """Return ``data`` inflated with ``zlib.decompress``."""
        return zlib.decompress(data)

    def compress(data):
        """Return ``data`` deflated with ``zlib.compress``."""
        return zlib.compress(data)
except ImportError:
    # Unable to import zlib.  Attempt to use the System.IO.Compression
    # library from the .NET framework. (IronPython only)
    # NOTE(review): DeflateStream works on raw DEFLATE data while zlib
    # expects the zlib wrapper -- confirm this fallback interoperates with
    # streams produced by the zlib branch.
    import System
    from System import IO, Collections, Array

    def _string_to_bytearr(buf):
        """Copy the characters of ``buf`` into a new System.Byte array."""
        retval = Array.CreateInstance(System.Byte, len(buf))
        for i in range(len(buf)):
            retval[i] = ord(buf[i])
        return retval

    def _bytearr_to_string(byte_array):
        """Convert a System.Byte array back into a Python string."""
        # Join once instead of growing a string one character at a time.
        return "".join([chr(byte_array[i]) for i in range(byte_array.Length)])

    def _read_bytes(stream):
        """Read ``stream`` to exhaustion and return a System.Byte array."""
        ms = IO.MemoryStream()
        buf = Array.CreateInstance(System.Byte, 2048)
        while True:
            count = stream.Read(buf, 0, buf.Length)
            if count == 0:
                # Read() returning 0 means no more data is available.
                break
            ms.Write(buf, 0, count)
        retval = ms.ToArray()
        ms.Close()
        return retval

    def decompress(data):
        """Return ``data`` inflated through a .NET DeflateStream."""
        source = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        ms.Write(source, 0, source.Length)
        ms.Position = 0  # fseek 0
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Decompress)
        inflated = _read_bytes(gz)
        retval = _bytearr_to_string(inflated)
        gz.Close()
        return retval

    def compress(data):
        """Return ``data`` deflated through a .NET DeflateStream."""
        source = _string_to_bytearr(data)
        ms = IO.MemoryStream()
        # Third argument (leaveOpen=True) keeps ``ms`` usable after the
        # deflate stream is closed.
        gz = IO.Compression.DeflateStream(ms, IO.Compression.CompressionMode.Compress, True)
        gz.Write(source, 0, source.Length)
        # Close before reading ``ms`` so all pending output is flushed.
        gz.Close()
        ms.Position = 0  # fseek 0
        deflated = ms.ToArray()
        retval = _bytearr_to_string(deflated)
        ms.Close()
        return retval
\r
class FlateDecode(object):
    """Encoder/decoder for the PDF /FlateDecode stream filter."""

    def decode(data, decodeParms):
        """Inflate ``data`` and undo PNG row prediction when requested.

        ``decodeParms`` is the stream's decode-parameter mapping, or a
        false value when the stream has none.  It is read for
        "/Predictor" (default 1, meaning no predictor) and, when a
        predictor is in effect, "/Columns".

        Raises PdfReadError for predictors outside 10..15 and for PNG row
        filter bytes other than 0 (None), 1 (Sub) and 2 (Up).
        """
        data = decompress(data)
        predictor = 1
        # The caller passes stream.get("/DecodeParms"), which may be empty.
        if decodeParms:
            predictor = decodeParms.get("/Predictor", 1)
        # predictor 1 == no predictor
        if predictor != 1:
            columns = decodeParms["/Columns"]
            # PNG prediction:
            if predictor >= 10 and predictor <= 15:
                output = StringIO()
                # PNG prediction can vary from row to row: every row is
                # prefixed with one byte naming the filter used for it.
                rowlength = columns + 1
                assert len(data) % rowlength == 0
                prev_rowdata = (0,) * rowlength
                # Step through the row offsets directly; this avoids
                # relying on "/" performing integer division.
                for start in range(0, len(data), rowlength):
                    rowdata = [ord(x) for x in data[start:start + rowlength]]
                    filterByte = rowdata[0]
                    if filterByte == 0:
                        # None: row is stored as-is
                        pass
                    elif filterByte == 1:
                        # Sub: add the byte to the left (index 1 has no
                        # left neighbour, so start at 2)
                        for i in range(2, rowlength):
                            rowdata[i] = (rowdata[i] + rowdata[i-1]) % 256
                    elif filterByte == 2:
                        # Up: add the byte at the same index in the
                        # previous decoded row
                        for i in range(1, rowlength):
                            rowdata[i] = (rowdata[i] + prev_rowdata[i]) % 256
                    else:
                        # unsupported PNG filter
                        raise PdfReadError("Unsupported PNG filter %r" % filterByte)
                    prev_rowdata = rowdata
                    # drop the filter byte, keep the reconstructed row
                    output.write(''.join([chr(x) for x in rowdata[1:]]))
                data = output.getvalue()
            else:
                # unsupported predictor
                raise PdfReadError("Unsupported flatedecode predictor %r" % predictor)
        return data
    decode = staticmethod(decode)

    def encode(data):
        """Return ``data`` compressed with the module's ``compress``."""
        return compress(data)
    encode = staticmethod(encode)
\r
class ASCIIHexDecode(object):
    """Decoder for the PDF /ASCIIHexDecode stream filter."""

    def decode(data, decodeParms=None):
        """Turn pairs of hexadecimal digits in ``data`` into characters.

        Whitespace is skipped and decoding stops at the ">" end-of-data
        marker, or at the end of ``data`` if the marker is absent.  An
        odd trailing digit is treated as if it were followed by "0".
        ``decodeParms`` is accepted for interface symmetry and ignored.

        Raises ValueError (from ``int``) on a non-hexadecimal character.
        """
        decoded = []
        pair = ""
        for c in data:
            if c == ">":
                # end-of-data marker
                break
            if c.isspace():
                continue
            pair += c
            if len(pair) == 2:
                decoded.append(chr(int(pair, base=16)))
                pair = ""
        if pair:
            # odd number of digits: behave as if a 0 followed the last one
            decoded.append(chr(int(pair + "0", base=16)))
        return "".join(decoded)
    decode = staticmethod(decode)
\r
class ASCII85Decode(object):
    """Decoder for the PDF /ASCII85Decode stream filter."""

    def decode(data, decodeParms=None):
        """Decode ASCII base-85 text in ``data`` and return the result.

        Spaces, tabs, CR and LF are removed first.  An optional leading
        "<~" is skipped, "z" expands to four zero bytes, and "~>" ends the
        data; a short final group is padded and the surplus output bytes
        are dropped.  ``decodeParms`` is accepted for interface symmetry
        and ignored.

        Malformed input trips an ``assert`` (bad character, "z" inside a
        group, one-character final group) or raises IndexError when the
        "~>" terminator is missing.
        """
        chunks = []
        group = []
        x = 0
        hitEod = False
        # remove all whitespace from data
        data = [y for y in data if not (y in ' \n\r\t')]
        while not hitEod:
            c = data[x]
            if len(chunks) == 0 and c == "<" and data[x+1] == "~":
                # optional start marker, only before any output
                x += 2
                continue
            elif c == 'z':
                assert len(group) == 0
                chunks.append('\x00\x00\x00\x00')
                # step past the 'z'; without this the loop would keep
                # re-reading the same character forever
                x += 1
                continue
            elif c == "~" and data[x+1] == ">":
                if len(group) != 0:
                    # cannot have a final group of just 1 char
                    assert len(group) > 1
                    # a final group of n chars encodes n - 1 bytes
                    cnt = len(group) - 1
                    group += [ 85, 85, 85 ]
                    hitEod = cnt
                else:
                    break
            else:
                c = ord(c) - 33
                assert c >= 0 and c < 85
                group += [ c ]
            if len(group) >= 5:
                b = group[0] * (85**4) + \
                    group[1] * (85**3) + \
                    group[2] * (85**2) + \
                    group[3] * 85 + \
                    group[4]
                # 2**32 - 1 itself is a legal value (the group "s8W-!")
                assert b <= (2**32 - 1)
                c4 = chr((b >> 0) % 256)
                c3 = chr((b >> 8) % 256)
                c2 = chr((b >> 16) % 256)
                c1 = chr(b >> 24)
                chunk = c1 + c2 + c3 + c4
                if hitEod:
                    # keep only the bytes the short final group encoded
                    chunk = chunk[:-4+hitEod]
                chunks.append(chunk)
                group = []
            x += 1
        return ''.join(chunks)
    decode = staticmethod(decode)
\r
def decodeStreamData(stream):
    """Apply every filter listed in ``stream``'s "/Filter" entry, in order.

    ``stream`` must offer ``get(key, default)`` and a ``_data`` attribute
    holding the encoded payload.  Returns the decoded data.

    Raises NotImplementedError for an unknown filter name, and for a
    "/Crypt" filter whose decode parameters carry "/Name" or "/Type".
    """
    from generic import NameObject
    filters = stream.get("/Filter", ())
    if len(filters) and not isinstance(filters[0], NameObject):
        # we have a single filter instance
        filters = (filters,)
    data = stream._data
    for filterType in filters:
        if filterType == "/FlateDecode":
            data = FlateDecode.decode(data, stream.get("/DecodeParms"))
        elif filterType == "/ASCIIHexDecode":
            data = ASCIIHexDecode.decode(data)
        elif filterType == "/ASCII85Decode":
            data = ASCII85Decode.decode(data)
        elif filterType == "/Crypt":
            # Same "/DecodeParms" key the /FlateDecode branch reads; the
            # former "/DecodeParams" spelling never matched that entry.
            decodeParams = stream.get("/DecodeParms", {})
            if "/Name" not in decodeParams and "/Type" not in decodeParams:
                # no crypt filter named: data passes through unchanged
                pass
            else:
                raise NotImplementedError("/Crypt filter with /Name or /Type not supported yet")
        else:
            # unsupported filter
            raise NotImplementedError("unsupported filter %s" % filterType)
    return data
\r
if __name__ == "__main__":
    # Self-test, executed only when this module is run as a script.
    assert "abc" == ASCIIHexDecode.decode('61\n626\n3>')

    # The decoder strips whitespace, so the line breaks and indentation
    # inside this literal do not affect the result.
    ascii85Test = """
     <~9jqo^BlbD-BleB1DJ+*+F(f,q/0JhKF<GL>Cj@.4Gp$d7F!,L7@<6@)/0JDEF<G%<+EV:2F!,
     O<DJ+*.@<*K0@<6L(Df-\\0Ec5e;DffZ(EZee.Bl.9pF"AGXBPCsi+DGm>@3BB/F*&OCAfu2/AKY
     i(DIb:@FD,*)+C]U=@3BN#EcYf8ATD3s@q?d$AftVqCh[NqF<G:8+EV:.+Cf>-FD5W8ARlolDIa
     l(DId<j@<?3r@:F%a+D58'ATD4$Bl@l3De:,-DJs`8ARoFb/0JMK@qB4^F!,R<AKZ&-DfTqBG%G
     >uD.RTpAKYo'+CT/5+Cei#DII?(E,9)oF*2M7/c~>
    """
    ascii85_originalText="Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
    assert ASCII85Decode.decode(ascii85Test) == ascii85_originalText
\r