1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import struct
import sys
from datetime import datetime, timedelta
from functools import partial
25
26 verbose = False
27
28
def debug(*message):
    """
    Print the arguments, comma-separated, behind a "# [d]" tag, but only
    when the module-level `verbose` flag is true.

    TODO(wb): replace with logging

    NOTE(review): the `def` line was missing from the listing this code was
    recovered from. The name `debug` and the `*message` signature are
    reconstructed from the "[d]" tag and from the body joining `message`
    as a sequence -- confirm against callers.

    Arguments:
    - `message`: Any number of objects; each is passed through str().
    """
    # `verbose` is only read here, so no `global` declaration is needed.
    if verbose:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("# [d] %s" % (", ".join(map(str, message))))
36
37
def warning(message):
    """
    Print `message` behind a "# [w]" tag.

    TODO(wb): replace with logging

    NOTE(review): the `def` line was missing from the listing this code was
    recovered from; the name `warning` is inferred from the "[w]" tag --
    confirm against callers.

    Arguments:
    - `message`: The object to print.
    """
    # One-element tuple so that a tuple-valued message is formatted as a
    # single value instead of being unpacked into the format string.
    print("# [w] %s" % (message,))
43
44
def info(message):
    """
    Print `message` behind a "# [i]" tag.

    TODO(wb): replace with logging

    NOTE(review): the `def` line was missing from the listing this code was
    recovered from; the name `info` is inferred from the "[i]" tag --
    confirm against callers.

    Arguments:
    - `message`: The object to print.
    """
    # One-element tuple so that a tuple-valued message is formatted as a
    # single value instead of being unpacked into the format string.
    print("# [i] %s" % (message,))
50
51
def error(message):
    """
    Print `message` behind a "# [e]" tag, then terminate the process via
    sys.exit(-1).

    TODO(wb): replace with logging

    NOTE(review): the `def` line was missing from the listing this code was
    recovered from; the name `error` is inferred from the "[e]" tag --
    confirm against callers.

    Arguments:
    - `message`: The object to print.
    Throws:
    - `SystemExit`: always, with exit code -1.
    """
    # One-element tuple so that a tuple-valued message is formatted as a
    # single value instead of being unpacked into the format string.
    print("# [e] %s" % (message,))
    sys.exit(-1)
58
59
def hex_dump(src, start_addr=0):
    """
    see:
    http://code.activestate.com/recipes/142812-hex-dumper/

    NOTE(review): the `def` line was missing from the listing this code was
    recovered from. The parameter names come from the @param tags below;
    the function name and the `start_addr=0` default are reconstructed --
    confirm against callers. Runs of spaces inside the format strings are
    reproduced exactly as they appear in the listing, which may have
    collapsed them.

    @param src A bytestring containing the data to dump.
    @param start_addr An integer representing the start
      address of the data in whatever context it comes from.
    @return A string containing a classic hex dump with 16
      bytes per line.  If start_addr is provided, then the
      data is interpreted as starting at this offset, and
      the offset column is updated accordingly.
    """
    length = 16

    def render(chunk):
        # bytearray() yields integers on both Python 2 and Python 3, so no
        # ord() call or 256-entry translation table is needed.
        data = bytearray(chunk)
        hexa = ' '.join(["%02X" % b for b in data])
        # Same character set the original filter kept under Python 2:
        # bytes whose repr() is three characters long, i.e. 0x20-0x7E
        # except the backslash. Everything else is shown as '.'.
        printable = ''.join(
            [chr(b) if (0x20 <= b <= 0x7E and b != 0x5C) else '.'
             for b in data])
        return hexa, printable

    result = []
    remainder_start_addr = start_addr

    # An unaligned start gets a short first row, indented so the following
    # rows begin on a 16-byte boundary.
    lead = start_addr % length
    if lead != 0:
        base_addr = start_addr - lead
        num_chars = length - lead

        spaces = " ".join([" "] * lead)
        hexa, printable = render(src[0:num_chars])
        result.append("%04X %s %s %s%s\n" %
                      (base_addr, spaces, hexa,
                       " " * (lead + 1), printable))

        src = src[num_chars:]
        remainder_start_addr = base_addr + length

    for i in range(0, len(src), length):
        hexa, printable = render(src[i:i + length])
        result.append("%04X %-*s %s\n" %
                      (remainder_start_addr + i, length * 3,
                       hexa, printable))

    return ''.join(result)
106
107
class memoize(object):
    """cache the return value of a method

    From http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/

    This class is meant to be used as a decorator of methods. The return value
    from a given method invocation will be cached on the instance whose method
    was invoked. All arguments passed to a method decorated with memoize must
    be hashable.

    If a memoized method is invoked directly on its class the result will not
    be cached. Instead the method will be invoked like a static method:

        class Obj(object):
            @memoize
            def add_to(self, arg):
                return self + arg
        Obj.add_to(1) # not enough arguments
        Obj.add_to(1, 2) # returns 3, result is not cached

    NOTE(review): the `class` line and the `__init__` / `__call__` header
    lines were missing from the listing this code was recovered from. They
    are reconstructed from the docstring (`@memoize`), from the use of
    `self.func`, and from `__call__`'s use of `args` / `kw` -- confirm.
    """

    def __init__(self, func):
        # The undecorated function; also part of every cache key.
        self.func = func

    def __get__(self, obj, objtype=None):
        # Class-level access hands back the raw function (no caching).
        if obj is None:
            return self.func
        # Instance access binds the instance as the first positional argument.
        return partial(self, obj)

    def __call__(self, *args, **kw):
        obj = args[0]
        # The cache lives on the instance itself. `__cache` is name-mangled
        # by the enclosing class, so the attribute is `_memoize__cache`.
        try:
            cache = obj.__cache
        except AttributeError:
            cache = obj.__cache = {}
        key = (self.func, args[1:], frozenset(kw.items()))
        try:
            res = cache[key]
        except KeyError:
            res = cache[key] = self.func(*args, **kw)
        return res
147
148
def align(offset, alignment):
    """
    Return the offset aligned to the nearest greater given alignment.
    An offset that is already a multiple of `alignment` is returned
    unchanged.
    Arguments:
    - `offset`: An integer
    - `alignment`: An integer
    """
    remainder = offset % alignment
    if remainder == 0:
        return offset
    # Step forward by whatever is missing to reach the next multiple.
    return offset + (alignment - remainder)
159
160
def dosdate(dosdate, dostime):
    """
    Decode a packed DOSDATE / DOSTIME pair.

    `dosdate`: 2 bytes, little endian.
    `dostime`: 2 bytes, little endian.
    returns: datetime.datetime or datetime.datetime.min on error
    """
    try:
        # bytearray() yields integers on both Python 2 and Python 3.
        d = bytearray(dosdate)
        t = (d[1] << 8) | d[0]
        day = t & 0b0000000000011111
        month = (t & 0b0000000111100000) >> 5
        year = ((t & 0b1111111000000000) >> 9) + 1980

        d = bytearray(dostime)
        t = (d[1] << 8) | d[0]
        # The seconds field is stored halved, hence the doubling.
        sec = (t & 0b0000000000011111) * 2
        minute = (t & 0b0000011111100000) >> 5
        hour = (t & 0b1111100000000000) >> 11

        # The file imports the class (`from datetime import datetime`), so
        # it is called directly; `datetime.datetime` would be an
        # AttributeError.
        return datetime(year, month, day, hour, minute, sec)
    except (IndexError, TypeError, ValueError):
        # Short input, non-bytes input, or field values that do not form a
        # valid calendar date/time.
        return datetime.min
185
186
188
def parse_filetime(qword):
    """
    Convert a Windows FILETIME value to a naive datetime.

    The value is treated as a count of 100-nanosecond intervals starting
    at 1601-01-01 00:00:00. Precision below one microsecond is dropped.

    Arguments:
    - `qword`: The FILETIME value as an integer.
    Throws:
    - `OverflowError`: if the result is outside the range datetime can
      represent.
    """
    # Exact integer arithmetic from the 1601 epoch. This avoids the float
    # rounding of `qword * 1e-7` and does not depend on the platform's
    # handling of negative Unix timestamps.
    return datetime(1601, 1, 1) + timedelta(microseconds=qword // 10)
190
191
class BinaryParserException(Exception):
    """
    Base Exception class for binary parsing.

    NOTE(review): the `class` line and the constructor were missing from
    the listing this code was recovered from. The constructor below is
    reconstructed from the use of `self._value` in __repr__ / __str__ --
    confirm.
    """

    def __init__(self, value):
        """
        Constructor.
        Arguments:
        - `value`: A description of the problem; shown by repr() and str().
        """
        super(BinaryParserException, self).__init__(value)
        self._value = value

    def __repr__(self):
        # One-element tuple so a tuple-valued description is formatted as a
        # single value.
        return "BinaryParserException(%r)" % (self._value,)

    def __str__(self):
        return "Binary Parser Exception: %s" % (self._value,)
210
211
class ParseException(BinaryParserException):
    """
    An exception to be thrown during binary parsing, such as
    when an invalid header is encountered.

    NOTE(review): the `class` line and the constructor were missing from
    the listing this code was recovered from. The base class and the
    constructor are reconstructed from the use of `self._value` and from
    the one-argument `ParseException(...)` calls elsewhere in this file --
    confirm.
    """

    def __init__(self, value):
        """
        Constructor.
        Arguments:
        - `value`: A description of the problem; shown by repr() and str().
        """
        super(ParseException, self).__init__(value)

    def __repr__(self):
        # One-element tuple so a tuple-valued description is formatted as a
        # single value.
        return "ParseException(%r)" % (self._value,)

    def __str__(self):
        return "Parse Exception(%s)" % (self._value,)
230
231
class OverrunBufferException(ParseException):
    """
    Raised when a read would go beyond the end of the buffer.

    NOTE(review): the `class` line and the `__init__` header were missing
    from the listing this code was recovered from. The base class follows
    from the original `super(ParseException, self)` call, and the parameter
    names from the body -- confirm.
    """

    def __init__(self, readOffs, bufLen):
        """
        Constructor.
        Arguments:
        - `readOffs`: The absolute offset of the attempted read.
        - `bufLen`: The length of the buffer.
        """
        tvalue = "read: %s, buffer length: %s" % (hex(readOffs), hex(bufLen))
        # Name this class in super() so ParseException.__init__ is not
        # skipped in the MRO.
        super(OverrunBufferException, self).__init__(tvalue)

    def __repr__(self):
        return "OverrunBufferException(%r)" % (self._value,)

    def __str__(self):
        return "Tried to parse beyond the end of the file (%s)" % \
            (self._value,)
243
244
class Block(object):
    """
    Base class for structure blocks in binary parsing.
    A block is associated with an offset into a byte-string.

    NOTE(review): the `class` line and every method's `def` line were
    missing from the listing this code was recovered from. The unpack_*
    names follow the `"unpack_" + type` dispatch in declare_field;
    `offset` and `absolute_offset` are referenced by name in the code and
    docstrings. `current_field_offset` and `pack_word` are reconstructed
    names -- confirm against callers.
    """

    # Byte widths of the fixed-size field types, used by declare_field to
    # advance the implicit offset. A SYSTEMTIME is eight 16-bit words.
    _FIXED_FIELD_SIZES = {
        "byte": 1,
        "int8": 1,
        "word": 2,
        "word_be": 2,
        "int16": 2,
        "dword": 4,
        "dword_be": 4,
        "int32": 4,
        "qword": 8,
        "int64": 8,
        "float": 4,
        "double": 8,
        "dosdate": 4,
        "filetime": 8,
        "systemtime": 16,
        "guid": 16,
    }

    def __init__(self, buf, offset):
        """
        Constructor.
        Arguments:
        - `buf`: Byte string containing stuff to parse.
        - `offset`: The offset into the buffer at which the block starts.
        """
        self._buf = buf
        self._offset = offset
        # Where the next field declared without an explicit offset goes.
        self._implicit_offset = 0

    def __repr__(self):
        return "Block(buf=%r, offset=%r)" % (self._buf, self._offset)

    def __unicode__(self):
        return u"BLOCK @ %s." % (hex(self.offset()))

    def __str__(self):
        # Goes through __unicode__ so subclasses only override one method;
        # avoids the Python-2-only `unicode` builtin.
        return str(self.__unicode__())

    def declare_field(self, type, name, offset=None, length=None):
        """
        Declaratively add fields to this block.
        This method will dynamically add corresponding
        offset and unpacker methods to this block.
        Arguments:
        - `type`: A string. Should be one of the unpack_* types.
        - `name`: A string.
        - `offset`: (Optional) A number. Defaults to the position just
          after the previously declared field.
        - `length`: (Optional) A number. For (w)strings, length in chars.
        Throws:
        - `ParseException`: if the size of the field cannot be determined,
          so the implicit offset cannot be advanced. The accessor and the
          `_off_<name>` attribute have already been installed by then.
        """
        if offset is None:
            offset = self._implicit_offset

        # The unpacker is looked up at call time, not at declaration time.
        if length is None:

            def no_length_handler():
                f = getattr(self, "unpack_" + type)
                return f(offset)
            setattr(self, name, no_length_handler)
        else:

            def explicit_length_handler():
                f = getattr(self, "unpack_" + type)
                return f(offset, length)
            setattr(self, name, explicit_length_handler)

        setattr(self, "_off_" + name, offset)

        if type in self._FIXED_FIELD_SIZES:
            self._implicit_offset = offset + self._FIXED_FIELD_SIZES[type]
        elif type == "binary":
            self._implicit_offset = offset + length
        elif type == "string" and length is not None:
            self._implicit_offset = offset + length
        elif type == "wstring" and length is not None:
            # Two bytes per character.
            self._implicit_offset = offset + (2 * length)
        elif "string" in type and length is None:
            raise ParseException("Implicit offset not supported "
                                 "for dynamic length strings")
        else:
            raise ParseException("Implicit offset not supported "
                                 "for type: " + type)

    def current_field_offset(self):
        """
        Returns the relative offset at which the next implicitly placed
        field would start.
        """
        return self._implicit_offset

    def _unpack_fixed(self, fmt, offset):
        """
        Shared implementation of the fixed-width unpackers.
        Arguments:
        - `fmt`: A struct format string describing exactly one value.
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            return struct.unpack_from(fmt, self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_byte(self, offset):
        """
        Returns a little-endian unsigned byte from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<B", offset)

    def unpack_int8(self, offset):
        """
        Returns a little-endian signed byte from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<b", offset)

    def unpack_word(self, offset):
        """
        Returns a little-endian unsigned WORD (2 bytes) from the
        relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<H", offset)

    def unpack_word_be(self, offset):
        """
        Returns a big-endian unsigned WORD (2 bytes) from the
        relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed(">H", offset)

    def unpack_int16(self, offset):
        """
        Returns a little-endian signed WORD (2 bytes) from the
        relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<h", offset)

    def pack_word(self, offset, word):
        """
        Applies the little-endian WORD (2 bytes) to the relative offset.
        The buffer is written in place via struct.pack_into, so it must
        be writable.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `word`: The data to apply.
        """
        o = self._offset + offset
        return struct.pack_into("<H", self._buf, o, word)

    def unpack_dword(self, offset):
        """
        Returns a little-endian DWORD (4 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<I", offset)

    def unpack_dword_be(self, offset):
        """
        Returns a big-endian DWORD (4 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed(">I", offset)

    def unpack_int32(self, offset):
        """
        Returns a little-endian signed integer (4 bytes) from the
        relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<i", offset)

    def unpack_qword(self, offset):
        """
        Returns a little-endian QWORD (8 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<Q", offset)

    def unpack_int64(self, offset):
        """
        Returns a little-endian signed 64-bit integer (8 bytes) from
        the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<q", offset)

    def unpack_float(self, offset):
        """
        Returns a single-precision float (4 bytes) from
        the relative offset.  IEEE 754 format.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<f", offset)

    def unpack_double(self, offset):
        """
        Returns a double-precision float (8 bytes) from
        the relative offset.  IEEE 754 format.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_fixed("<d", offset)

    def unpack_binary(self, offset, length=False):
        """
        Returns raw binary data from the relative offset with the given length.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the binary blob. If zero (or otherwise
          false), an empty byte string is returned.
        Throws:
        - `OverrunBufferException`
        """
        if not length:
            # b"" is the same object type as "" on Python 2 and is the
            # matching bytes type on Python 3.
            return b""
        o = self._offset + offset
        try:
            return struct.unpack_from("<%ds" % (length), self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_string(self, offset, length):
        """
        Returns a string from the relative offset with the given length.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the string.
        Throws:
        - `OverrunBufferException`
        """
        return self.unpack_binary(offset, length)

    def unpack_wstring(self, offset, length):
        """
        Returns a string from the relative offset with the given length,
        where each character is a wchar (2 bytes). The bytes are decoded
        with the "utf16" codec. The slice is not bounds-checked, so a
        short buffer yields a shorter string.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the string.
        Throws:
        - `UnicodeDecodeError`
        """
        start = self._offset + offset
        raw = self._buf[start:start + 2 * length]
        # Array-like and memoryview buffers have no .decode(); turn them
        # into a byte string first. `tostring` is the older spelling that
        # newer array.array objects no longer provide.
        to_bytes = getattr(raw, "tobytes", None) or \
            getattr(raw, "tostring", None)
        if to_bytes is not None:
            raw = to_bytes()
        return raw.decode("utf16")

    def unpack_dosdate(self, offset):
        """
        Returns a datetime from the DOSDATE and DOSTIME starting at
        the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        # Slicing never fails on a short buffer, so check the bounds
        # explicitly to honour the documented exception.
        if o + 4 > len(self._buf):
            raise OverrunBufferException(o, len(self._buf))
        return dosdate(self._buf[o:o + 2], self._buf[o + 2:o + 4])

    def unpack_filetime(self, offset):
        """
        Returns a datetime from the QWORD Windows timestamp starting at
        the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return parse_filetime(self.unpack_qword(offset))

    def unpack_systemtime(self, offset):
        """
        Returns a datetime from the Windows SYSTEMTIME structure
        (eight little-endian WORDs, 16 bytes) starting at the relative
        offset.
        See http://msdn.microsoft.com/en-us/library/ms724950%28VS.85%29.aspx
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        - `ValueError`: if the fields do not form a valid date/time.
        """
        o = self._offset + offset
        try:
            # "H" is struct's unsigned 16-bit code; "W" is not a valid one.
            parts = struct.unpack_from("<8H", self._buf, o)
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))
        # Field order: year, month, day-of-week, day, hour, minute,
        # second, milliseconds. Day-of-week (parts[2]) is not needed, and
        # datetime wants microseconds rather than milliseconds.
        return datetime(parts[0], parts[1],
                        parts[3],
                        parts[4], parts[5],
                        parts[6], parts[7] * 1000)

    def unpack_guid(self, offset):
        """
        Returns a string containing a GUID starting at the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset

        # bytearray() yields integers on both Python 2 and Python 3.
        h = bytearray(self._buf[o:o + 16])
        # A slice past the end comes back short instead of raising.
        if len(h) != 16:
            raise OverrunBufferException(o, len(self._buf))

        # The first three groups are printed byte-reversed; the rest in
        # buffer order.
        return "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x" % \
            (h[3], h[2], h[1], h[0],
             h[5], h[4],
             h[7], h[6],
             h[8], h[9],
             h[10], h[11], h[12], h[13], h[14], h[15])

    def absolute_offset(self, offset):
        """
        Get the absolute offset from an offset relative to this block
        Arguments:
        - `offset`: The relative offset into this block.
        """
        return self._offset + offset

    def offset(self):
        """
        Equivalent to self.absolute_offset(0x0), which is the starting
        offset of this block.
        """
        return self._offset
661