Package Evtx :: Module BinaryParser
[hide private]
[frames | no frames]

Source Code for Module Evtx.BinaryParser

  1  #!/usr/bin/python 
  2  #    This file is part of python-evtx. 
  3  # 
  4  #   Copyright 2012, 2013 Willi Ballenthin <william.ballenthin@mandiant.com> 
  5  #                    while at Mandiant <http://www.mandiant.com> 
  6  # 
  7  #   Licensed under the Apache License, Version 2.0 (the "License"); 
  8  #   you may not use this file except in compliance with the License. 
  9  #   You may obtain a copy of the License at 
 10  # 
 11  #       http://www.apache.org/licenses/LICENSE-2.0 
 12  # 
 13  #   Unless required by applicable law or agreed to in writing, software 
 14  #   distributed under the License is distributed on an "AS IS" BASIS, 
 15  #   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 16  #   See the License for the specific language governing permissions and 
 17  #   limitations under the License. 
 18  # 
 19  #   Version v.0.3.0 
 20   
 21  import sys 
 22  import struct 
 23  from datetime import datetime 
 24  from functools import partial 
 25   
 26  verbose = False 
 27   
 28   
def debug(*message):
    """
    Print a debug line, but only while the module-level `verbose` flag is set.

    TODO(wb): replace with logging

    Arguments:
    - `message`: Any number of values; each is converted with str() and
        the results are joined with ", ".
    """
    if not verbose:
        return
    rendered = ", ".join([str(part) for part in message])
    print("# [d] %s" % (rendered))
36 37
def warning(message):
    """
    Print a warning line to standard output.

    TODO(wb): replace with logging

    Arguments:
    - `message`: The value to report; formatted with "%s".
    """
    line = "# [w] %s" % (message)
    print(line)
43 44
def info(message):
    """
    Print an informational line to standard output.

    TODO(wb): replace with logging

    Arguments:
    - `message`: The value to report; formatted with "%s".
    """
    line = "# [i] %s" % (message)
    print(line)
50 51
def error(message):
    """
    Print an error line to standard output and terminate the process.

    TODO(wb): replace with logging

    Arguments:
    - `message`: The value to report; formatted with "%s".
    Throws:
    - `SystemExit`: always, with exit status -1.
    """
    line = "# [e] %s" % (message)
    print(line)
    sys.exit(-1)
58 59
def hex_dump(src, start_addr=0):
    """
    Render a classic hex dump with 16 bytes per row.

    see:
    http://code.activestate.com/recipes/142812-hex-dumper/

    Every row has the form "<addr> <hex column> <printable column>".
    If `start_addr` is not a multiple of 16, the first row starts at the
    enclosing 16-byte boundary and the byte slots that precede the data
    are left blank (three columns per slot in the hex column, one per
    slot in the printable column), so the partial row lines up with the
    full rows that follow.

    Arguments:
    - `src`: A bytestring (anything accepted by bytearray()) containing
        the data to dump.
    - `start_addr`: An integer representing the start address of the
        data in whatever context it comes from.
    Returns: A string with one "\\n"-terminated row per 16-byte line, or
        the empty string when `src` is empty.
    """
    width = 16
    # bytearray yields ints on both Python 2 and Python 3.
    data = bytearray(src)

    def is_printable(byte):
        # Same set the original translation table kept: ASCII 0x20-0x7E,
        # except backslash (its repr() is an escape sequence).
        return 0x20 <= byte < 0x7F and byte != 0x5C

    def render_row(addr, chunk, skipped):
        # `skipped` is the number of blank byte slots before the data.
        hexa = " ".join(["%02X" % byte for byte in chunk])
        text = "".join([chr(byte) if is_printable(byte) else "."
                        for byte in chunk])
        hex_column = " " * (3 * skipped) + hexa
        return "%04X %-*s %s%s\n" % (addr, width * 3, hex_column,
                                     " " * skipped, text)

    if not data:
        return ""

    rows = []
    next_addr = start_addr
    skipped = start_addr % width
    if skipped:
        # Partial first row: pad up from the previous 16-byte boundary.
        base_addr = start_addr - skipped
        head = data[:width - skipped]
        rows.append(render_row(base_addr, head, skipped))
        data = data[width - skipped:]
        next_addr = base_addr + width

    for i in range(0, len(data), width):
        rows.append(render_row(next_addr + i, data[i:i + width], 0))

    return "".join(rows)
106 107
class memoize(object):
    """
    Decorator that caches the return value of a method per instance.

    From http://code.activestate.com/recipes/577452-a-memoize-decorator-for-instance-methods/

    Results are stored in a dictionary kept on the instance whose method
    was called, keyed by the wrapped function, the positional arguments
    and the keyword arguments. Every argument therefore has to be
    hashable.

    When the decorated method is looked up on the class rather than on an
    instance, the plain function is returned and nothing is cached:
        class Obj(object):
            @memoize
            def add_to(self, arg):
                return self + arg
        Obj.add_to(1)     # not enough arguments
        Obj.add_to(1, 2)  # returns 3, result is not cached
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, obj, objtype=None):
        # Instance access: pre-bind the instance as the first argument.
        if obj is not None:
            return partial(self, obj)
        # Class access: hand back the undecorated function.
        return self.func

    def __call__(self, *args, **kw):
        instance = args[0]
        try:
            store = instance.__cache
        except AttributeError:
            # First memoized call on this instance: create its cache.
            store = {}
            instance.__cache = store
        lookup = (self.func, args[1:], frozenset(kw.items()))
        if lookup not in store:
            store[lookup] = self.func(*args, **kw)
        return store[lookup]
147 148
def align(offset, alignment):
    """
    Round `offset` up to the next multiple of `alignment`.

    An offset that is already a multiple of `alignment` is returned
    unchanged.

    Arguments:
    - `offset`: An integer
    - `alignment`: An integer
    """
    overshoot = offset % alignment
    if overshoot:
        return offset + (alignment - overshoot)
    return offset
159 160
def dosdate(dosdate, dostime):
    """
    Convert a packed DOS date and DOS time into a datetime.

    Bit layout decoded here (both values are 16-bit, little endian):
    - date: bits 0-4 day, bits 5-8 month, bits 9-15 years since 1980
    - time: bits 0-4 seconds / 2, bits 5-10 minute, bits 11-15 hour

    Arguments:
    - `dosdate`: 2 bytes, little endian.
    - `dostime`: 2 bytes, little endian.
    Returns: datetime.datetime, or datetime.datetime.min when the input
        is too short, is not byte data, or encodes an impossible date
        or time.
    """
    try:
        # bytearray yields ints on both Python 2 and Python 3.
        date_raw = bytearray(dosdate)
        time_raw = bytearray(dostime)

        date_bits = (date_raw[1] << 8) | date_raw[0]
        day = date_bits & 0b0000000000011111
        month = (date_bits & 0b0000000111100000) >> 5
        year = ((date_bits & 0b1111111000000000) >> 9) + 1980

        time_bits = (time_raw[1] << 8) | time_raw[0]
        sec = (time_bits & 0b0000000000011111) * 2
        minute = (time_bits & 0b0000011111100000) >> 5
        hour = (time_bits & 0b1111100000000000) >> 11

        # `datetime` is the class (the module does
        # `from datetime import datetime`), not the datetime module.
        return datetime(year, month, day, hour, minute, sec)
    except (IndexError, TypeError, ValueError):
        return datetime.min
185 186
def parse_filetime(qword):
    """
    Convert a Windows FILETIME value into a naive UTC datetime.

    A FILETIME counts 100-nanosecond intervals since 1601-01-01 00:00:00
    UTC. The conversion uses integer arithmetic from that epoch, so the
    result is exact to the microsecond (finer detail is truncated) and
    does not depend on the platform's time_t range.

    see http://integriography.wordpress.com/2010/01/16/using-phython-to-parse-and-present-windows-64-bit-timestamps/

    Arguments:
    - `qword`: The 64-bit FILETIME value (anything accepted by int()).
    Returns: datetime.datetime without tzinfo, in UTC.
    Throws:
    - `ValueError`: if the value lies outside the range datetime can
        represent.
    """
    # Local import: the module itself only imports the `datetime` class.
    from datetime import timedelta

    # 10 ticks of 100ns per microsecond.
    microseconds = int(qword) // 10
    try:
        return datetime(1601, 1, 1) + timedelta(microseconds=microseconds)
    except OverflowError:
        # Keep the exception type callers of utcfromtimestamp() expect.
        raise ValueError("FILETIME out of range: %r" % (qword,))
190 191
class BinaryParserException(Exception):
    """
    Base Exception class for binary parsing.
    """

    def __init__(self, value):
        """
        Constructor.
        Arguments:
        - `value`: A string description.
        """
        # Forward the description so that `args` is populated; an empty
        # `args` makes the exception impossible to copy or unpickle.
        super(BinaryParserException, self).__init__(value)
        self._value = value

    def __repr__(self):
        return "BinaryParserException(%r)" % (self._value)

    def __str__(self):
        return "Binary Parser Exception: %s" % (self._value)
210 211
class ParseException(BinaryParserException):
    """
    Raised while parsing binary data, for example when an invalid
    header is encountered.
    """

    def __init__(self, value):
        """
        Constructor.
        Arguments:
        - `value`: A string description.
        """
        # Storage of the description is handled by the base class.
        super(ParseException, self).__init__(value)

    def __repr__(self):
        description = self._value
        return "ParseException(%r)" % description

    def __str__(self):
        description = self._value
        return "Parse Exception(%s)" % description
230 231
class OverrunBufferException(ParseException):
    """
    Raised when a read would go past the end of the buffer.
    """

    def __init__(self, readOffs, bufLen):
        """
        Constructor.
        Arguments:
        - `readOffs`: The absolute offset of the attempted read.
        - `bufLen`: The length of the buffer being read.
        """
        tvalue = "read: %s, buffer length: %s" % (hex(readOffs), hex(bufLen))
        # Name this class in super() so the immediate parent's __init__
        # is not skipped.
        super(OverrunBufferException, self).__init__(tvalue)

    def __repr__(self):
        return "OverrunBufferException(%r)" % (self._value)

    def __str__(self):
        return "Tried to parse beyond the end of the file (%s)" % \
            (self._value)
243 244
class Block(object):
    """
    Base class for structure blocks in binary parsing.
    A block is associated with a offset into a byte-string.

    All `offset` arguments of the unpack_*/pack_* methods are relative to
    the start of the block; the block adds its own starting offset before
    touching the buffer.
    """

    # Size in bytes of each fixed-width type accepted by declare_field();
    # used to advance the implicit offset past a declared field.
    _FIXED_FIELD_SIZES = {
        "byte": 1,
        "int8": 1,
        "word": 2,
        "word_be": 2,
        "int16": 2,
        "dword": 4,
        "dword_be": 4,
        "int32": 4,
        "qword": 8,
        "int64": 8,
        "float": 4,
        "double": 8,
        "dosdate": 4,
        "filetime": 8,
        # unpack_systemtime reads eight 2-byte fields.
        "systemtime": 16,
        "guid": 16,
    }

    def __init__(self, buf, offset):
        """
        Constructor.
        Arguments:
        - `buf`: Byte string containing stuff to parse.
        - `offset`: The offset into the buffer at which the block starts.
        """
        self._buf = buf
        self._offset = offset
        # Relative offset at which the next implicitly placed field goes.
        self._implicit_offset = 0

    def __repr__(self):
        return "Block(buf=%r, offset=%r)" % (self._buf, self._offset)

    def __unicode__(self):
        return u"BLOCK @ %s." % (hex(self.offset()))

    def __str__(self):
        # Call __unicode__ directly instead of the `unicode` builtin so
        # this also works where that builtin does not exist.
        return str(self.__unicode__())

    def declare_field(self, type, name, offset=None, length=None):
        """
        Declaratively add fields to this block.
        This method will dynamically add corresponding
        offset and unpacker methods to this block: a zero-argument
        callable `name` that unpacks the field, and an attribute
        `_off_<name>` holding the field's relative offset. Afterwards the
        implicit offset is moved to the end of the declared field.
        Arguments:
        - `type`: A string. Should be one of the unpack_* types.
        - `name`: A string.
        - `offset`: A number. Defaults to the current implicit offset.
        - `length`: (Optional) A number. For (w)strings, length in chars.
        Throws:
        - `ParseException`: if the end of the field cannot be computed
            (unknown type, or a string/binary field without a length).
            The accessor and `_off_<name>` have already been set when
            this is raised.
        """
        if offset is None:
            offset = self._implicit_offset

        if length is None:

            def no_length_handler():
                # The unpacker is looked up at call time, not declaration.
                f = getattr(self, "unpack_" + type)
                return f(offset)
            setattr(self, name, no_length_handler)
        else:

            def explicit_length_handler():
                f = getattr(self, "unpack_" + type)
                return f(offset, length)
            setattr(self, name, explicit_length_handler)

        setattr(self, "_off_" + name, offset)

        fixed_size = self._FIXED_FIELD_SIZES.get(type)
        if fixed_size is not None:
            self._implicit_offset = offset + fixed_size
        elif type == "binary" and length is not None:
            self._implicit_offset = offset + length
        elif type == "string" and length is not None:
            self._implicit_offset = offset + length
        elif type == "wstring" and length is not None:
            # Two bytes per character.
            self._implicit_offset = offset + (2 * length)
        elif type == "binary":
            raise ParseException("Implicit offset not supported "
                                 "for dynamic length binary")
        elif "string" in type and length is None:
            raise ParseException("Implicit offset not supported "
                                 "for dynamic length strings")
        else:
            raise ParseException("Implicit offset not supported "
                                 "for type: " + type)

    def current_field_offset(self):
        """
        Return the relative offset at which the next implicitly placed
        field would be declared.
        """
        return self._implicit_offset

    def _unpack_one(self, fmt, offset):
        """
        Unpack the single value described by struct format `fmt` at the
        relative `offset`, mapping struct errors to OverrunBufferException.
        """
        o = self._offset + offset
        try:
            return struct.unpack_from(fmt, self._buf, o)[0]
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))

    def unpack_byte(self, offset):
        """
        Returns a little-endian unsigned byte from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<B", offset)

    def unpack_int8(self, offset):
        """
        Returns a little-endian signed byte from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<b", offset)

    def unpack_word(self, offset):
        """
        Returns a little-endian unsigned WORD (2 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<H", offset)

    def unpack_word_be(self, offset):
        """
        Returns a big-endian unsigned WORD (2 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one(">H", offset)

    def unpack_int16(self, offset):
        """
        Returns a little-endian signed WORD (2 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<h", offset)

    def pack_word(self, offset, word):
        """
        Applies the little-endian WORD (2 bytes) to the relative offset.
        The buffer is modified in place, so it must be writable.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `word`: The data to apply.
        """
        o = self._offset + offset
        return struct.pack_into("<H", self._buf, o, word)

    def unpack_dword(self, offset):
        """
        Returns a little-endian DWORD (4 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<I", offset)

    def unpack_dword_be(self, offset):
        """
        Returns a big-endian DWORD (4 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one(">I", offset)

    def unpack_int32(self, offset):
        """
        Returns a little-endian signed integer (4 bytes) from the
          relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<i", offset)

    def unpack_qword(self, offset):
        """
        Returns a little-endian QWORD (8 bytes) from the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<Q", offset)

    def unpack_int64(self, offset):
        """
        Returns a little-endian signed 64-bit integer (8 bytes) from
          the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<q", offset)

    def unpack_float(self, offset):
        """
        Returns a single-precision float (4 bytes) from
          the relative offset.  IEEE 754 format.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<f", offset)

    def unpack_double(self, offset):
        """
        Returns a double-precision float (8 bytes) from
          the relative offset.  IEEE 754 format.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return self._unpack_one("<d", offset)

    def unpack_binary(self, offset, length=False):
        """
        Returns raw binary data from the relative offset with the given length.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the binary blob. If zero (or omitted),
            the empty byte string is returned.
        Throws:
        - `OverrunBufferException`
        """
        if not length:
            # Byte-string literal: same type as the non-empty result.
            return b""
        return self._unpack_one("<%ds" % (length), offset)

    def unpack_string(self, offset, length):
        """
        Returns a string from the relative offset with the given length.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the string.
        Throws:
        - `OverrunBufferException`
        """
        return self.unpack_binary(offset, length)

    def unpack_wstring(self, offset, length):
        """
        Returns a string from the relative offset with the given length,
        where each character is a wchar (2 bytes)
        Arguments:
        - `offset`: The relative offset from the start of the block.
        - `length`: The length of the string, in characters.
        Throws:
        - `UnicodeDecodeError`
        """
        start = self._offset + offset
        raw = self._buf[start:start + 2 * length]
        # Slices of buffer-like objects may not be byte strings yet;
        # convert them with whichever method the object provides.
        to_bytes = getattr(raw, "tobytes", None) or \
            getattr(raw, "tostring", None)
        if to_bytes is not None:
            raw = to_bytes()
        return raw.decode("utf16")

    def unpack_dosdate(self, offset):
        """
        Returns a datetime from the DOSDATE and DOSTIME starting at
        the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        # Slicing never fails, so check the four bytes exist up front.
        if o + 4 > len(self._buf):
            raise OverrunBufferException(o, len(self._buf))
        return dosdate(self._buf[o:o + 2], self._buf[o + 2:o + 4])

    def unpack_filetime(self, offset):
        """
        Returns a datetime from the QWORD Windows timestamp starting at
        the relative offset.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        return parse_filetime(self.unpack_qword(offset))

    def unpack_systemtime(self, offset):
        """
        Returns a datetime from the Windows SYSTEMTIME structure
        (eight little-endian WORDs, 16 bytes) starting at the relative
        offset.
        See http://msdn.microsoft.com/en-us/library/ms724950%28VS.85%29.aspx
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        - `ValueError`: if the fields do not form a valid date/time.
        """
        o = self._offset + offset
        try:
            parts = struct.unpack_from("<8H", self._buf, o)
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))
        (year, month, _day_of_week, day,
         hour, minute, second, milliseconds) = parts
        # The day-of-week field is redundant and ignored; datetime takes
        # microseconds, so scale the millisecond field.
        return datetime(year, month, day, hour, minute, second,
                        milliseconds * 1000)

    def unpack_guid(self, offset):
        """
        Returns a string containing a GUID starting at the relative offset.
        The first three groups are stored little-endian and are printed
        most significant byte first; the remaining eight bytes are printed
        in storage order.
        Arguments:
        - `offset`: The relative offset from the start of the block.
        Throws:
        - `OverrunBufferException`
        """
        o = self._offset + offset
        try:
            # DWORD, WORD, WORD, then eight single bytes.
            parts = struct.unpack_from("<IHH8B", self._buf, o)
        except struct.error:
            raise OverrunBufferException(o, len(self._buf))
        return "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x" % parts

    def absolute_offset(self, offset):
        """
        Get the absolute offset from an offset relative to this block
        Arguments:
        - `offset`: The relative offset into this block.
        """
        return self._offset + offset

    def offset(self):
        """
        Equivalent to self.absolute_offset(0x0), which is the starting
        offset of this block.
        """
        return self._offset
661