Imported Upstream version 1.2.2-1
authorDevon Kearns <dookie@kali.org>
Wed, 4 Dec 2013 20:34:02 +0000 (13:34 -0700)
committerDevon Kearns <dookie@kali.org>
Wed, 4 Dec 2013 20:34:02 +0000 (13:34 -0700)
29 files changed:
bin/binwalk
binwalk/__init__.py
binwalk/compression.py [new file with mode: 0644]
binwalk/config.py
binwalk/config/extract.conf
binwalk/entropy.py
binwalk/extractor.py
binwalk/filter.py
binwalk/hexdiff.py [new file with mode: 0644]
binwalk/magic/binarch
binwalk/magic/binwalk
binwalk/magic/zlib [new file with mode: 0644]
binwalk/maths.py [new file with mode: 0644]
binwalk/plugins/zlib.py [new file with mode: 0644]
binwalk/prettyprint.py
binwalk/smartsignature.py
binwalk/smartstrings.py
binwalk/update.py
debian_quick_install.sh
docs/README
magic/archives
magic/compressed
magic/crypto
magic/executables
magic/filesystems
magic/firmware
magic/images
setup.py
support/lzma_gen.py [deleted file]

index ac145ab..b872469 100755 (executable)
@@ -4,12 +4,12 @@ import sys
 import os.path
 import binwalk
 from threading import Thread
-from getopt import GetoptError, getopt as GetOpt
+from getopt import GetoptError, gnu_getopt as GetOpt
 
 def display_status():
        global bwalk
 
-       while True:
+       while bwalk is not None:
                # Display the current scan progress when the enter key is pressed.
                try:
                        raw_input()
@@ -29,6 +29,10 @@ Extracting files from firmware:
 
 \t$ %s -Me firmware.bin
 
+Hueristic compression/encryption analysis:
+
+\t$ %s -H firmware.bin
+
 Scanning firmware for executable code:
 
 \t$ %s -A firmware.bin
@@ -45,8 +49,12 @@ Display identified file signatures on entropy graph:
 
 \t$ %s -EB firmware.bin
 
+Diffing multiple files:
+
+\t$ %s -W firmware1.bin firmware2.bin firmware3.bin
+
 See http://code.google.com/p/binwalk/wiki/TableOfContents for more.
-""" % (name, name, name, name, name, name)
+""" % (name, name, name, name, name, name, name, name)
        sys.exit(0)
 
 def usage(fd):
@@ -68,6 +76,7 @@ def usage(fd):
        fd.write("\t-x, --exclude=<filter>        Exclude matches that have <filter> in their description\n")
        fd.write("\t-y, --include=<filter>        Only search for matches that have <filter> in their description\n")
        fd.write("\t-I, --show-invalid            Show results marked as invalid\n")
+       fd.write("\t-T, --ignore-time-skew        Do not show results that have timestamps more than 1 year in the future\n")
        fd.write("\t-k, --keep-going              Show all matching results at a given offset, not just the first one\n")
        fd.write("\t-b, --dumb                    Disable smart signature keywords\n")
        fd.write("\n")
@@ -79,14 +88,24 @@ def usage(fd):
        
        fd.write("Entropy Analysis:\n")
        fd.write("\t-E, --entropy                 Plot file entropy (may be combined with -B, -R, -A, or -S)\n")
-       fd.write("\t-K, --block=<int>             Set the block size for entropy analysis\n")
-       fd.write("\t-a, --shannon                 Use the Shannon entropy algorithm\n")
+       fd.write("\t-H, --heuristic               Identify unknown compression/encryption based on entropy heuristics (implies -E)\n")
+       fd.write("\t-K, --block=<int>             Set the block size for entropy analysis (default: %d)\n" % binwalk.entropy.FileEntropy.DEFAULT_BLOCK_SIZE)
+       fd.write("\t-a, --gzip                    Use gzip compression ratios to measure entropy\n")
        fd.write("\t-N, --no-plot                 Do not generate an entropy plot graph\n")
        fd.write("\t-F, --marker=<offset:name>    Add a marker to the entropy plot graph\n")
        fd.write("\t-Q, --no-legend               Omit the legend from the entropy plot graph\n")
        fd.write("\t-J, --save-plot               Save plot as an SVG (implied if multiple files are specified)\n")
        fd.write("\n")
 
+       fd.write("Binary Diffing:\n")
+       fd.write("\t-W, --diff                    Hexdump / diff the specified files\n")
+       fd.write("\t-K, --block=<int>             Number of bytes to display per line (default: %d)\n" % binwalk.hexdiff.HexDiff.DEFAULT_BLOCK_SIZE)
+       fd.write("\t-G, --green                   Only show hex dump lines that contain bytes which were the same in all files\n")
+       fd.write("\t-i, --red                     Only show hex dump lines that contain bytes which were different in all files\n")
+       fd.write("\t-U, --blue                    Only show hex dump lines that contain bytes which were different in some files\n")
+       fd.write("\t-w, --terse                   Diff all files, but only display a hex dump of the first file\n")
+       fd.write("\n")
+
        fd.write("Extraction Options:\n")
        fd.write("\t-D, --dd=<type:ext[:cmd]>     Extract <type> signatures, give the files an extension of <ext>, and execute <cmd>\n")
        fd.write("\t-e, --extract=[file]          Automatically extract known file types; load rules from file, if specified\n")
@@ -134,9 +153,10 @@ def main():
        strlen = 0
        verbose = 0
        matryoshka = 1
-       entropy_block = 0
+       block_size = 0
        failed_open_count = 0
        quiet = False
+       do_comp = False
        do_files = False
        log_file = None
        do_csv = False
@@ -150,9 +170,11 @@ def main():
        format_to_terminal = False
        custom_signature = None
        delay_extraction = False
+       ignore_time_skew = True
        extract_rules_file = None
        ignore_failed_open = False
        extract_from_config = False
+       show_single_hex_dump = False
        cleanup_after_extract = False
        explicit_signature_scan = False
        ignore_signature_keywords = False
@@ -172,10 +194,13 @@ def main():
 
        config = binwalk.Config()
 
-       short_options = "AaBbCcdEehIJkLMNnOPpQqrStuv?D:F:f:g:K:o:l:m:R:s:X:x:Y:y:"
+       short_options = "AaBbCcdEeGHhIiJkLMNnOPpQqrSTtUuvWw?D:F:f:g:K:o:l:m:R:s:X:x:Y:y:"
        long_options = [
                        "rm",
                        "help",
+                       "green",
+                       "red",
+                       "blue",
                        "examples",
                        "quiet", 
                        "csv",
@@ -186,14 +211,19 @@ def main():
                        "binwalk", 
                        "keep-going",
                        "show-invalid",
+                       "ignore-time-skew",
                        "profile",
                        "delay",
                        "skip-unopened",
                        "term",
                        "tim",
+                       "terse",
+                       "diff",
                        "dumb",
                        "entropy",
-                       "shannon",
+                       "heuristic",
+                       "math",
+                       "gzip",
                        "save-plot",
                        "no-plot",
                        "no-legend", 
@@ -248,10 +278,16 @@ def main():
                        show_legend = False
                elif opt in ("-J", "--save-plot"):
                        save_plot = True
+               elif opt in ("-N", "--no-plot"):
+                       show_plot = False
                elif opt in ("-E", "--entropy"):
                        requested_scans.append(binwalk.Binwalk.ENTROPY)
-               elif opt in ("-a", "--shannon"):
-                       entropy_algorithm = 'shannon'
+               elif opt in ("-W", "--diff"):
+                       requested_scans.append(binwalk.Binwalk.HEXDIFF)
+               elif opt in ("-w", "--terse"):
+                       show_single_hex_dump = True
+               elif opt in ("-a", "--gzip"):
+                       entropy_algorithm = 'gzip'
                elif opt in("-t", "--term", "--tim"):
                        format_to_terminal = True
                elif opt in("-p", "--disable-plugins"):
@@ -260,8 +296,6 @@ def main():
                        ignore_signature_keywords = True
                elif opt in ("-v", "--verbose"):
                        verbose += 1
-               elif opt in ("-N", "--no-plot"):
-                       show_plot = False
                elif opt in ("-S", "--strings"):
                        requested_scans.append(binwalk.Binwalk.STRINGS)
                elif opt in ("-O", "--skip-unopened"):
@@ -278,6 +312,12 @@ def main():
                        extracts.append(arg)
                elif opt in ("-g", "--grep"):
                        greps.append(arg)
+               elif opt in ("-G", "--green"):
+                       greps.append("32;")
+               elif opt in ("-i", "--red"):
+                       greps.append("31;")
+               elif opt in ("-U", "--blue"):
+                       greps.append("34;")
                elif opt in ("-r", "--rm"):
                        cleanup_after_extract = True
                elif opt in ("-m", "--magic"):
@@ -292,12 +332,18 @@ def main():
                        # Original Zvyozdochkin matrhoska set had 8 dolls. This is a good number.
                        matryoshka = 8
                elif opt in ("-K", "--block"):
-                       entropy_block = binwalk.common.str2int(arg)
+                       block_size = binwalk.common.str2int(arg)
                elif opt in ("-X", "--disable-plugin"):
                        plugin_blacklist.append(arg)
                elif opt in ("-Y", "--enable-plugin"):
                        plugin_whitelist.append(arg)
+               elif opt in ("-T", "--ignore-time-skew"):
+                       ignore_time_skew = False
 
+               elif opt in ("-H", "--heuristic", "--math"):
+                       do_comp = True
+                       if binwalk.Binwalk.ENTROPY not in requested_scans:
+                               requested_scans.append(binwalk.Binwalk.ENTROPY)
                elif opt in ("-F", "--marker"):
                        if ':' in arg:
                                (location, description) = arg.split(':', 1)
@@ -402,12 +448,12 @@ def main():
                usage(sys.stderr)
 
        # Instantiate the Binwalk class
-       bwalk = binwalk.Binwalk(flags=magic_flags, verbose=verbose, log=log_file, quiet=quiet, ignore_smart_keywords=ignore_signature_keywords, load_plugins=enable_plugins)
+       bwalk = binwalk.Binwalk(magic_files=magic_files, flags=magic_flags, verbose=verbose, log=log_file, quiet=quiet, ignore_smart_keywords=ignore_signature_keywords, load_plugins=enable_plugins, ignore_time_skews=ignore_time_skew)
 
        # If a custom signature was specified, create a temporary magic file containing the custom signature
        # and ensure that it is the only magic file that will be loaded when Binwalk.scan() is called.
        if custom_signature is not None:
-               magic_files = bwalk.parser.file_from_string(custom_signature)
+               bwalk.magic_files = [bwalk.parser.file_from_string(custom_signature)]
 
        # Set any specified filters
        bwalk.filter.exclude(excludes)
@@ -432,7 +478,7 @@ def main():
        bwalk.extractor.enable_delayed_extract(delay_extraction)
 
        # Load the magic file(s)
-       bwalk.load_signatures(magic_files=magic_files)
+       #bwalk.load_signatures(magic_files=magic_files)
 
        # If --term was specified, enable output formatting to terminal
        if format_to_terminal:
@@ -488,14 +534,18 @@ def main():
                                                        length=length, 
                                                        offset=offset, 
                                                        n=strlen, 
-                                                       block=entropy_block, 
-                                                       algorithm=entropy_algorithm,
+                                                       block=block_size, 
                                                        load_plugins=enable_plugins, 
                                                        whitelist=plugin_whitelist, 
                                                        blacklist=plugin_blacklist)
                                        
                                bwalk.concatenate_results(results, r)
 
+                       elif scan_type == binwalk.Binwalk.COMPRESSION:
+
+                               r = bwalk.analyze_compression(target_files, offset=offset, length=length)
+                               bwalk.concatenate_results(results, r)
+
                        elif scan_type == binwalk.Binwalk.ENTROPY:
 
                                if not results:
@@ -511,19 +561,26 @@ def main():
                                bwalk.analyze_entropy(results,
                                                        offset, 
                                                        length, 
-                                                       entropy_block
+                                                       block_size
                                                        show_plot, 
                                                        show_legend, 
                                                        save_plot,
                                                        algorithm=entropy_algorithm,
                                                        load_plugins=enable_plugins,
                                                        whitelist=plugin_whitelist,
-                                                       blacklist=plugin_blacklist)
+                                                       blacklist=plugin_blacklist,
+                                                       compcheck=do_comp)
+
+                       elif scan_type == binwalk.Binwalk.HEXDIFF:
+                               
+                               bwalk.hexdiff(target_files, offset=offset, length=length, block=block_size, first=show_single_hex_dump)
 
        except KeyboardInterrupt:
                pass
-#      except Exception, e:
-#              print "Unexpected error:", str(e)
+       except IOError:
+               pass
+       except Exception, e:
+               print "Unexpected error:", str(e)
                
        bwalk.cleanup()
 
index 61d17b3..d1fca17 100644 (file)
@@ -2,12 +2,14 @@ __all__ = ["Binwalk"]
 
 import os
 import re
+import time
 import magic
 from config import *
 from update import *
 from filter import *
 from parser import *
 from plugins import *
+from hexdiff import *
 from entropy import *
 from extractor import *
 from prettyprint import *
@@ -69,9 +71,11 @@ class Binwalk(object):
        BINARCH = 0x02
        BINCAST = 0x04
        STRINGS = 0x08
-       ENTROPY = 0x10
+       COMPRESSION = 0x10
+       HEXDIFF = 0x20
+       ENTROPY = 0x40
 
-       def __init__(self, magic_files=[], flags=magic.MAGIC_NONE, log=None, quiet=False, verbose=0, ignore_smart_keywords=False, load_extractor=False, load_plugins=True):
+       def __init__(self, magic_files=[], flags=magic.MAGIC_NONE, log=None, quiet=False, verbose=0, ignore_smart_keywords=False, ignore_time_skews=False, load_extractor=False, load_plugins=True):
                '''
                Class constructor.
 
@@ -81,6 +85,7 @@ class Binwalk(object):
                @quiet                  - If set to True, supress PrettyPrint output to stdout.
                @verbose                - Verbosity level.
                @ignore_smart_keywords  - Set to True to ignore smart signature keywords.
+               @ignore_time_skews      - Set to True to ignore file results with timestamps in the future.
                @load_extractor         - Set to True to load the default extraction rules automatically.
                @load_plugins           - Set to False to disable plugin support.
 
@@ -95,6 +100,8 @@ class Binwalk(object):
                self.scan_length = 0
                self.total_read = 0
                self.matryoshka = 1
+               self.epoch = 0
+               self.year = 0
                self.plugins = None
                self.magic = None
                self.mfile = None
@@ -102,6 +109,12 @@ class Binwalk(object):
                self.strings = None
                self.scan_type = self.BINWALK
 
+               if not ignore_time_skews:
+                       # Consider timestamps up to 1 year in the future valid,
+                       # to account for any minor time skew on the local system.
+                       self.year = time.localtime().tm_year + 1
+                       self.epoch = int(time.time()) + (60 * 60 * 24 * 365)
+
                # Instantiate the config class so we can access file/directory paths
                self.config = Config()
 
@@ -147,11 +160,16 @@ class Binwalk(object):
 
        def cleanup(self):
                '''
-               Cleanup any temporary files generated by the internal instance of MagicParser.
+               Close magic and cleanup any temporary files generated by the internal instance of MagicParser.
 
                Returns None.
                '''
                try:
+                       self.magic.close()
+               except:
+                       pass
+
+               try:
                        self.parser.cleanup()
                except:
                        pass
@@ -174,7 +192,15 @@ class Binwalk(object):
                self.magic = magic.open(self.flags)
                self.magic.load(self.mfile)
 
-       def analyze_strings(self, file_names, length=0, offset=0, n=0, block=0, algorithm=None, load_plugins=True, whitelist=[], blacklist=[]):
+       def hexdiff(self, file_names, length=0x100, offset=0, block=16, first=False):
+               if not length and len(file_names) > 0:
+                       length = file_size(file_names[0])
+               if not block:
+                       block = 16
+
+               HexDiff(self).display(file_names, offset=offset, size=length, block=block, show_first_only=first)
+
+       def analyze_strings(self, file_names, length=0, offset=0, n=0, block=0, load_plugins=True, whitelist=[], blacklist=[]):
                '''
                Performs a strings analysis on the specified file(s).
 
@@ -183,7 +209,6 @@ class Binwalk(object):
                @offset       - The starting offset into the file to begin analysis.
                @n            - The minimum valid string length.
                @block        - The block size to use when performing entropy analysis.
-               @algorithm    - The entropy algorithm to use when performing entropy analysis.
                @load_plugins - Set to False to disable plugin callbacks.
                @whitelist    - A list of whitelisted plugins.
                @blacklist    - A list of blacklisted plugins.
@@ -206,7 +231,7 @@ class Binwalk(object):
                                        offset=offset,
                                        n=n,
                                        block=block,
-                                       algorithm=algorithm,
+                                       algorithm='gzip',               # Use gzip here as it is faster and we don't need the detail provided by shannon
                                        load_plugins=load_plugins,
                                        whitelist=whitelist,
                                        blacklist=blacklist)
@@ -218,7 +243,7 @@ class Binwalk(object):
 
                return data
 
-       def analyze_entropy(self, files, offset=0, length=0, block=0, plot=True, legend=True, save=False, algorithm=None, load_plugins=True, whitelist=[], blacklist=[]):
+       def analyze_entropy(self, files, offset=0, length=0, block=0, plot=True, legend=True, save=False, algorithm=None, load_plugins=True, whitelist=[], blacklist=[], compcheck=False):
                 '''
                Performs an entropy analysis on the specified file(s).
 
@@ -229,10 +254,11 @@ class Binwalk(object):
                @plot         - Set to False to disable plotting.
                @legend       - Set to False to exclude the legend and custom offset markers from the plot.
                @save         - Set to True to save plots to disk instead of displaying them.
-               @algorithm    - Set to 'shannon' to use shannon entropy algorithm.
+               @algorithm    - Set to 'gzip' to use the gzip entropy "algorithm".
                @load_plugins - Set to False to disable plugin callbacks.
                @whitelist    - A list of whitelisted plugins.
                @blacklist    - A list of blacklisted plugins.
+               @compcheck    - Set to True to perform heuristic compression detection.
 
                Returns a dictionary of:
                         
@@ -253,7 +279,8 @@ class Binwalk(object):
                                        algorithm=algorithm,
                                        load_plugins=plugins,
                                        whitelist=whitelist,
-                                       blacklist=blacklist)
+                                       blacklist=blacklist,
+                                       compcheck=compcheck)
                
                data = self.entropy.analyze()
                
@@ -307,6 +334,10 @@ class Binwalk(object):
                if self.load_plugins:
                        self.plugins._load_plugins()
 
+               # Load the default signatures if self.load_signatures has not already been invoked
+               if self.magic is None:
+                       self.load_signatures()
+
                while i < self.matryoshka:
                        new_target_files = []
 
@@ -411,15 +442,12 @@ class Binwalk(object):
                self.total_scanned = 0
                self.scan_length = length
                self.filter.show_invalid_results = show_invalid_results
+               self.start_offset = offset
 
                # Check to make sure either a target file or a file descriptor was supplied
                if not target_file and fd is None:
                        raise Exception("Must supply Binwalk.single_scan with a valid file path or file object")
 
-               # Load the default signatures if self.load_signatures has not already been invoked
-               if self.magic is None:
-                       self.load_signatures()
-
                # Need the total size of the target file, even if we aren't scanning the whole thing
                if target_file:
                        fsize = file_size(target_file)
@@ -451,6 +479,10 @@ class Binwalk(object):
                # Invoke any pre-scan plugins
                plugret_start = self.plugins._pre_scan_callbacks(fd)
                
+               # Load the default signatures if self.load_signatures has not already been invoked
+               if self.magic is None:
+                       self.load_signatures()
+
                # Main loop, scan through all the data
                while not ((plugret | plugret_start) & PLUGIN_TERMINATE):
                        i = 0
@@ -465,10 +497,16 @@ class Binwalk(object):
                        # jump offset that was beyond the end of the then current data block.
                        #
                        # If this is the case, we need to index into this data block appropriately in order to 
-                       # resume the scan from the appropriate offset, and adjust dlen accordingly.
+                       # resume the scan from the appropriate offset.
+                       #
+                       # Don't update dlen though, as it is the literal offset into the data block that we
+                       # are to scan up to in this loop iteration. It is also appended to self.total_scanned,
+                       # which is what we want (even if we have been told to skip part of the block, the skipped
+                       # part is still considered part of the total bytes scanned).
                        if jump_offset > 0:
                                total_check = self.total_scanned + dlen
 
+                               # Is the jump offset beyond the total amount of data that we've currently read in (i.e., in a future data block)?
                                if jump_offset >= total_check:
                                        i = -1
                                        
@@ -477,13 +515,18 @@ class Binwalk(object):
                                                fd.seek(jump_offset)
                                                self.total_read = jump_offset
                                                self.total_scanned = jump_offset - dlen
-                                               jump_offset = 0
                                        except:
                                                pass
-                               elif jump_offset < total_check:
-                                       # Index into this block appropriately
+
+                               # Is the jump offset inside this block of data?
+                               elif jump_offset > self.total_scanned and jump_offset < total_check:
+                                       # Index into this block appropriately; jump_offset is the file offset that
+                                       # we need to jump to, and self.total_scanned is the file offset that starts
+                                       # the beginning of the current block
                                        i = jump_offset - self.total_scanned
-                                       jump_offset = 0
+
+                               # We're done with jump_offset, zero it out for the next round
+                               jump_offset = 0
 
                        # Scan through each block of data looking for signatures
                        if i >= 0 and i < dlen:
@@ -517,7 +560,7 @@ class Binwalk(object):
                                                        smart = self.smart.parse(magic_result)
        
                                                        # Validate the jump value and check if the response description should be displayed
-                                                       if smart['jump'] > -1 and self._should_display(smart['description']):
+                                                       if smart['jump'] > -1 and self._should_display(smart):
                                                                # If multiple results are returned and one of them has smart['jump'] set to a non-zero value,
                                                                # the calculated results offset will be wrong since i will have been incremented. Only set the
                                                                # results_offset value when the first match is encountered.
@@ -568,6 +611,10 @@ class Binwalk(object):
 
                        # Track the total number of bytes scanned
                        self.total_scanned += dlen
+                       # The starting offset only affects the reported offset for results
+                       # in the first block of data. Zero it out after the first block has
+                       # been processed.
+                       offset = 0
 
                # Sort the results before returning them
                scan_items = scan_results.items()
@@ -621,16 +668,20 @@ class Binwalk(object):
                                        if not found_offset:
                                                results[new_file_name] += new_data
 
-       def _should_display(self, data):
+       def _should_display(self, result):
                '''
                Determines if a result string should be displayed to the user or not.
                
-               @data - Display string.
+               @result - Result dictionary, as returned by self.smart.parse.
 
                Returns True if the string should be displayed.
                Returns False if the string should not be displayed.
                '''
-               return (data and data is not None and not self.filter.invalid(data) and self.filter.filter(data) != self.filter.FILTER_EXCLUDE)
+               if result['invalid'] == True or (self.year and result['year'] > self.year) or (self.epoch and result['epoch'] > self.epoch):
+                       return False
+               
+               desc = result['description']
+               return (desc and desc is not None and not self.filter.invalid(desc) and self.filter.filter(desc) != self.filter.FILTER_EXCLUDE)
 
        def _read_block(self, fd):
                '''
@@ -649,7 +700,7 @@ class Binwalk(object):
 
                # Check to make sure we only read up to scan_length bytes (streams have a scan length of -1)
                if self.scan_length == -1 or self.total_read < self.scan_length:
-                       
+               
                        # Read in the next rlen bytes, plus any extra data from the previous read (only neeced for streams)
                        data = self.last_extra_data_section + fd.read(rlen - len(self.last_extra_data_section))
                        
@@ -673,7 +724,7 @@ class Binwalk(object):
                                # If fd is a stream, this seek will fail; keep a copy of the extra buffer data so that it
                                # can be added to the data buffer the next time this method is invoked.
                                try:
-                                       fd.seek(self.total_read)
+                                       fd.seek(self.start_offset + self.total_read)
                                except:
                                        self.last_extra_data_section = data[dlen:]
 
diff --git a/binwalk/compression.py b/binwalk/compression.py
new file mode 100644 (file)
index 0000000..d559c06
--- /dev/null
@@ -0,0 +1,203 @@
+#!/usr/bin/env python
+# Routines to perform Monte Carlo Pi approximation and Chi Squared tests. 
+# Used for fingerprinting unknown areas of high entropy (e.g., is this block of high entropy data compressed or encrypted?).
+# Inspired by people who actually know what they're doing: http://www.fourmilab.ch/random/
+
+import math
+
+class MonteCarloPi(object):
+       '''
+       Performs a Monte Carlo Pi approximation.
+       Currently unused.
+       '''
+
+       def __init__(self):
+               '''
+               Class constructor.
+               
+               Returns None.
+               '''
+               self.reset()
+
+       def reset(self):
+               '''
+               Reset state to the beginning.
+               '''
+               self.pi = 0
+               self.error = 0
+               self.m = 0
+               self.n = 0
+
+       def update(self, data):
+               '''
+               Update the pi approximation with new data.
+
+               @data - A string of bytes to update (length must be >= 6).
+
+               Returns None.
+               '''
+               c = 0
+               dlen = len(data)
+
+               while (c+6) < dlen:
+                       # Treat 3 bytes as an x coordinate, the next 3 bytes as a y coordinate.
+                       # Our box is 1x1, so divide by 2^24 to put the x y values inside the box.
+                       x = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
+                       c += 3
+                       y = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
+                       c += 3
+       
+                       # Does the x,y point lie inside the circle inscribed within our box, with diameter == 1?
+                       if ((x**2) + (y**2)) <= 1:
+                               self.m += 1
+                       self.n += 1
+       
+       def montecarlo(self):
+               '''
+               Approximates the value of Pi based on the provided data.
+               
+               Returns a tuple of (approximated value of pi, percent deviation).
+               '''
+               if self.n:
+                       self.pi = (float(self.m) / float(self.n) * 4.0)
+
+               if self.pi:
+                       self.error = math.fabs(1.0 - (math.pi / self.pi)) * 100.0
+                       return (self.pi, self.error)
+               else:
+                       return (0.0, 0.0)
+
+class ChiSquare(object):
+       '''
+       Performs a Chi Squared test against the provided data.
+       '''
+
+       IDEAL = 256.0
+
+       def __init__(self):
+               '''
+               Class constructor.
+
+               Returns None.
+               '''
+               self.bytes = {}
+               self.freedom = self.IDEAL - 1 
+               
+               # Initialize the self.bytes dictionary with keys for all possible byte values (0 - 255)
+               for i in range(0, int(self.IDEAL)):
+                       self.bytes[chr(i)] = 0
+               
+               self.reset()
+
+       def reset(self):
+               self.xc2 = 0.0
+               self.byte_count = 0
+
+               for key in self.bytes.keys():
+                       self.bytes[key] = 0             
+
+       def update(self, data):
+               '''
+               Updates the current byte counts with new data.
+
+               @data - String of bytes to update.
+
+               Returns None.
+               '''
+               # Count the number of occurrences of each byte value
+               for i in data:
+                       self.bytes[i] += 1
+
+               self.byte_count += len(data)
+
+       def chisq(self):
+               '''
+               Calculate the Chi Square test statistic.
+
+               Returns the test statistic (compared against CHI_CUTOFF by the caller).
+               '''
+               expected = self.byte_count / self.IDEAL
+
+               if expected:
+                       for byte in self.bytes.values():
+                               self.xc2 += ((byte - expected) ** 2 ) / expected
+
+               return self.xc2
+
+class CompressionEntropyAnalyzer(object):
+       '''
+       Class wrapper around ChiSquare.
+       Performs analysis and attempts to interpret the results.
+       '''
+
+       BLOCK_SIZE = 32
+       CHI_CUTOFF = 512
+
+       DESCRIPTION = "Statistical Compression Analysis"
+
+       def __init__(self, fname, start, length, binwalk=None, fp=None):
+               '''
+               Class constructor.
+
+               @fname    - The file to scan; if None, @fp must be an open file object.
+               @start    - The start offset to begin analysis at.
+               @length   - The number of bytes to analyze.
+               @binwalk  - Instance of the Binwalk class (optional), used to display results.
+
+               Returns None.
+               '''
+               if fname:
+                       self.fp = open(fname, 'rb')
+               else:
+                       self.fp = fp
+
+               self.start = start
+               self.length = length
+               self.binwalk = binwalk
+
+       def analyze(self):
+               '''
+               Perform analysis and interpretation.
+
+               Returns a descriptive string containing the results and attempted interpretation.
+               '''
+               i = 0
+               num_error = 0
+               analyzer_results = []
+
+               if self.binwalk:
+                       self.binwalk.display.header(file_name=self.fp.name, description=self.DESCRIPTION)
+
+               chi = ChiSquare()
+
+               self.fp.seek(self.start)
+               while i < self.length:
+                       rsize = self.length - i
+                       if rsize > self.BLOCK_SIZE:
+                               rsize = self.BLOCK_SIZE
+
+                       d = self.fp.read(rsize)
+                       if len(d) != rsize:
+                               break
+
+                       chi.reset()
+                       chi.update(d)
+
+                       if chi.chisq() >= self.CHI_CUTOFF:
+                               num_error += 1
+
+                       i += rsize
+
+               if num_error > 0:
+                       verdict = 'Moderate entropy data, best guess: compressed'
+               else:
+                       verdict = 'High entropy data, best guess: encrypted'
+
+               result = [{'offset' : self.start, 'description' : '%s, size: %d, %d low entropy blocks' % (verdict, self.length, num_error)}]
+
+               if self.binwalk:
+                       self.binwalk.display.results(self.start, result)
+                       self.binwalk.display.footer()
+
+               return result
+
index 5c26254..ad7a15d 100644 (file)
@@ -33,7 +33,7 @@ class Config:
                o PLUGINS             - Path to the plugins directory.
        '''
        # Release version
-       VERSION = "1.2.1"
+       VERSION = "1.2.2-1"
 
        # Sub directories
        BINWALK_USER_DIR = ".binwalk"
@@ -47,6 +47,7 @@ class Config:
        BINWALK_MAGIC_FILE = "binwalk"
        BINCAST_MAGIC_FILE = "bincast"
        BINARCH_MAGIC_FILE = "binarch"
+       ZLIB_MAGIC_FILE = "zlib"
 
        def __init__(self):
                '''
@@ -74,6 +75,7 @@ class Config:
                self.paths['system'][self.BINWALK_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINWALK_MAGIC_FILE)
                self.paths['system'][self.BINCAST_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINCAST_MAGIC_FILE)
                self.paths['system'][self.BINARCH_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.BINARCH_MAGIC_FILE)
+               self.paths['system'][self.ZLIB_MAGIC_FILE] = self._system_path(self.BINWALK_MAGIC_DIR, self.ZLIB_MAGIC_FILE)
                self.paths['system'][self.EXTRACT_FILE] = self._system_path(self.BINWALK_CONFIG_DIR, self.EXTRACT_FILE)
                self.paths['system'][self.PLUGINS] = self._system_path(self.BINWALK_PLUGINS_DIR)
        
index 3de0aab..02aa722 100644 (file)
 ^posix tar archive:tar:tar xvf '%e'
 ^rar archive data:rar:unrar e '%e'
 ^arj archive data.*comment header:arj:arj e '%e'
+^iso 9660:iso:7z x '%e' -oiso-root
 
 # These assume the firmware-mod-kit is installed to /opt/firmware-mod-kit.
 # If not, change the file paths appropriately.
+^squashfs filesystem:squashfs:/opt/firmware-mod-kit/unsquashfs_all.sh '%e'
+^jffs2 filesystem:jffs2:/opt/firmware-mod-kit/src/jffs2/unjffs2 '%e'
+^ascii cpio archive:cpio:/opt/firmware-mod-kit/uncpio.sh '%e'
+^cramfs filesystem:cramfs:/opt/firmware-mod-kit/uncramfs_all.sh '%e'
+^bff volume entry:bff:/opt/firmware-mod-kit/src/bff/bffxtractor.py '%e'
+^wdk file system:wdk:/opt/firmware-mod-kit/src/firmware-tools/unwdk.py '%e'
+^zlib header:zlib:/opt/firmware-mod-kit/src/firmware-tools/unzlib.py '%e'
+^ext2 filesystem:ext2:/opt/firmware-mod-kit/src/mountcp/mountcp '%e' ext2-root
+^romfs filesystem:romfs:/opt/firmware-mod-kit/src/mountcp/mountcp '%e' romfs-root
+
+# These are the deprecated firmware-mod-kit file paths, which included the 'trunk' directory.
+# These will only be run if the above file paths don't exist.
 ^squashfs filesystem:squashfs:/opt/firmware-mod-kit/trunk/unsquashfs_all.sh '%e'
 ^jffs2 filesystem:jffs2:/opt/firmware-mod-kit/trunk/src/jffs2/unjffs2 '%e' # requires root
 ^ascii cpio archive:cpio:/opt/firmware-mod-kit/trunk/uncpio.sh '%e'
 ^cramfs filesystem:cramfs:/opt/firmware-mod-kit/trunk/uncramfs_all.sh '%e'
 ^bff volume entry:bff:/opt/firmware-mod-kit/trunk/src/bff/bffxtractor.py '%e'
 
+# If FMK isn't installed, try the system's unsquashfs for SquashFS files
+^squashfs filesystem:squashfs:unsquashfs '%e'
+
 # Extract, but don't run anything
-^ext2 filesystem:ext2
-^romfs filesystem:romfs
-^private key:key
+private key:key
+certificate:crt
index 64b7337..885518c 100644 (file)
@@ -3,6 +3,87 @@ import math
 import os.path
 import plugins
 import common
+import compression
+
+class PlotEntropy(object):
+       '''
+       Class to plot entropy data on a graph.
+       '''
+
+       YLIM_MIN = 0
+       YLIM_MAX = 1.5
+
+       XLABEL = 'Offset'
+       YLABEL = 'Entropy'
+
+       LINE_WIDTH = 1.5
+
+       COLORS = ['darkgreen', 'blueviolet', 'saddlebrown', 'deeppink', 'goldenrod', 'olive', 'black']
+
+       FILE_FORMAT = 'svg'
+       
+       def __init__(self, x, y, title='Entropy', average=0, file_results={}, show_legend=True, save=False):
+               '''
+               Plots entropy data.
+
+               @x            - List of graph x-coordinates (i.e., data offsets).
+               @y            - List of graph y-coordinates (i.e., entropy for each offset).
+               @title        - Graph title.
+               @average      - The average entropy.
+               @file_results - Binwalk results, if any.
+               @show_legend  - Set to False to not generate a color-coded legend and plotted x coordinates for the graph.
+               @save         - If set to True, graph will be saved to disk rather than displayed.
+
+               Returns None.
+               '''
+               import matplotlib.pyplot as plt
+               import numpy as np
+
+               i = 0
+               trigger = 0
+               new_ticks = []
+               color_mappings = {}
+
+               plt.clf()
+
+               if file_results:
+                       for (offset, results) in file_results:
+                               label = None
+                               description = results[0]['description'].split(',')[0]
+
+                               if not color_mappings.has_key(description):
+                                       if show_legend:
+                                               label = description
+
+                                       color_mappings[description] = self.COLORS[i]
+                                       i += 1
+                                       if i >= len(self.COLORS):
+                                               i = 0
+                       
+                               plt.axvline(x=offset, label=label, color=color_mappings[description], linewidth=self.LINE_WIDTH)
+                               new_ticks.append(offset)
+
+                       if show_legend:
+                               plt.legend()
+
+                               if new_ticks:
+                                       new_ticks.sort()
+                                       plt.xticks(np.array(new_ticks), new_ticks)
+
+               plt.plot(x, y, linewidth=self.LINE_WIDTH)
+
+               if average:
+                       plt.plot(x, [average] * len(x), linestyle='--', color='r')
+
+               plt.xlabel(self.XLABEL)
+               plt.ylabel(self.YLABEL)
+               plt.title(title)
+               plt.ylim(self.YLIM_MIN, self.YLIM_MAX)
+               if save:
+                       plt.savefig(common.unique_file_name(title, self.FILE_FORMAT))
+               else:
+                       plt.show()
+
 
 class FileEntropy(object):
        '''
@@ -13,19 +94,20 @@ class FileEntropy(object):
        DEFAULT_BLOCK_SIZE = 1024
        ENTROPY_TRIGGER = 0.9
        ENTROPY_MAX = 0.95
-       FILE_FORMAT = 'svg'
 
-       def __init__(self, file_name=None, fd=None, binwalk=None, offset=0, length=None, block=DEFAULT_BLOCK_SIZE, plugins=None):
+       def __init__(self, file_name=None, fd=None, binwalk=None, offset=0, length=None, block=DEFAULT_BLOCK_SIZE, plugins=None, file_results=[], compcheck=False):
                '''
                Class constructor.
 
-               @file_name - The path to the file to analyze.
-               @fd        - A file object to analyze data from.
-               @binwalk   - An instance of the Binwalk class.
-               @offset    - The offset into the data to begin analysis.
-               @length    - The number of bytes to analyze.
-               @block     - The size of the data blocks to analyze.
-               @plugins   - Instance of the Plugins class.
+               @file_name    - The path to the file to analyze.
+               @fd           - A file object to analyze data from.
+               @binwalk      - An instance of the Binwalk class.
+               @offset       - The offset into the data to begin analysis.
+               @length       - The number of bytes to analyze.
+               @block        - The size of the data blocks to analyze.
+               @plugins      - Instance of the Plugins class.
+               @file_results - Scan results to overlay on the entropy plot graph.
+               @compcheck    - Set to True to enable entropy compression detection.
 
                Returns None.
                '''
@@ -37,6 +119,8 @@ class FileEntropy(object):
                self.plugins = plugins
                self.total_read = 0
                self.fd_open = False
+               self.file_results = file_results
+               self.do_chisq = compcheck
 
                if file_name is None and self.fd is None:
                        raise Exception("Entropy.__init__ requires at least the file_name or fd options")
@@ -111,8 +195,7 @@ class FileEntropy(object):
        def gzip(self, offset, data, truncate=True):
                '''
                Performs an entropy analysis based on zlib compression ratio.
-               This is the default analysis used as it is faster than the shannon entropy analysis
-               and produces basically the same data.
+               This is faster than the shannon entropy analysis, but not as accurate.
                '''
                # Entropy is a simple ratio of: <zlib compressed size> / <original size>
                e = float(float(len(zlib.compress(data, 9))) / float(len(data)))
@@ -176,7 +259,7 @@ class FileEntropy(object):
                                e = float(results['description'])
 
                        if not ((plug_pre_ret | plug_ret) & (plugins.PLUGIN_TERMINATE | plugins.PLUGIN_NO_DISPLAY)):
-                               if self.binwalk:
+                               if self.binwalk and not self.do_chisq:
                                        self.binwalk.display.results(offset, [results])
 
                                entropy.append(e)
@@ -195,117 +278,100 @@ class FileEntropy(object):
        
                return (offsets, entropy, average)
 
+       def _look_for_compression(self, x, y):
+               '''
+               Analyzes areas of high entropy for signs of compression or encryption and displays the results.
+               '''
+               trigger = self.ENTROPY_TRIGGER
+               pairs = []
+               scan_pairs = []
+               index = -1
+               total = 0
+
+               if not self.file_results:
+                       for j in range(0, len(x)):
+                               if y[j] >= trigger and (j == 0 or y[j-1] < trigger):
+                                       pairs.append([x[j]])
+                                       index = len(pairs) - 1
+                               elif y[j] <= trigger and y[j-1] > trigger and index > -1 and len(pairs[index]) == 1:
+                                       pairs[index].append(x[j])
+
+                       # Generate a list of tuples containing the starting offset to begin analysis plus a length
+                       for pair in pairs:
+                               start = pair[0]
+                               if len(pair) == 2:
+                                       stop = pair[1]
+                               else:
+                                       self.fd.seek(0, 2)
+                                       stop = self.fd.tell()
+
+                               length = stop - start
+                               total += length
+                               scan_pairs.append((start, length))
+
+                       # Update the binwalk scan length and total scanned values so that the percent complete
+                       # isn't stuck at 100% after the initial entropy analysis (which has already finished).
+                       if self.binwalk and total > 0:
+                               self.binwalk.scan_length = total
+                               self.binwalk.total_scanned = 0
+
+                       # Analyze each scan pair and display the results
+                       for (start, length) in scan_pairs:
+                               # Ignore anything less than 4KB in size
+                               if length > (self.DEFAULT_BLOCK_SIZE * 4):
+                                       # Ignore the first and last 1KB of data to prevent header/footer or extra data from skewing results
+                                       result = compression.CompressionEntropyAnalyzer(None, start+self.DEFAULT_BLOCK_SIZE, length-self.DEFAULT_BLOCK_SIZE, fp=self.fd).analyze()
+                                       results = [{'description' : result[0]['description'], 'offset' : start}]
+       
+                                       self.file_results.append((start, results))
+                                       if self.binwalk:
+                                               self.binwalk.display.results(start, results)
+
+                               # Keep the total scanned length updated
+                               if self.binwalk:
+                                       self.binwalk.total_scanned += length
+
        def analyze(self, algorithm=None):
                '''
                Performs an entropy analysis of the data using the specified algorithm.
 
                @algorithm - A method inside of the Entropy class to invoke for entropy analysis.
-                            Default method: self.gzip.
-                            Other available methods: self.shannon.
-                            May also be a string: 'shannon'.
+                            Default method: self.shannon.
+                            Other available methods: self.gzip.
+                            May also be a string: 'gzip'.
 
                Returns the return value of algorithm.
                '''
-               algo = self.gzip
+               algo = self.shannon
 
                if algorithm:
                        if callable(algorithm):
                                algo = algorithm
 
                        try:
-                               if algorithm.lower() == 'shannon':
-                                       algo = self.shannon
+                               if algorithm.lower() == 'gzip':
+                                       algo = self.gzip
                        except:
                                pass
 
-               data = self._do_analysis(algo)
-
-               return data
+               return self._do_analysis(algo)
        
-       def plot(self, x, y, average=0, file_results=[], show_legend=True, save=False):
+       def plot(self, x, y, average=0, show_legend=True, save=False):
                '''
                Plots entropy data.
 
                @x            - List of graph x-coordinates (i.e., data offsets).
                @y            - List of graph y-coordinates (i.e., entropy for each offset).
                @average      - The average entropy.
-               @file_results - A list of tuples containing additional analysis data, as returned by Binwalk.single_scan.
                @show_legend  - Set to False to not generate a color-coded legend and plotted x coordinates for the graph.
                @save         - If set to True, graph will be saved to disk rather than displayed.
 
                Returns None.
                '''
-               import matplotlib.pyplot as plt
-               import numpy as np
+               if self.do_chisq:
+                       self._look_for_compression(x, y)
 
-               i = 0
-               trigger = 0
-               new_ticks = []
-               colors = ['darkgreen', 'blueviolet', 'saddlebrown', 'deeppink', 'goldenrod', 'olive', 'black']
-               color_mappings = {}
-
-               plt.clf()
-
-               if not file_results and show_legend and average:
-                       file_results = []
-
-                       # Typically the average entropy is used as the trigger level for rising/falling entropy edges.
-                       # If the average entropy is too low, false rising and falling edges will be marked; if this is
-                       # the case, and if there is at least one data point greater than ENTROPY_MAX, use ENTROPY_TRIGGER
-                       # as the trigger level to avoid false edges.
-                       if average < self.ENTROPY_TRIGGER:
-                               for point in y:
-                                       if point > self.ENTROPY_MAX:
-                                               trigger = self.ENTROPY_TRIGGER
-                                               break
-
-                       if not trigger:
-                               trigger = average
-
-                       for j in range(0, len(x)):
-                               if j > 0:
-                                       if y[j] >= trigger and y[j-1] < trigger:
-                                               file_results.append((x[j], [{'description' : 'Entropy rising edge'}]))
-                                       elif y[j] <= trigger and y[j-1] > trigger:
-                                               file_results.append((x[j], [{'description' : 'Entropy falling edge'}]))
-
-               if file_results:
-                       for (offset, results) in file_results:
-                               label = None
-                               description = results[0]['description'].split(',')[0]
-
-                               if not color_mappings.has_key(description):
-                                       if show_legend:
-                                               label = description
-
-                                       color_mappings[description] = colors[i]
-                                       i += 1
-                                       if i >= len(colors):
-                                               i = 0
-                       
-                               plt.axvline(x=offset, label=label, color=color_mappings[description], linewidth=1.5)
-                               new_ticks.append(offset)
-
-                       if show_legend:
-                               plt.legend()
-
-                               if new_ticks:
-                                       new_ticks.sort()
-                                       plt.xticks(np.array(new_ticks), new_ticks)
-
-               plt.plot(x, y, linewidth=1.5)
-
-               if average:
-                       plt.plot(x, [average] * len(x), linestyle='--', color='r')
-
-               plt.xlabel('Offset')
-               plt.ylabel('Entropy')
-               plt.title(self.fd.name)
-               plt.ylim(0, 1.5)
-               if save:
-                       plt.savefig(common.unique_file_name(os.path.join(os.path.dirname(self.fd.name), '_' + os.path.basename(self.fd.name)), self.FILE_FORMAT))
-               else:
-                       plt.show()
+               PlotEntropy(x, y, self.fd.name, average, self.file_results, show_legend, save)
 
 class Entropy(object):
        '''
@@ -326,10 +392,11 @@ class Entropy(object):
                bwalk.cleanup()
        '''
 
-       DESCRIPTION = "ENTROPY"
+       DESCRIPTION = "ENTROPY ANALYSIS"
+       ALT_DESCRIPTION = "HEURISTIC ANALYSIS"
        ENTROPY_SCAN = 'entropy'
 
-       def __init__(self, files, binwalk=None, offset=0, length=0, block=0, plot=True, legend=True, save=False, algorithm=None, load_plugins=True, whitelist=[], blacklist=[]):
+       def __init__(self, files, binwalk=None, offset=0, length=0, block=0, plot=True, legend=True, save=False, algorithm=None, load_plugins=True, whitelist=[], blacklist=[], compcheck=False):
                '''
                Class constructor.
 
@@ -341,10 +408,11 @@ class Entropy(object):
                @plot         - Set to False to disable plotting.
                @legend       - Set to False to exclude the legend and custom offset markers from the plot.
                @save         - Set to True to save plots to disk instead of displaying them.
-               @algorithm    - Set to 'shannon' to use shannon entropy algorithm.
+               @algorithm    - Set to 'gzip' to use the gzip entropy "algorithm".
                @load_plugins - Set to False to disable plugin callbacks.
                @whitelist    - A list of whitelisted plugins.
                @blacklist    - A list of blacklisted plugins.
+               @compcheck    - Set to True to enable entropy compression detection.
 
                Returns None.
                '''
@@ -361,6 +429,7 @@ class Entropy(object):
                self.load_plugins = load_plugins
                self.whitelist = whitelist
                self.blacklist = blacklist
+               self.compcheck = compcheck
 
                if len(self.files) > 1:
                        self.save = True
@@ -410,13 +479,18 @@ class Entropy(object):
                                self.plugins._load_plugins()
 
                        if self.binwalk:
-                               self.binwalk.display.header(file_name=file_name, description=self.DESCRIPTION)
+                               if self.compcheck:
+                                       desc = self.ALT_DESCRIPTION
+                               else:
+                                       desc = self.DESCRIPTION
+
+                               self.binwalk.display.header(file_name=file_name, description=desc)
 
-                       with FileEntropy(file_name=file_name, binwalk=self.binwalk, offset=self.offset, length=self.length, block=self.block, plugins=self.plugins) as e:
+                       with FileEntropy(file_name=file_name, binwalk=self.binwalk, offset=self.offset, length=self.length, block=self.block, plugins=self.plugins, file_results=overlay, compcheck=self.compcheck) as e:
                                (x, y, average) = e.analyze(self.algorithm)
                                
                                if self.plot or self.save:
-                                       e.plot(x, y, average, overlay, self.legend, self.save)
+                                       e.plot(x, y, average, self.legend, self.save)
                                
                                results[file_name] = (x, y, average)
 
index f0573c3..02dc4ec 100644 (file)
@@ -56,7 +56,7 @@ class Extractor:
                self.enabled = False
                self.delayed = False
                self.verbose = verbose
-               self.extract_rules = {}
+               self.extract_rules = []
                self.remove_after_execute = False
                self.extract_path = os.getcwd()
 
@@ -110,12 +110,8 @@ class Extractor:
                                r['cmd'] = cmd
 
                        # Verify that the match string and file extension were retrieved.
-                       # Only add the rule if it is a new one (first come, first served).
-                       if match and r['extension'] and not self.extract_rules.has_key(match):
-                               self.extract_rules[match] = {}
-                               self.extract_rules[match]['cmd'] = r['cmd']
-                               self.extract_rules[match]['extension'] = r['extension']
-                               self.extract_rules[match]['regex'] = r['regex']
+                       if match and r['extension']:
+                               self.extract_rules.append(r.copy())
                                # Once any rule is added, set self.enabled to True
                                self.enabled = True
 
@@ -127,13 +123,16 @@ class Extractor:
 
                Returns the number of rules removed.
                '''
-               i = 0
+               rm = []
 
-               for key in self.extract_rules.keys():
-                       if self.extract_rules[key]['regex'].match(text):
-                               del self.extract_rules[key]
-                               i += 1
-               return i
+               for i in range(0, len(self.extract_rules)):
+                       if self.extract_rules[i]['regex'].match(text):
+                               rm.append(i)
+               
+               for i in rm:
+                       self.extract_rules.pop(i)
+
+               return len(rm)
 
        def clear_rules(self):
                '''
@@ -141,12 +140,12 @@ class Extractor:
 
                Returns None.
                '''
-               self.extract_rules = {}
+               self.extract_rules = []
                self.enabled = False
 
        def get_rules(self):
                '''
-               Returns a dictionary of all extraction rules.
+               Returns a list of all extraction rules.
                '''
                return self.extract_rules
 
@@ -247,10 +246,18 @@ class Extractor:
                
                if os.path.isfile(file_path):
                        os.chdir(self.extract_path)
+                       
+                       rules = self._match(description)
+                               
+                       # Loop through each extraction rule until one succeeds
+                       for i in range(0, len(rules)):
+                               rule = rules[i]
 
-                       rule = self._match(description)
-                       if rule is not None:
+                               # Copy out the data to disk, if we haven't already
                                fname = self._dd(file_path, offset, size, rule['extension'], output_file_name=name)
+
+                               # If there was a command specified for this rule, try to execute it.
+                               # If execution fails, the next rule will be attempted.
                                if rule['cmd']:
 
                                        # Many extraction utilities will extract the file to a new file, just without
@@ -263,7 +270,7 @@ class Extractor:
                                                        cleanup_extracted_fname = False
        
                                        # Execute the specified command against the extracted file
-                                       self._execute(rule['cmd'], fname)
+                                       extract_ok = self._execute(rule['cmd'], fname)
 
                                        # Only clean up files if remove_after_execute was specified                             
                                        if self.remove_after_execute:
@@ -282,11 +289,28 @@ class Extractor:
                                                                os.unlink(extracted_fname)
                                                        except:
                                                                pass
-       
-                               fname = os.path.join(self.extract_path, fname)
+                                       
+                                       # If the command executed OK, don't try any more rules
+                                       if extract_ok:
+                                               break
+                                       # Else, remove the extracted file if this isn't the last rule in the list.
+                                       # If it is the last rule, leave the file on disk for the user to examine.
+                                       elif i != len(rules):
+                                               try:
+                                                       os.unlink(fname)
+                                               except:
+                                                       pass
+
+                               # If there was no command to execute, just use the first rule
+                               else:
+                                       break
 
                        os.chdir(original_dir)
-       
+
+               # If a file was extracted, return the full path to that file    
+               if fname:
+                       fname = os.path.join(self.extract_path, fname)
+
                return fname
 
        def delayed_extract(self, results, file_name, size):
@@ -355,12 +379,13 @@ class Extractor:
                Returns the associated rule dictionary if a match is found.
                Returns None if no match is found.
                '''
+               rules = []
                description = description.lower()
 
-               for (m, rule) in self.extract_rules.iteritems():
+               for rule in self.extract_rules:
                        if rule['regex'].search(description):
-                               return rule
-               return None
+                               rules.append(rule)
+               return rules
 
        def _parse_rule(self, rule):
                '''
@@ -432,9 +457,10 @@ class Extractor:
                @cmd   - Command to execute.
                @fname - File to run command against.
 
-               Returns None.
+               Returns True on success, False on failure.
                '''
                tmp = None
+               retval = True
 
                try:
                        if callable(cmd):
@@ -450,10 +476,16 @@ class Extractor:
                                # Execute.
                                subprocess.call(shlex.split(cmd), stdout=tmp, stderr=tmp)
                except Exception, e:
-                       sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (cmd, str(e)))
+                       # Silently ignore no such file or directory errors. Why? Because these will inevitably be raised when
+                       # making the switch to the new firmware mod kit directory structure. We handle this elsewhere, but it's
+                       # annoying to see this spammed out to the console every time.
+                       if e.errno != 2:
+                               sys.stderr.write("WARNING: Extractor.execute failed to run '%s': %s\n" % (str(cmd), str(e)))
+                       retval = False
                
                if tmp is not None:
                        tmp.close()
 
+               return retval
        
 
index a65e535..e26288b 100644 (file)
@@ -155,7 +155,7 @@ class MagicFilter:
                if self.INVALID_RESULT in common.strip_quoted_strings(self.smart._strip_tags(data)):
                        return True
 
-               # There should be no non-printable data in any of the data
+               # There should be no non-printable characters in any of the data
                if self.NON_PRINTABLE_RESULT in data:
                        return True
 
diff --git a/binwalk/hexdiff.py b/binwalk/hexdiff.py
new file mode 100644 (file)
index 0000000..fdd574c
--- /dev/null
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import string
+import curses
+import platform
+
class HexDiff(object):
	'''
	Performs and displays a side-by-side colorized hexdump/diff of one or more files.

	Each byte column is color coded: green when the byte is identical in all files,
	red when it differs in every file, and blue when it differs in only some files.
	'''

	# Per-byte diff classifications, used to select a highlight color.
	ALL_SAME = 0
	ALL_DIFF = 1
	SOME_DIFF = 2

	# Default number of bytes to diff, and number of bytes displayed per line.
	DEFAULT_DIFF_SIZE = 0x100
	DEFAULT_BLOCK_SIZE = 16

	# ANSI color codes used by _colorize.
	COLORS = {
		'red'   : '31',
		'green' : '32',
		'blue'  : '34',
	}

	def __init__(self, binwalk=None):
		'''
		Class constructor.

		@binwalk - Optional Binwalk instance. When provided, its display and
		           filter callbacks are used; otherwise output goes to stdout.

		Returns None.
		'''
		self.block_hex = ""
		self.printed_alt_text = False

		if binwalk:
			self._pprint = binwalk.display._pprint
			self._show_header = binwalk.display.header
			self._footer = binwalk.display.footer
			self._display_result = binwalk.display.results
			self._grep = binwalk.filter.grep
		else:
			self._pprint = sys.stdout.write
			self._show_header = self._print
			self._footer = self._simple_footer
			self._display_result = self._print
			self._grep = None

		# Only emit ANSI color escapes when stderr is an interactive terminal on
		# a platform that understands them.
		if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty() and platform.system() != 'Windows':
			curses.setupterm()
			self.colorize = self._colorize
		else:
			self.colorize = self._no_colorize

	def _print(self, header=None, *args, **kwargs):
		'''
		Fallback display callback used when no Binwalk instance was supplied.

		Bug fix: this method was referenced by __init__ (self._show_header,
		self._display_result) but never defined, so stand-alone usage
		(binwalk=None) raised AttributeError during construction.
		'''
		if header is not None:
			self._pprint("%s\n" % header)

	def _no_colorize(self, c, color="red", bold=True):
		# Pass-through used when the terminal does not support ANSI colors.
		return c

	def _colorize(self, c, color="red", bold=True):
		# Wrap the string c in ANSI escape codes for the requested color/boldness.
		attr = []

		attr.append(self.COLORS[color])
		if bold:
			attr.append('1')

		return "\x1b[%sm%s\x1b[0m" % (';'.join(attr), c)

	def _print_block_hex(self, alt_text="*"):
		'''
		Print the accumulated hex line if it passes the grep filter.

		@alt_text - Text displayed (once per filtered run) for suppressed lines.

		Returns True if anything was printed, else False.
		'''
		printed = False

		if self._grep is None or self._grep(self.block_hex):
			self._pprint(self.block_hex)
			self.printed_alt_text = False
			printed = True
		elif not self.printed_alt_text:
			# Line was filtered out; show the alternate marker only once per run.
			self._pprint("%s\n" % alt_text)
			self.printed_alt_text = True
			printed = True

		self.block_hex = ""
		return printed

	def _build_block(self, c, highlight=None):
		# Append c to the pending output line, colorized per its diff classification.
		if highlight == self.ALL_DIFF:
			self.block_hex += self.colorize(c, color="red")
		elif highlight == self.ALL_SAME:
			self.block_hex += self.colorize(c, color="green")
		elif highlight == self.SOME_DIFF:
			self.block_hex += self.colorize(c, color="blue")
		else:
			self.block_hex += c

	def _simple_footer(self):
		# Trailing blank line for stand-alone mode. Changed from a Python 2
		# 'print' statement to a _pprint call for forward compatibility.
		self._pprint("\n")

	def _header(self, files, block):
		# Build and display the column header containing each file's base name.
		header = "OFFSET    "
		for i in range(0, len(files)):
			f = files[i]
			header += "%s" % os.path.basename(f)
			if i != len(files)-1:
				header += " " * ((block*4) + 10 - len(os.path.basename(f)))
		self._show_header(header=header)

	def display(self, files, offset=0, size=DEFAULT_DIFF_SIZE, block=DEFAULT_BLOCK_SIZE, show_first_only=False):
		'''
		Display a hexdump diff of the specified files.

		@files           - List of file paths to diff.
		@offset          - File offset to begin reading from.
		@size            - Number of bytes to diff.
		@block           - Number of bytes to display per line.
		@show_first_only - If True, only display the first file's hex columns.

		Returns None.
		'''
		i = 0
		data = {}
		delim = '/'

		if show_first_only:
			self._header([files[0]], block)
		else:
			self._header(files, block)

		# Read the requested region of every file up front.
		for f in files:
			fp = open(f, 'rb')
			fp.seek(offset)
			data[f] = fp.read(size)
			fp.close()

		while i < size:
			diff_same = {}
			alt_text = "*" + " " * 6

			self._build_block("%.08X  " % i)

			# For each byte in this block: is the byte the same in all files,
			# the same in some files, or different in all files?
			for j in range(0, block):
				byte_list = []

				for f in files:
					try:
						c = data[f][j+i]
					except Exception:
						# Read past the end of this file's data.
						c = None

					if c not in byte_list:
						byte_list.append(c)

				if len(byte_list) == 1:
					diff_same[j] = self.ALL_SAME
				elif len(byte_list) == len(files):
					diff_same[j] = self.ALL_DIFF
				else:
					diff_same[j] = self.SOME_DIFF

			for index in range(0, len(files)):
				if show_first_only and index > 0:
					break

				f = files[index]

				alt_text += " " * (3 + (3 * block) + 3 + block + 3)
				alt_text += delim

				for j in range(0, block):
					try:
						self._build_block("%.2X " % ord(data[f][j+i]), highlight=diff_same[j])
					except Exception:
						self._build_block("   ")

					if (j+1) == block:
						# End of the hex columns; emit the ASCII representation.
						self._build_block(" |")
						for k in range(0, block):
							try:
								if data[f][k+i] in string.printable and data[f][k+i] not in string.whitespace:
									self._build_block(data[f][k+i], highlight=diff_same[k])
								else:
									self._build_block('.', highlight=diff_same[k])
							except Exception:
								self._build_block(' ')

						if index == len(files)-1 or (show_first_only and index == 0):
							self._build_block("|\n")
						else:
							self._build_block('|   %s   ' % delim)

			# Alternate the column delimiter on each printed line as a visual cue.
			if self._print_block_hex(alt_text=alt_text[:-1].strip()):
				if delim == '\\':
					delim = '/'
				else:
					delim = '\\'

			i += block

		self._footer()
+
# Stand-alone invocation: hexdiff each file path supplied on the command line,
# using the default offset, size and block width.
if __name__ == "__main__":
	HexDiff().display(sys.argv[1:])
+
index 717bfc1..6695ef2 100644 (file)
 0      beshort 0xE8BD          ARMEB instructions, function epilogue
 0      leshort 0xE8BD          ARM instructions, function epilogue{offset-adjust:-2}
 
+# Ubicom32 prologue
+# move.4 -4($sp)++, $ra
+0      belong  0x02FF6125      Ubicom32 instructions, function prologue
+
+# Ubicom32 epilogues
+# calli $ra, 0($ra)
+# ret ($sp)4++
+0      belong  0xF0A000A0      Ubicom32 instructions, function epilogue
+0      belong  0x000022E1      Ubicom32 instructions, function epilogue
+
+# AVR8 prologue
+# push r28
+# push r29
+0      belong  0x93CF93DF      AVR8 instructions, function prologue
+0      belong  0x93DF93CF      AVR8 instructions, function prologue
+
+# AVR32 prologue
+# pushm   r7,lr
+# mov r7,sp
+0      string  \xEB\xCD\x40\x80\x1A\x97        AVR32 instructions, function prologue
+
+# SPARC epilogue
+# ret
+# restore XX
+0      string  \x81\xC7\xE0\x08\x81\xE8        SPARC instructions, function epilogue
+
 # x86 epilogue
 # push ebp
 # move ebp, esp
index bf5ad01..2e97bd9 100644 (file)
 
 0       string          070701          ASCII cpio archive (SVR4 with no CRC),
 >110   byte            0               invalid
->110   byte            !0x2F
->>110  string          !TRAILER!!!     invalid
+#>110  byte            !0x2F
+#>>110 string          !TRAILER!!!     invalid
 >110   string          x               file name: "%s"
+>54    string          x               file size: "0x%.8s"
 >54    string          x               {jump-to-offset:0x%.8s+112}
 
 0       string          070702          ASCII cpio archive (SVR4 with CRC)
 >110   byte            0               invalid
->110   byte            !0x2F
->>110  string          !TRAILER!!!     invalid
+#>110  byte            !0x2F
+#>>110 string          !TRAILER!!!     invalid
 >110   string          x               file name: "%s"
+>54    string          x               file size: "0x%.8s"
 >54    string          x               {jump-to-offset:0x%.8s+112}
 
 
 >>4    lelong          =694224000      \b, invalid date:
 >>4    lelong          >694224000      \b, last modified:
 >4      ledate          x               %s
+>4     lelong          x               \b{epoch:%d}
 >8      byte            2               \b, max compression
 >8      byte            4               \b, max speed
 
 >15     string          SIGNED\040MESSAGE-      signed message
 >15     string          PGP\040SIGNATURE-       signature
 
+0      string          Salted__                OpenSSL encryption, salted,
+>8     belong          x                       salt: 0x%X
+>12    belong          x                       \b%X
 
 #------------------Standard file formats------------------------------------
 
 #0       string  MZ    Microsoft
 #>0x18  leshort <0x40 MS-DOS executable
 
-0 string MZ\0\0\0\0\0\0\0\0\0\0PE\0\0  Microsoft PE for MS Windows
+0      string          MZ\0\0\0\0\0\0\0\0\0\0
+>12    string          PE\0\0  Microsoft PE
+>0x18   leshort        <0x40   MS-DOS executable
 >>&18   leshort&0x2000  >0      (DLL)
 >>&88   leshort         0       (unknown subsystem)
 >>&88   leshort         1       (native)
 # 
 # Additional fields added by Craig Heffner
 #
-0       string          bFLT            BFLT executable 
+0       string          bFLT            BFLT executable
+>4     belong          <1              invalid
+>4     belong          >4              invalid
 >4      belong          x               version %ld, 
 >4      belong          4
 >8     belong          x               code offset: 0x%.8X, 
 # cramfs filesystem - russell@coker.com.au
 0       lelong  0x28cd3d45      CramFS filesystem, little endian
 >4     lelong  <0              invalid
+>4     lelong  >1073741824     invalid
 >4      lelong  x              size %lu
 >8      lelong  &1             version #2
 >8      lelong  &2             sorted_dirs
 
 0       belong  0x28cd3d45      CramFS filesystem, big endian
 >4     belong  <0              invalid
+>4     lelong  >536870912000   invalid
 >4      belong  x              size %lu
 >8      belong  &1             version #2
 >8      belong  &2             sorted_dirs
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >30    leshort >10     invalid
 >28     leshort x       version %d.
 >30     leshort x       \b%d,
->28    leshort >3      compression: 
+>28    leshort >3      compression:
 >>20   leshort 1       \bgzip,
 >>20   leshort 2       \blzma,
+>>20   leshort 3       \bgzip (non-standard type definition),
+>>20   leshort 4       \blzma (non-standard type definition),
 >>20   leshort 0       \binvalid,
 >>20   leshort >4      \binvalid,
 >28     leshort <3
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >28     leshort >3      compression:
 >>20    leshort 1       \bgzip,
 >>20    leshort 2       \blzma,
+>>20   leshort 3       \bgzip (non-standard type definition),
+>>20   leshort 4       \blzma (non-standard type definition),
 >>20    leshort 0       \binvalid,
 >>20    leshort >4      \binvalid,
 >28     leshort <3
 >28     leshort >3      compression:
 >>20    leshort 1       \bgzip,
 >>20    leshort 2       \blzma,
+>>20   leshort 3       \bgzip (non-standard type definition),
+>>20   leshort 4       \blzma (non-standard type definition),
 >>20    leshort 0       \binvalid,
 >>20    leshort >4      \binvalid,
 >28     leshort <3
 >32    belong  2                                       plain text,
 >36    belong  x                                       %d files
 
-
 # netboot image - Juan Cespedes <cespedes@debian.org>
 0      lelong                  0x1b031336L     Netboot image,
 >4     lelong&0xFFFFFF00       0
 >>4    lelong&0x100            0x100           mode 3
 >4     lelong&0xFFFFFF00       !0              unknown mode (invalid)
 
+0      string                  WDK\x202.0\x00  WDK file system, version 2.0{offset-adjust:-18}
+
+0      string          CD001                                           ISO{offset-adjust:-32769}
+>6144  string          !NSR0                                           9660 CD-ROM filesystem data,
+>6144  string          NSR0                                            UDF filesystem data,
+>6148  string          1                                               version 1.0,
+>6148  string          2                                               version 2.0,
+>6148  string          3                                               version 3.0
+>6148  byte            >0x33                                           invalid version,
+>6148  byte            <0x31                                           invalid version,
+>38    string          >\0                                             volume name: "%s",
+>2047  string          \000CD001\001EL\x20TORITO\x20SPECIFICATION      bootable
+
+
 #--------------------------Firmware Formats---------------------------
 
 # uImage file     
 >48    string          x               root device: "%s"
 
 # trx image file
-0      string          HDR0            TRX firmware header, little endian, header size: 28 bytes, 
+0      string          HDR0            TRX firmware header, little endian, header size: 28 bytes,
+>4     lelong          <1              invalid
 >4     lelong          x               image size: %d bytes,
 >8     lelong          x               CRC32: 0x%X
->12    lelong          x               flags/version: 0x%X
+>12    leshort         x               flags: 0x%X,
+>14    leshort         x               version: %d
 
 0      string          0RDH            TRX firmware header, big endian, header size: 28 bytes,
+>4     belong          <1              invalid
 >4     belong          x               image size: %d bytes,
 >8     belong          x               CRC32: 0x%X
->12    belong          x               flags/version: 0x%X
+>12    beshort         x               flags: 0x%X,
+>14    beshort         x               version: %d
 
 
 # Ubicom firmware image
 >58    leshort         x                               firmware version: 0x%X,
 >60    leshort         x                               starting code segment: 0x%X,
 >62    leshort         x                               code size: 0x%X
+
+# NPK firmware header, used by Mikrotik
+0              belong          0x1EF1D0BA              NPK firmware header,
+>4             lelong          <0                      invalid
+>4             lelong          x                       image size: %d,
+>14            string          x                       image name: "%s",
+>(48.l+58)     string          x                       description: "%s
+>(48.l+121)    string          x                       \b%s"
+
+# Ubiquiti firmware signatures
+0      string          UBNT            Ubiquiti firmware header,
+>0x104 belong          x               ~CRC32: 0x%X,
+>4     string          x               version: "%s"
+
+0      string          GEOS            Ubiquiti firmware header,
+>0x104 belong          x               ~CRC32: 0x%X,
+>4     string          x               version: "%s"
+
+# Too many false positives...
+#0     string          OPEN            Ubiquiti firmware header, third party,
+#>0x104        belong          x               ~CRC32: 0x%X,
+#>4    string          x               version: "%s"
+
+0      string          PARTkernel      Ubiquiti firmware kernel partition
+0      string          PARTcramfs      Ubiquiti firmware CramFS partition
+0      string          PARTrootfs      Ubiquiti firmware rootfs partition
+
+# Found in DIR-100 firmware
+0       string         AIH0N           AIH0N firmware header, header size: 48,
+>12     belong         x               size: %d,
+>8      belong         !0              executable code,
+>>8     belong         x               load address: 0x%X,
+>32     string         x               version: "%s"
+
+0      belong          0x5EA3A417      SEAMA firmware header, big endian,
+>6     beshort         x               meta size: %d,
+>8     belong          x               size: %d
+
+0      lelong          0x5EA3A417      SEAMA firmware header, little endian,
+>6     leshort         x               meta size: %d,
+>8     lelong          x               size: %d
+
+0      belong          0x4D544443      NSP firmware header, big endian,
+>16    belong          x               header size: %d,
+>20    belong          x               image size: %d,
+>4     belong          x               kernel offset: %d,
+>12    belong          x               header version: %d,
+
+0      lelong          0x4D544443      NSP firmware header, little endian,
+>16    lelong          x               header size: %d,
+>20    lelong          x               image size: %d,
+>4     lelong          x               kernel offset: %d,
+>12    lelong          x               header version: %d,
+
+# http://www.openwiz.org/wiki/Firmware_Layout#Beyonwiz_.wrp_header_structure
+0      string          WizFwPkgl       Beyonwiz firmware header,
+>20    string          x               version: "%s"
+
+0      string          BLI223WJ0       Thompson/Alcatel encoded firmware,
+>32    byte            x               version: %d.
+>33    byte            x               \b%d.
+>34    byte            x               \b%d.
+>35    byte            x               \b%d,
+>44    belong          x               size: %d,
+>48    belong          x               crc: 0x%.8X,
+>35    byte            x               try decryption tool from:
+>35    byte            x               http://download.modem-help.co.uk/mfcs-A/Alcatel/Modems/Misc/
+
 # Tag Image File Format, from Daniel Quinlan (quinlan@yggdrasil.com)
 # The second word of TIFF files is the TIFF version number, 42, which has
 # never changed.  The TIFF specification recommends testing for it.
 # PC bitmaps (OS/2, Windows BMP files)  (Greg Roelofs, newt@uchicago.edu)
 0       string          BM
 >14     leshort         12              PC bitmap, OS/2 1.x format
+>>18   lelong          <1              invalid
+>>18   lelong          >1000000        invalid
 >>18    leshort         x               \b, %d x
+>>20   lelong          <1              invalid
+>>20   lelong          >1000000        invalid
 >>20    leshort         x               %d
 >14     leshort         64              PC bitmap, OS/2 2.x format
+>>18   lelong          <1              invalid
+>>18   lelong          >1000000        invalid
 >>18    leshort         x               \b, %d x
+>>20   lelong          <1              invalid
+>>20   lelong          >1000000        invalid
 >>20    leshort         x               %d
 >14     leshort         40              PC bitmap, Windows 3.x format
+>>18   lelong          <1              invalid
+>>18   lelong          >1000000        invalid
 >>18    lelong          x               \b, %d x
+>>22   lelong          <1              invalid
+>>22   lelong          >1000000        invalid
 >>22    lelong          x               %d x
+>>28   lelong          <1              invalid
+>>28   lelong          >1000000        invalid
 >>28    leshort         x               %d
 >14     leshort         128             PC bitmap, Windows NT/2000 format
+>>18   lelong          >1000000        invalid
+>>18   lelong          <1              invalid
 >>18    lelong          x               \b, %d x
+>>22   lelong          <1              invalid
+>>22   lelong          >1000000        invalid
 >>22    lelong          x               %d x
+>>28   lelong          <1              invalid
+>>28   lelong          >1000000        invalid
 >>28    leshort         x               %d
 
 #------------------------------------------------------------------------------
diff --git a/binwalk/magic/zlib b/binwalk/magic/zlib
new file mode 100644 (file)
index 0000000..f91e156
--- /dev/null
@@ -0,0 +1,4 @@
+0      beshort         0x7801          Zlib header, no compression
+0      beshort         0x789c          Zlib header, default compression
+0      beshort         0x78da          Zlib header, best compression
+0      beshort         0x785e          Zlib header
diff --git a/binwalk/maths.py b/binwalk/maths.py
new file mode 100644 (file)
index 0000000..2e7de27
--- /dev/null
@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+# Routines to perform Monte Carlo Pi approximation and Chi Squared tests. 
+# Used for fingerprinting unknown areas of high entropy (e.g., is this block of high entropy data compressed or encrypted?).
+# Inspired by people who actually know what they're doing: http://www.fourmilab.ch/random/
+
+import math
+
class MonteCarloPi(object):
	'''
	Performs a Monte Carlo Pi approximation.

	Truly random data should approximate pi well; the percent deviation of the
	approximation from the real value of pi is a measure of the data's randomness.
	'''

	def __init__(self):
		'''
		Class constructor.

		Returns None.
		'''
		self.reset()

	def reset(self):
		'''
		Reset state to the beginning.
		'''
		self.pi = 0     # Latest pi approximation
		self.error = 0  # Percent deviation from the real value of pi
		self.m = 0      # Number of sampled points that fell inside the circle
		self.n = 0      # Total number of sampled points

	def update(self, data):
		'''
		Update the pi approximation with new data.

		@data - A string of bytes to update (length must be >= 6).

		Returns None.
		'''
		c = 0
		dlen = len(data)

		# Bug fix: use <= so the final complete 6-byte sample is consumed.
		# Previously, a buffer of exactly 6 bytes (the documented minimum)
		# produced no samples at all.
		while (c+6) <= dlen:
			# Treat 3 bytes as an x coordinate, the next 3 bytes as a y coordinate.
			# Our box is 1x1, so divide by 2^24 to put the x y values inside the box.
			x = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
			c += 3
			y = ((ord(data[c]) << 16) + (ord(data[c+1]) << 8) + ord(data[c+2])) / 16777216.0
			c += 3

			# Does the x,y point lie inside the circle inscribed within our box, with diameter == 1?
			if ((x**2) + (y**2)) <= 1:
				self.m += 1
			self.n += 1

	def montecarlo(self):
		'''
		Approximates the value of Pi based on the provided data.

		Returns a tuple of (approximated value of pi, percent deviation).
		'''
		if self.n:
			self.pi = (float(self.m) / float(self.n) * 4.0)

		if self.pi:
			self.error = math.fabs(1.0 - (math.pi / self.pi)) * 100.0
			return (self.pi, self.error)
		else:
			return (0.0, 0.0)
+
class ChiSquare(object):
	'''
	Performs a Chi Squared test against the provided data.

	Counts byte-value frequencies and measures how far they deviate from the
	uniform distribution expected of random data.
	'''

	# Number of possible byte values; a uniform stream spreads counts evenly
	# across all of them.
	IDEAL = 256.0

	def __init__(self):
		'''
		Class constructor.

		Returns None.
		'''
		# One counter per possible byte value (0x00 - 0xFF), keyed by character.
		self.bytes = dict([(chr(j), 0) for j in range(0, int(self.IDEAL))])
		# Degrees of freedom for the test.
		self.freedom = self.IDEAL - 1

		self.reset()

	def reset(self):
		# Zero the running critical value, the byte total, and every per-byte counter.
		self.xc2 = 0.0
		self.byte_count = 0

		for key in self.bytes.keys():
			self.bytes[key] = 0

	def update(self, data):
		'''
		Updates the current byte counts with new data.

		@data - String of bytes to update.

		Returns None.
		'''
		# Tally the number of occurrences of each byte value.
		for symbol in data:
			self.bytes[symbol] = self.bytes[symbol] + 1

		self.byte_count = self.byte_count + len(data)

	def chisq(self):
		'''
		Calculate the Chi Square critical value.

		Returns the critical value.
		'''
		# Expected count per byte value under a uniform distribution.
		expected = self.byte_count / self.IDEAL

		if expected:
			for observed in self.bytes.values():
				self.xc2 += ((observed - expected) ** 2) / expected

		return self.xc2
+
class MathAnalyzer(object):
	'''
	Class wrapper around ChiSquare and MonteCarloPi.
	Performs analysis and attempts to interpret the results.
	'''

	# Data blocks must be in multiples of 6 for the monte carlo pi approximation
	BLOCK_SIZE = 32
	CHI_CUTOFF = 512

	def __init__(self, fp, start, length):
		'''
		Class constructor.

		@fp     - A seekable, readable, file object that will be the data source.
		@start  - The start offset to begin analysis at.
		@length - The number of bytes to analyze.

		Returns None.
		'''
		self.fp = fp
		self.start = start
		self.length = length

	def analyze(self):
		'''
		Perform analysis and interpretation.

		Returns a descriptive string containing the results and attempted interpretation.
		'''
		remaining = self.length
		deviations = 0
		chi = ChiSquare()

		self.fp.seek(self.start)

		# Walk the region one block at a time, counting blocks whose byte
		# distribution deviates sharply from uniform (i.e. low-entropy blocks).
		while remaining > 0:
			chunk_size = min(remaining, self.BLOCK_SIZE)

			chi.reset()
			chi.update(self.fp.read(chunk_size))

			if chi.chisq() >= self.CHI_CUTOFF:
				deviations += 1

			remaining -= chunk_size

		if deviations > 0:
			verdict = 'Low/medium entropy data block'
		else:
			verdict = 'High entropy data block'

		return '%s, %d low entropy blocks' % (verdict, deviations)
+
+if __name__ == "__main__":
+       import sys
+
+       rsize = 0
+       largest = (0, 0)
+       num_error = 0
+       data = open(sys.argv[1], 'rb').read()
+
+       try:
+               block_size = int(sys.argv[2], 0)
+       except:
+               block_size = 32
+
+       chi = ChiSquare()
+       
+       while rsize < len(data):
+               chi.reset()
+
+               d = data[rsize:rsize+block_size]
+               if d < block_size:
+                       break
+
+               chi.update(d)
+               if chi.chisq() >= 512:
+                       sys.stderr.write("0x%X -> %d\n" % (rsize, chi.xc2))
+                       num_error += 1
+               if chi.xc2 >= largest[1]:
+                       largest = (rsize, chi.xc2)
+
+               rsize += block_size
+
+       sys.stderr.write("Number of deviations: %d\n" % num_error)
+       sys.stderr.write("Largest deviation: %d at offset 0x%X\n" % (largest[1], largest[0]))
+
+       print "Data:",
+       if num_error != 0:
+               print "Compressed"
+       else:
+               print "Encrypted"
+
+       print "Confidence:",
+       if num_error >= 5 or num_error == 0:
+               print "High"
+       elif num_error in [3,4]:
+               print "Medium"
+       else:
+               print "Low"
+
+
diff --git a/binwalk/plugins/zlib.py b/binwalk/plugins/zlib.py
new file mode 100644 (file)
index 0000000..d978784
--- /dev/null
@@ -0,0 +1,42 @@
+import zlib
+from binwalk.plugins import *
+
+class Plugin:
+       '''
+       Finds and validates zlib compressed data.
+       '''
+
+       ENABLED = False
+
+       def __init__(self, binwalk):
+               self.fd = None
+               self.enabled = False
+
+               if binwalk.scan_type == binwalk.BINWALK:
+                       # Add the zlib file to the list of magic files
+                       binwalk.magic_files.append(binwalk.config.paths['system'][binwalk.config.ZLIB_MAGIC_FILE])
+                       self.enabled = True
+       
+       def pre_scan(self, fd):
+               if self.enabled:
+                       self.fd = open(fd.name, 'rb')
+
+       def callback(self, result):
+
+               # If this result is a zlib signature match, try to decompress the data
+               if self.fd and result['description'].lower().startswith('zlib'):
+                       try:
+                               # With a lot of zlib matches on large files, this can result in a lot of disk I/O
+                               # and a lot of memory usage. This is why this module is disabled by default.
+                               self.fd.seek(result['offset'])
+                               zlib.decompressobj().decompress(self.fd.read())
+                       except Exception:
+                               # If decompression raised an exception, this is probably not valid zlib data
+                               return (PLUGIN_NO_DISPLAY | PLUGIN_NO_EXTRACT)
+               
+               return PLUGIN_CONTINUE
+
+       def post_scan(self, fd):
+               if self.fd:
+                       self.fd.close()
+
index 2d66dbb..8829a23 100644 (file)
@@ -220,11 +220,12 @@ class PrettyPrint:
                self._pprint("Target File:   %s\n" % file_name)
                self._pprint("MD5 Checksum:  %s\n" % self._file_md5(file_name))
 
-       def header(self, file_name=None, description=DEFAULT_DESCRIPTION_HEADER):
+       def header(self, file_name=None, header=None, description=DEFAULT_DESCRIPTION_HEADER):
                '''
                Prints the binwalk header, typically used just before starting a scan.
 
                @file_name   - If specified, and if self.verbose > 0, then detailed file info will be included in the header.
+               @header      - If specified, this is a custom header to display at the top of the output.
                @description - The description header text to display (default: "DESCRIPTION")
 
                Returns None.
@@ -232,7 +233,13 @@ class PrettyPrint:
                if self.verbose and file_name is not None:
                        self.file_info(file_name)
 
-               self._pprint("\nDECIMAL   \tHEX       \t%s\n" % description)
+               self._pprint("\n")
+
+               if not header:
+                       self._pprint("DECIMAL   \tHEX       \t%s\n" % description)
+               else:
+                       self._pprint(header + "\n")
+               
                self._pprint("-" * self.HEADER_WIDTH + "\n")
 
        def footer(self, bwalk=None, file_name=None):
@@ -243,13 +250,14 @@ class PrettyPrint:
                '''
                self._pprint("\n")
 
-       def results(self, offset, results):
+       def results(self, offset, results, formatted=False):
                '''
                Prints the results of a scan. Suitable for use as a callback function for Binwalk.scan().
 
-               @offset  - The offset at which the results were found.
-               @results - A list of libmagic result strings.
-               
+               @offset    - The offset at which the results were found.
+               @results   - A list of libmagic result strings.
+               @formatted - Set to True if the result description has already been formatted properly.
+
                Returns None.
                '''
                offset_printed = False
@@ -257,10 +265,13 @@ class PrettyPrint:
                for info in results:
                        # Check for any grep filters before printing
                        if self.binwalk.filter.grep(info['description']):
+                               if not formatted:
                                # Only display the offset once per list of results
-                               if not offset_printed:
-                                       self._pprint("%-10d\t0x%-8X\t%s\n" % (offset, offset, self._format(info['description'])))
-                                       offset_printed = True
+                                       if not offset_printed:
+                                               self._pprint("%-10d\t0x%-8X\t%s\n" % (offset, offset, self._format(info['description'])))
+                                               offset_printed = True
+                                       else:
+                                               self._pprint("%s\t  %s\t%s\n" % (' '*10, ' '*8, self._format(info['description'])))
                                else:
-                                       self._pprint("%s\t  %s\t%s\n" % (' '*10, ' '*8, self._format(info['description'])))
+                                       self._pprint(info['description'])
 
index f17f417..7b7a45e 100644 (file)
@@ -24,6 +24,8 @@ class SmartSignature:
                'raw-size'              : '%sraw-string-length:' % KEYWORD_DELIM_START,
                'adjust'                : '%soffset-adjust:' % KEYWORD_DELIM_START,
                'delay'                 : '%sextract-delay:' % KEYWORD_DELIM_START,
+               'year'                  : '%syear:' % KEYWORD_DELIM_START,
+               'epoch'                 : '%sepoch:' % KEYWORD_DELIM_START,
 
                'raw-replace'           : '%sraw-replace%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
                'one-of-many'           : '%sone-of-many%s' % (KEYWORD_DELIM_START, KEYWORD_DELIM_END),
@@ -39,6 +41,7 @@ class SmartSignature:
                Returns None.
                '''
                self.filter = filter
+               self.invalid = False
                self.last_one_of_many = None
                self.ignore_smart_signatures = ignore_smart_signatures
 
@@ -51,16 +54,21 @@ class SmartSignature:
                Returns a dictionary of parsed values.
                '''
                results = {
-                       'offset'        : '',   # Offset where the match was found, filled in by Binwalk.single_scan.
-                       'description'   : '',   # The libmagic data string, stripped of all keywords
-                       'name'          : '',   # The original name of the file, if known
-                       'delay'         : '',   # Extract delay description
-                       'extract'       : '',   # Name of the extracted file, filled in by Binwalk.single_scan.
-                       'jump'          : 0,    # The relative offset to resume the scan from
-                       'size'          : 0,    # The size of the file, if known
-                       'adjust'        : 0,    # The relative offset to add to the reported offset
+                       'offset'        : '',           # Offset where the match was found, filled in by Binwalk.single_scan.
+                       'description'   : '',           # The libmagic data string, stripped of all keywords
+                       'name'          : '',           # The original name of the file, if known
+                       'delay'         : '',           # Extract delay description
+                       'extract'       : '',           # Name of the extracted file, filled in by Binwalk.single_scan.
+                       'jump'          : 0,            # The relative offset to resume the scan from
+                       'size'          : 0,            # The size of the file, if known
+                       'adjust'        : 0,            # The relative offset to add to the reported offset
+                       'year'          : 0,            # The file's creation/modification year, if reported in the signature
+                       'epoch'         : 0,            # The file's creation/modification epoch time, if reported in the signature
+                       'invalid'       : False,        # Set to True if parsed numerical values appear invalid
                }
 
+               self.invalid = False
+
                # If smart signatures are disabled, or the result data is not valid (i.e., potentially malicious), 
                # don't parse anything, just return the raw data as the description.
                if self.ignore_smart_signatures or not self._is_valid(data):
@@ -79,6 +87,16 @@ class SmartSignature:
                        except:
                                pass
 
+                       try:
+                               results['year'] = str2int(self._get_keyword_arg(data, 'year'))
+                       except:
+                               pass
+                       
+                       try:
+                               results['epoch'] = str2int(self._get_keyword_arg(data, 'epoch'))
+                       except:
+                               pass
+
                        results['delay'] = self._get_keyword_arg(data, 'delay')
 
                        # Parse the string for the jump-to-offset keyword.
@@ -91,6 +109,8 @@ class SmartSignature:
                                results['name'] = self._get_keyword_arg(data, 'filename').strip('"')
                                results['description'] = self._strip_tags(data)
 
+               results['invalid'] = self.invalid
+
                return results
 
        def _is_valid(self, data):
@@ -171,7 +191,7 @@ class SmartSignature:
                                try:
                                        value += str2int(string_int)
                                except:
-                                       pass
+                                       self.invalid = True
 
                return value                    
 
index 9cf4fd3..d51658d 100644 (file)
@@ -23,8 +23,14 @@ class FileStrings(object):
        PUNCTUATION = [x for x in string.punctuation]
        NEWLINES = ['\r', '\n', '\x0b', '\x0c']
        VOWELS = ['A', 'E', 'I', 'O', 'U', 'a', 'e', 'i', 'o', 'u']
-       NON_ALPHA_EXCEPTIONS = ['%', '.', '/', '>', '<'] + NUMBERS
-
+       NON_ALPHA_EXCEPTIONS = ['%', '.', '/', '-', '_']
+       BRACKETED = {
+                       '[' : ']',
+                       '<' : '>',
+                       '{' : '}',
+                       '(' : ')',
+       }
+       
        def __init__(self, file_name, binwalk, length=0, offset=0, n=MIN_STRING_LENGTH, block=0, algorithm=None, plugins=None):
                '''
                Class constructor. Preferred to be invoked from the Strings class instead of directly.
@@ -59,7 +65,8 @@ class FileStrings(object):
                        self.n = self.MIN_STRING_LENGTH
 
                # Perform an entropy analysis over the entire file (anything less may generate poor entropy data).
-               with entropy.FileEntropy(self.file_name, block=block) as e:
+               # Give fake file results list to prevent FileEntropy from doing too much analysis.
+               with entropy.FileEntropy(self.file_name, block=block, file_results=['foo']) as e:
                        (self.x, self.y, self.average_entropy) = e.analyze()
                        for i in range(0, len(self.x)):
                                self.entropy[self.x[i]] = self.y[i]
@@ -105,8 +112,17 @@ class FileStrings(object):
                '''
                offset = self.total_read + self.start
 
-               # Ignore all blocks which have a higher than average or higher than MAX_ENTROPY entropy
+               # Ignore blocks which have a higher than average or higher than MAX_ENTROPY entropy
                while self.entropy.has_key(offset):
+                       # Don't ignore blocks that border on an entropy rising/falling edge
+                       try:
+                               if self.entropy[offset-self.block] <= self.MAX_ENTROPY:
+                                       break
+                               if self.entropy[offset+self.block] <= self.MAX_ENTROPY:
+                                       break
+                       except KeyError:
+                               break
+
                        if self.entropy[offset] > self.average_entropy or self.entropy[offset] > self.MAX_ENTROPY:
                                self.total_read += self.block
                                offset = self.total_read + self.start
@@ -170,20 +186,43 @@ class FileStrings(object):
                                c += 1
                return c
 
+       def _is_bracketed(self, data):
+               '''
+               Checks if a string is bracketed by special characters.
+
+               @data - The data string to check.
+
+               Returns True if bracketed, False if not.
+               '''
+               return self.BRACKETED.has_key(data[0]) and data.endswith(self.BRACKETED[data[0]])
+
        def _non_alpha_count(self, data):
                '''
                Returns the number of non-english letters in data.
                '''
                c = 0
+               dlen = len(data)
+
+               # No exceptions for very short strings
+               if dlen <= self.SUSPECT_STRING_LENGTH:
+                       exceptions = []
+               else:
+                       exceptions = self.NON_ALPHA_EXCEPTIONS
+
                for i in range(0, len(data)):
-                       if data[i] not in self.LETTERS and data[i] not in self.NON_ALPHA_EXCEPTIONS:
-                               c += 1
+                       if data[i] not in self.LETTERS and data[i] not in self.NUMBERS and data[i] not in exceptions:
+                                       c += 1
                return c
 
        def _too_many_special_chars(self, data):
                '''
                Returns True if the ratio of special characters in data is too high, otherwise returns False.
                '''
+               # If an open bracket exists, we expect a close bracket as well
+               for (key, value) in self.BRACKETED.iteritems():
+                       if key in data and not value in data:
+                               return True
+
                # For better filtering of false positives, require a lower ratio of special characters for very short strings
                if len(data) <= self.SUSPECT_STRING_LENGTH:
                        return (float(self._non_alpha_count(data)) / len(data)) >= self.SUSPECT_SPECIAL_CHARS_RATIO
@@ -244,13 +283,20 @@ class FileStrings(object):
                        if r is not None:
                                return r
 
+               # Large strings are automatically considered valid/interesting
                if strlen >= self.MAX_STRING_LENGTH:
                        return True
                elif strlen >= self.n:
-                       if self._has_vowels(string):
+                       # The chances of a random string being bracketed is pretty low.
+                       # If the string is bracketed, consider it valid.
+                       if self._is_bracketed(string):
+                               return True
+                       # Else, do some basic sanity checks on the string
+                       elif self._has_vowels(string):
                                if not self._too_many_special_chars(string):
                                        if not self._fails_grammar_rules(string):
                                                return True
+       
                return False
                
        def _add_string(self, offset, string, plug_pre):
index 5d3c978..c46758f 100644 (file)
@@ -1,3 +1,4 @@
+import os
 import urllib2
 from config import *
 
@@ -31,6 +32,7 @@ class Update:
                self.update_bincast()
                self.update_binarch()
                self.update_extract()
+               self.update_zlib()
 
        def _do_update_from_svn(self, prefix, fname):
                '''
@@ -41,6 +43,14 @@ class Update:
 
                Returns None.
                '''
+               # Get the local http proxy, if any
+               # csoban.kesmarki
+               proxy_url = os.getenv('HTTP_PROXY')
+               if proxy_url:
+                       proxy_support = urllib2.ProxyHandler({'http' : proxy_url})
+                       opener = urllib2.build_opener(proxy_support)
+                       urllib2.install_opener(opener)
+
                url = self.BASE_URL + prefix + fname
                
                try:
@@ -72,6 +82,14 @@ class Update:
                Returns None.
                '''
                self._do_update_from_svn(self.MAGIC_PREFIX, self.config.BINARCH_MAGIC_FILE)
+       
+       def update_zlib(self):
+               '''
+               Updates the zlib signature file.
+
+               Returns None.
+               '''
+               self._do_update_from_svn(self.MAGIC_PREFIX, self.config.ZLIB_MAGIC_FILE)
 
        def update_extract(self):
                '''
@@ -81,3 +99,4 @@ class Update:
                '''
                self._do_update_from_svn(self.CONFIG_PREFIX, self.config.EXTRACT_FILE)
 
+
index 89aa73b..44b3f48 100755 (executable)
@@ -1,14 +1,42 @@
-#!/bin/sh
+#!/bin/bash
+# Easy installer script for Debian-based systems.
+
+# The appropriate unrar package goes under different names in Debian vs Ubuntu
+sudo apt-get -y install unrar-nonfree
+if [ "$?" != "0" ]
+then
+       echo "WARNING: Failed to install 'unrar-nonfree' package, trying 'unrar' instead..."
+       sudo apt-get -y install unrar
+fi
 
 # Install binwalk/fmk pre-requisites and extraction tools
-sudo apt-get install subversion build-essential mtd-utils zlib1g-dev liblzma-dev gzip bzip2 tar unrar arj p7zip openjdk-6-jdk python-magic python-matplotlib
+sudo apt-get -y install git build-essential mtd-utils zlib1g-dev liblzma-dev gzip bzip2 tar arj p7zip p7zip-full openjdk-6-jdk python-magic python-matplotlib
 
 # Get and build the firmware mod kit
+sudo rm -rf /opt/firmware-mod-kit/
 sudo mkdir -p /opt/firmware-mod-kit
 sudo chmod a+rwx /opt/firmware-mod-kit
-rm -rf /opt/firmware-mod-kit/trunk
-svn checkout http://firmware-mod-kit.googlecode.com/svn/trunk /opt/firmware-mod-kit/trunk
-cd /opt/firmware-mod-kit/trunk/src && ./configure && make && cd -
+git clone https://code.google.com/p/firmware-mod-kit /opt/firmware-mod-kit/
+
+cd /opt/firmware-mod-kit/src
+./configure && sudo make
+if [ "$1" == "--sumount" ]
+then
+       # The following will allow you - and others - to mount/unmount file systems without root permissions.
+       # This may be problematic, especially on a multi-user system, so think about it first.
+       sudo chown root ./mountcp/mountsu
+       sudo chmod u+s ./mountcp/mountsu
+       sudo chmod o-w ./mountcp/mountsu
+
+       sudo chown root ./mountcp/umountsu
+       sudo chmod u+s ./mountcp/umountsu
+       sudo chmod o-w ./mountcp/umountsu
+
+       sudo chown root ./jffs2/sunjffs2
+       sudo chmod u+s ./jffs2/sunjffs2
+       sudo chmod o-w ./jffs2/sunjffs2
+fi
+cd -
 
 # Install binwalk
 sudo python setup.py install
index 947ffe5..f0c75a6 100644 (file)
@@ -15,7 +15,18 @@ DESCRIPTION
 
 INSTALLATION
 
-       To install binwalk, run the following command from the src directory:
+       For Debian/Ubuntu users, the easiest way to install binwalk and all dependencies/extraction utilities is to 
+       run the debian_quick_install.sh script from the src directory:
+
+               $ ./debian_quick_install.sh
+
+       You may optionally specify the --sumount option to the install script, which will allow you (and others) to
+       mount and unmount file systems without root privileges. This will allow you to extract file systems without
+       the need to run binwalk as root, but it may also be undesirable, especially on multi-user systems:
+
+               $ ./debian_quick_install.sh --sumount
+
+       To install just binwalk, run the following command from the src directory (see below for installing dependencies):
 
                $ sudo python setup.py install
 
index ce85d46..9cdea57 100644 (file)
 
 0       string          070701          ASCII cpio archive (SVR4 with no CRC),
 >110   byte            0               invalid
->110   byte            !0x2F
->>110  string          !TRAILER!!!     invalid
+#>110  byte            !0x2F
+#>>110 string          !TRAILER!!!     invalid
 >110   string          x               file name: "%s"
+>54    string          x               file size: "0x%.8s"
 >54    string          x               {jump-to-offset:0x%.8s+112}
 
 0       string          070702          ASCII cpio archive (SVR4 with CRC)
 >110   byte            0               invalid
->110   byte            !0x2F
->>110  string          !TRAILER!!!     invalid
+#>110  byte            !0x2F
+#>>110 string          !TRAILER!!!     invalid
 >110   string          x               file name: "%s"
+>54    string          x               file size: "0x%.8s"
 >54    string          x               {jump-to-offset:0x%.8s+112}
 
 
index debcfe6..6698ede 100644 (file)
 >>4    lelong          =694224000      \b, invalid date:
 >>4    lelong          >694224000      \b, last modified:
 >4      ledate          x               %s
+>4     lelong          x               \b{epoch:%d}
 >8      byte            2               \b, max compression
 >8      byte            4               \b, max speed
 
index 6def7c3..8c452a0 100644 (file)
@@ -97,3 +97,6 @@
 >15     string          SIGNED\040MESSAGE-      signed message
 >15     string          PGP\040SIGNATURE-       signature
 
+0      string          Salted__                OpenSSL encryption, salted,
+>8     belong          x                       salt: 0x%X
+>12    belong          x                       \b%X
index d186ac1..213dd6c 100644 (file)
 #0       string  MZ    Microsoft
 #>0x18  leshort <0x40 MS-DOS executable
 
-0 string MZ\0\0\0\0\0\0\0\0\0\0PE\0\0  Microsoft PE for MS Windows
+0      string          MZ\0\0\0\0\0\0\0\0\0\0
+>12    string          PE\0\0  Microsoft PE
+>0x18   leshort        <0x40   MS-DOS executable
 >>&18   leshort&0x2000  >0      (DLL)
 >>&88   leshort         0       (unknown subsystem)
 >>&88   leshort         1       (native)
 # 
 # Additional fields added by Craig Heffner
 #
-0       string          bFLT            BFLT executable 
+0       string          bFLT            BFLT executable
+>4     belong          <1              invalid
+>4     belong          >4              invalid
 >4      belong          x               version %ld, 
 >4      belong          4
 >8     belong          x               code offset: 0x%.8X, 
index d8bd6ea..355a3bd 100644 (file)
@@ -60,6 +60,7 @@
 # cramfs filesystem - russell@coker.com.au
 0       lelong  0x28cd3d45      CramFS filesystem, little endian
 >4     lelong  <0              invalid
+>4     lelong  >1073741824     invalid
 >4      lelong  x              size %lu
 >8      lelong  &1             version #2
 >8      lelong  &2             sorted_dirs
@@ -75,6 +76,7 @@
 
 0       belong  0x28cd3d45      CramFS filesystem, big endian
 >4     belong  <0              invalid
>4     belong  >1073741824     invalid
 >4      belong  x              size %lu
 >8      belong  &1             version #2
 >8      belong  &2             sorted_dirs
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >30    leshort >10     invalid
 >28     leshort x       version %d.
 >30     leshort x       \b%d,
->28    leshort >3      compression: 
+>28    leshort >3      compression:
 >>20   leshort 1       \bgzip,
 >>20   leshort 2       \blzma,
+>>20   leshort 3       \bgzip (non-standard type definition),
+>>20   leshort 4       \blzma (non-standard type definition),
 >>20   leshort 0       \binvalid,
 >>20   leshort >4      \binvalid,
 >28     leshort <3
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >28     beshort >3      compression:
 >>20    beshort 1       \bgzip,
 >>20    beshort 2       \blzma,
+>>20   beshort 3       \bgzip (non-standard type definition),
+>>20   beshort 4       \blzma (non-standard type definition),
 >>20    beshort 0       \binvalid,
 >>20    beshort >4      \binvalid,
 >28     beshort <3
 >28     leshort >3      compression:
 >>20    leshort 1       \bgzip,
 >>20    leshort 2       \blzma,
+>>20   leshort 3       \bgzip (non-standard type definition),
+>>20   leshort 4       \blzma (non-standard type definition),
 >>20    leshort 0       \binvalid,
 >>20    leshort >4      \binvalid,
 >28     leshort <3
 >28     leshort >3      compression:
 >>20    leshort 1       \bgzip,
 >>20    leshort 2       \blzma,
+>>20   leshort 3       \bgzip (non-standard type definition),
+>>20   leshort 4       \blzma (non-standard type definition),
 >>20    leshort 0       \binvalid,
 >>20    leshort >4      \binvalid,
 >28     leshort <3
 >32    belong  2                                       plain text,
 >36    belong  x                                       %d files
 
-
 # netboot image - Juan Cespedes <cespedes@debian.org>
 0      lelong                  0x1b031336L     Netboot image,
 >4     lelong&0xFFFFFF00       0
 >>4    lelong&0x100            0x000           mode 2
 >>4    lelong&0x100            0x100           mode 3
 >4     lelong&0xFFFFFF00       !0              unknown mode (invalid)
+
+0      string                  WDK\x202.0\x00  WDK file system, version 2.0{offset-adjust:-18}
+
+0      string          CD001                                           ISO{offset-adjust:-32769}
+>6144  string          !NSR0                                           9660 CD-ROM filesystem data,
+>6144  string          NSR0                                            UDF filesystem data,
+>6148  string          1                                               version 1.0,
+>6148  string          2                                               version 2.0,
+>6148  string          3                                               version 3.0
+>6148  byte            >0x33                                           invalid version,
+>6148  byte            <0x31                                           invalid version,
+>38    string          >\0                                             volume name: "%s",
+>2047  string          \000CD001\001EL\x20TORITO\x20SPECIFICATION      bootable
+
index b9ce312..9428d35 100644 (file)
 >48    string          x               root device: "%s"
 
 # trx image file
-0      string          HDR0            TRX firmware header, little endian, header size: 28 bytes, 
+0      string          HDR0            TRX firmware header, little endian, header size: 28 bytes,
+>4     lelong          <1              invalid
 >4     lelong          x               image size: %d bytes,
 >8     lelong          x               CRC32: 0x%X
->12    lelong          x               flags/version: 0x%X
+>12    leshort         x               flags: 0x%X,
+>14    leshort         x               version: %d
 
 0      string          0RDH            TRX firmware header, big endian, header size: 28 bytes,
+>4     belong          <1              invalid
 >4     belong          x               image size: %d bytes,
 >8     belong          x               CRC32: 0x%X
->12    belong          x               flags/version: 0x%X
+>12    beshort         x               flags: 0x%X,
+>14    beshort         x               version: %d
 
 
 # Ubicom firmware image
 >58    leshort         x                               firmware version: 0x%X,
 >60    leshort         x                               starting code segment: 0x%X,
 >62    leshort         x                               code size: 0x%X
+
+# NPK firmware header, used by Mikrotik
+0              belong          0x1EF1D0BA              NPK firmware header,
+>4             lelong          <0                      invalid
+>4             lelong          x                       image size: %d,
+>14            string          x                       image name: "%s",
+>(48.l+58)     string          x                       description: "%s
+>(48.l+121)    string          x                       \b%s"
+
+# Ubiquiti firmware signatures
+0      string          UBNT            Ubiquiti firmware header,
+>0x104 belong          x               ~CRC32: 0x%X,
+>4     string          x               version: "%s"
+
+0      string          GEOS            Ubiquiti firmware header,
+>0x104 belong          x               ~CRC32: 0x%X,
+>4     string          x               version: "%s"
+
+# Too many false positives...
+#0     string          OPEN            Ubiquiti firmware header, third party,
+#>0x104        belong          x               ~CRC32: 0x%X,
+#>4    string          x               version: "%s"
+
+0      string          PARTkernel      Ubiquiti firmware kernel partition
+0      string          PARTcramfs      Ubiquiti firmware CramFS partition
+0      string          PARTrootfs      Ubiquiti firmware rootfs partition
+
+# Found in DIR-100 firmware
+0       string         AIH0N           AIH0N firmware header, header size: 48,
+>12     belong         x               size: %d,
+>8      belong         !0              executable code,
+>>8     belong         x               load address: 0x%X,
+>32     string         x               version: "%s"
+
+0      belong          0x5EA3A417      SEAMA firmware header, big endian,
+>6     beshort         x               meta size: %d,
+>8     belong          x               size: %d
+
+0      lelong          0x5EA3A417      SEAMA firmware header, little endian,
+>6     leshort         x               meta size: %d,
+>8     lelong          x               size: %d
+
+0      belong          0x4D544443      NSP firmware header, big endian,
+>16    belong          x               header size: %d,
+>20    belong          x               image size: %d,
+>4     belong          x               kernel offset: %d,
+>12    belong          x               header version: %d,
+
+0      lelong          0x4D544443      NSP firmware header, little endian,
+>16    lelong          x               header size: %d,
+>20    lelong          x               image size: %d,
+>4     lelong          x               kernel offset: %d,
+>12    lelong          x               header version: %d,
+
+# http://www.openwiz.org/wiki/Firmware_Layout#Beyonwiz_.wrp_header_structure
+0      string          WizFwPkgl       Beyonwiz firmware header,
+>20    string          x               version: "%s"
+
+0      string          BLI223WJ0       Thompson/Alcatel encoded firmware,
+>32    byte            x               version: %d.
+>33    byte            x               \b%d.
+>34    byte            x               \b%d.
+>35    byte            x               \b%d,
+>44    belong          x               size: %d,
+>48    belong          x               crc: 0x%.8X,
+>35    byte            x               try decryption tool from:
+>35    byte            x               http://download.modem-help.co.uk/mfcs-A/Alcatel/Modems/Misc/
+
index d517ee0..d3f55f5 100644 (file)
 # PC bitmaps (OS/2, Windows BMP files)  (Greg Roelofs, newt@uchicago.edu)
 0       string          BM
 >14     leshort         12              PC bitmap, OS/2 1.x format
+>>18   lelong          <1              invalid
+>>18   lelong          >1000000        invalid
 >>18    leshort         x               \b, %d x
+>>20   lelong          <1              invalid
+>>20   lelong          >1000000        invalid
 >>20    leshort         x               %d
 >14     leshort         64              PC bitmap, OS/2 2.x format
+>>18   lelong          <1              invalid
+>>18   lelong          >1000000        invalid
 >>18    leshort         x               \b, %d x
+>>20   lelong          <1              invalid
+>>20   lelong          >1000000        invalid
 >>20    leshort         x               %d
 >14     leshort         40              PC bitmap, Windows 3.x format
+>>18   lelong          <1              invalid
+>>18   lelong          >1000000        invalid
 >>18    lelong          x               \b, %d x
+>>22   lelong          <1              invalid
+>>22   lelong          >1000000        invalid
 >>22    lelong          x               %d x
+>>28   lelong          <1              invalid
+>>28   lelong          >1000000        invalid
 >>28    leshort         x               %d
 >14     leshort         128             PC bitmap, Windows NT/2000 format
+>>18   lelong          >1000000        invalid
+>>18   lelong          <1              invalid
 >>18    lelong          x               \b, %d x
+>>22   lelong          <1              invalid
+>>22   lelong          >1000000        invalid
 >>22    lelong          x               %d x
+>>28   lelong          <1              invalid
+>>28   lelong          >1000000        invalid
 >>28    leshort         x               %d
 
 #------------------------------------------------------------------------------
index 7275885..99bfa4e 100755 (executable)
--- a/setup.py
+++ b/setup.py
@@ -5,39 +5,45 @@ from distutils.core import setup
 
 WIDTH = 115
 
-# Check for pre-requisite modules
-print "checking pre-requisites"
-try:
-       import magic
+# Check for pre-requisite modules only if --no-prereq-checks was not specified
+if "--no-prereq-checks" not in sys.argv:
+       print "checking pre-requisites"
        try:
-               magic.MAGIC_NO_CHECK_TEXT
+               import magic
+               try:
+                       magic.MAGIC_NO_CHECK_TEXT
+               except Exception, e:
+                       print "\n", "*" * WIDTH
+                       print "Pre-requisite failure:", str(e)
+                       print "It looks like you have an old or incompatible magic module installed."
+                       print "Please install the official python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/"
+                       print "*" * WIDTH, "\n"
+                       sys.exit(1)
        except Exception, e:
                print "\n", "*" * WIDTH
                print "Pre-requisite failure:", str(e)
-               print "It looks like you have an old or incompatible magic module installed."
-               print "Please install the official python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/"
+               print "Please install the python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/"
                print "*" * WIDTH, "\n"
                sys.exit(1)
-except Exception, e:
-       print "\n", "*" * WIDTH
-       print "Pre-requisite failure:", str(e)
-       print "Please install the python-magic module, or download and install it from source: ftp://ftp.astron.com/pub/file/"
-       print "*" * WIDTH, "\n"
-       sys.exit(1)
 
-try:
-       import matplotlib.pyplot
-except Exception, e:
-       print "\n", "*" * WIDTH
-       print "Pre-requisite check warning:", str(e)
-       print "To take advantage of this tool's entropy plotting capabilities, please install the python-matplotlib module."
-       print "*" * WIDTH, "\n"
+       try:
+               import matplotlib
+               matplotlib.use('Agg')
+               import matplotlib.pyplot
+               import numpy
+       except Exception, e:
+               print "\n", "*" * WIDTH
+               print "Pre-requisite check warning:", str(e)
+               print "To take advantage of this tool's entropy plotting capabilities, please install the python-matplotlib module."
+               print "*" * WIDTH, "\n"
+       
+               if raw_input('Continue installation without this module (Y/n)? ').lower().startswith('n'):
+                       print 'Quitting...\n'
+                       sys.exit(1)
+else:
+       # This is super hacky.
+       sys.argv.pop(sys.argv.index("--no-prereq-checks"))
        
-       if raw_input('Continue installation without this module (Y/n)? ').lower().startswith('n'):
-               print 'Quitting...\n'
-               sys.exit(1)
-               
-
 # Generate a new magic file from the files in the magic directory
 print "generating binwalk magic file"
 magic_files = listdir("magic")
@@ -54,7 +60,7 @@ install_data_files = ["magic/*", "config/*", "plugins/*"]
 
 # Install the binwalk module, script and support files
 setup( name = "binwalk",
-       version = "1.2",
+       version = "1.2.2-1",
        description = "Firmware analysis tool",
        author = "Craig Heffner",
        url = "http://binwalk.googlecode.com",
diff --git a/support/lzma_gen.py b/support/lzma_gen.py
deleted file mode 100755 (executable)
index 9e4fbd9..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/usr/bin/env python
-# Generates LZMA signatures for each valid LZMA property in the properties list.
-
-
-properties = [
-       0x01,
-       0x02,
-       0x03,
-       0x04,
-       0x09,
-       0x0A,
-       0x0B,
-       0x0C,
-       0x12,
-       0x13,
-       0x14,
-       0x1B,
-       0x1C,
-       0x24,
-       0x2D,
-       0x2E,
-       0x2F,
-       0x30,
-       0x31,
-       0x36,
-       0x37,
-       0x38,
-       0x39,
-       0x3F,
-       0x40,
-       0x41,
-       0x48,
-       0x49,
-       0x51,
-       0x5A,
-       0x5B,
-       0x5C,
-       0x5D,
-       0x5E,
-       0x63,
-       0x64,
-       0x65,
-       0x66,
-       0x6C,
-       0x6D,
-       0x6E,
-       0x75,
-       0x76,
-       0x7E,
-       0x87,
-       0x88,
-       0x89,
-       0x8A,
-       0x8B,
-       0x90,
-       0x91,
-       0x92,
-       0x93,
-       0x99,
-       0x9A,
-       0x9B,
-       0xA2,
-       0xA3,
-       0xAB,
-       0xB4,
-       0xB5,
-       0xB6,
-       0xB7,
-       0xB8,
-       0xBD,
-       0xBE,
-       0xBF,
-       0xC0,
-       0xC6,
-       0xC7,
-       0xC8,
-       0xCF,
-       0xD0,
-       0xD8,
-]
-
-min_properties = 0x40
-common_properties = [0x5D, 0x6D]
-
-dictionary_sizes = [
-       65536,
-       131072,
-       262144,
-       524288,
-       1048576,
-       2097152,
-       4194304,
-       8388608,
-       16777216,
-       33554432,
-]
-
-for fbyte in properties:
-       if fbyte < min_properties:
-               fexclude = '{filter-exclude}'
-               continue
-       else:
-               fexclude = ''
-
-       sig = '\n# ------------------------------------------------------------------\n'
-       sig += '# Signature for LZMA compressed data with valid properties byte 0x%.2X\n' % fbyte
-       sig += '# ------------------------------------------------------------------\n'
-       sig += '0\t\tstring\t\\x%.2X\\x00\\x00\tLZMA compressed data, properties: 0x%.2X,%s\n' % (fbyte, fbyte, fexclude)
-
-       sig += '\n# These are all the valid dictionary sizes supported by LZMA utils.\n'
-       for i in range(0, len(dictionary_sizes)):
-               if i < 6:
-                       indent = '\t\t'
-               else:
-                       indent = '\t'
-
-               if i == len(dictionary_sizes)-1:
-                       invalid = 'invalid'
-               else:
-                       invalid = ''
-
-               sig += '%s1%slelong\t!%d\t%s\n' % ('>'*(i+1), indent, dictionary_sizes[i], invalid)
-
-       sig += '>1\t\tlelong\tx\t\tdictionary size: %d bytes,\n'
-       
-       sig += '\n# Assume that a valid size will be greater than 32 bytes and less than 1GB (a value of -1 IS valid).\n# This could technically be valid, but is unlikely.\n'
-       sig += '>5\t\tlequad\t!-1\n'
-       sig += '>>5\t\tlequad\t<32\t\tinvalid\n'
-       sig += '>>5\t\tlequad\t>0x40000000\tinvalid\n'
-
-       sig += '\n# These are not 100%. The uncompressed size could be exactly the same as the dicionary size, but it is unlikely.\n'
-       sig += '# Since most false positives are the result of repeating sequences of bytes (such as executable instructions),\n'
-       sig += '# marking matches with the same uncompressed and dictionary sizes as invalid eliminates much of these false positives.\n'
-
-       for dsize in dictionary_sizes:
-               if dsize < 16777216:
-                       indent = '\t\t'
-               else:
-                       indent = '\t'
-
-               sig += '>1\t\tlelong\t%d\n' % dsize
-               sig += '>>5\t\tlequad\t%d%sinvalid\n' % (dsize, indent)
-
-       sig += '>5\t\tlequad\tx\t\tuncompressed size: %lld bytes\n'
-       
-       print sig