Wikia

CHDK Wiki

Srsa 4c/GPL:disassemblev7.pl

Talk0
573pages on
this wiki

< User:Srsa 4c

Script to disassemble ARMv7 Thumb-2 dumps. Original version is GPL:disassemble.pl.

Version 0.2, new options added
See page history for earlier versions.

Some notes:

objdump will mis-disassemble some interworking-related code, because those parts contain ARM instructions while the script forces Thumb disassembly.

For example:

wrong:
loc_fc2514b8:
fc2514b8: 	f004 e51f 			; <UNDEFINED> instruction: 0xf004e51f
fc2514bc: 	8f85      	ldrh	r5, [r0, #60]	; 0x3c
fc2514be: 	010c      	lsls	r4, r1, #4

correct:
loc_fc2514b8:
fc2514b8: 	e51ff004        ldr pc, [pc, #-4]       ; execution continues at 0x10c8f85
fc2514bc: 	010c8f85        .long 0x10c8f85         ; CPU switches to Thumb mode because bit 0 of the address is set

arm_farcall_thumb_arm.sh (binutils); a bug report with some explanation

#!/usr/bin/perl
 
# modified fork of http://chdk.wikia.com/wiki/GPL:disassemble.pl
# only meant to be used on dumps which are only using Thumb(-2) instructions
# it expects lowercase disassembly listing (which is what objdump should output)
# registers r10, r11, r12 are expected to be named sl, fp, ip
# v0.2  new option (h): convert decimal immediate values to hex
#       new option (r): try converting adr and ldr instructions to position-independent ldr (may not be 100% reliable)
# v0.12 special handling of tbb, tbh jumptables, bugfixes
# v0.11 tbb, tbh jumptables: try to find element count
# v0.1 initial version
 
# original copyright notice and notes below
 
# disassemble alien binary blobs
# look for "ldr .., [pc + #nn]" etc. 
# and add strings and values it refers to
#
# (c) 2008 chr
# GPL V3+
#
# v0.2.1:
# * create labels for branch targets
# v0.2:
# * catch unaligned strings
# * note on strings
# * check for integer overflow
 
# use Data::Dumper;
# $Data::Dumper::Sortkeys = 1;
 
# Added to support execution of disassembler.pl
# when not in the same folder as binary file to
# be disassembled.
use Cwd;
# Directory the script was started from; a relative dump path given on the
# command line is resolved against it.
$firmware_basepath = getcwd;

# adjust these for your needs (note final slash):
#$path = "";
$path = "$ENV{'HOME'}/bin/arm-none48/bin/";

# note on "strings": default is a minimum length of 4 chars.
# So if you are hunting for e.g. "FI2", add -n3.
# However, that gives a lot of false positives.
$strdump = $path."arm-none-eabi-strings -t x";
$objdump = $path."arm-none-eabi-objdump";
$objcopy = $path."arm-none-eabi-objcopy";
$strip = $path."arm-none-eabi-strip";

# transform immediate constants from objdump's decimal format to hexadecimal?
$opt_immhex = 0;
# output more "ready-to-use" disassembly (transform ldr and adr instructions to position-independent ldr)
$opt_rtu = 0;

if (@ARGV < 2) {
    die("Utility to disassemble ARMv7/Thumb-2 binaries\n\nUsage: $0 0x<offset> <dump.bin> [options]\nread script for options\n");
}

# The optional third argument is a string of option letters (case-insensitive):
#   h - convert decimal immediate values to hex
#   r - rewrite pc-relative ldr/adr into position-independent ldr
my $opt_letters = defined $ARGV[2] ? $ARGV[2] : "";
$opt_immhex = 1 if $opt_letters =~ /h/i;
$opt_rtu    = 1 if $opt_letters =~ /r/i;

$offset  = $ARGV[0];
$binfile = $ARGV[1];

# Resolve the dump path against the start directory. rel2abs leaves an
# already absolute path alone, so both "dump.bin" and "/tmp/dump.bin" work
# (blindly prefixing the cwd broke the absolute case).
require File::Spec;
$firmware_file_path = File::Spec->rel2abs($binfile, $firmware_basepath);

# check if we wrap over
# stat() failure and an empty file are reported separately: $! is only
# meaningful when stat() itself failed.
my @firmware_stat = stat($firmware_file_path)
    or die "error stat($firmware_file_path): $!";
$flen = $firmware_stat[7];
die "error: $firmware_file_path is empty\n" unless $flen;

if ( hex($offset) + $flen - 1 > 0xffffffff) {
    die "offset + filesize - 1 > 0xffffffff. We can't wrap around!\n\ngame over"
}
 
#####
# Pass 1: run the "strings" tool over the dump and remember every string by
# its load address. The list is also written to <dump>.strings.
print "string dump\n";
# Maps an address (lowercase hex, no "0x", no zero padding) to the string
# located there. Each string is entered twice: under its exact address and
# under that address rounded down to a multiple of 4.
my %strings;
open(IN, "-|", "$strdump \"$firmware_file_path\"") or die "cannot start $strdump \"$firmware_file_path\": $!";
open(OUT, ">", "$firmware_file_path.strings") or die "cannot write to $firmware_file_path.strings: $!";
while (my $entry = <IN>) {
    # Expected format ("strings -t x"): "<hex file offset> <text>".
    # Skip anything else instead of reusing the captures of an earlier line.
    next unless $entry =~ /^ *([[:xdigit:]]+) (.*)/;
    my ($file_off, $text) = ($1, $2);

    my $addr     = hex($file_off) + hex($offset);
    my $addr_str = sprintf("%x", $addr);
    $strings{$addr_str} = $text;

    print OUT "$addr_str $text\n";

    # align string address so unaligned strings appear in disassembly;
    # one '.' is prepended per byte skipped by the alignment
    $addr_str = sprintf("%x", $addr & ~0x3);
    my $offs = $addr & 0x3;
    $strings{$addr_str} = '.' x $offs . $text;
}
close IN;
# buffered write errors only surface at close
close(OUT) or die "cannot write to $firmware_file_path.strings: $!";
 
#$strings{'ff810164'} = "TEST test";
#$strings{'ff810420'} = "add test";
#print Dumper(\%strings);
#exit;
 
#####
# Pass 2: wrap the raw dump into an ELF file that objdump can disassemble.
print "create elf file\n";

# Convert the binary to a little-endian ARM ELF, relocated to the load offset.
`$objcopy --change-addresses=$offset -I binary -O elf32-littlearm -B arm \"$firmware_file_path\" \"$firmware_file_path.elf\"`;
die "$objcopy failed while creating $firmware_file_path.elf (status $?)\n" if $? != 0;

# Mark the .data section as code so that "objdump -d" disassembles it.
`$objcopy --set-section-flags .data=code,load,alloc,content \"$firmware_file_path.elf\"`;
die "$objcopy failed while setting section flags on $firmware_file_path.elf (status $?)\n" if $? != 0;

#strip added to remove irritating comments
# The path is quoted like in the objcopy calls, so paths with spaces work.
`$strip \"$firmware_file_path.elf\"`;
warn "$strip failed on $firmware_file_path.elf (status $?), continuing\n" if $? != 0;
 
#####
# Pass 3: collect the targets of all direct branches so that the final
# disassembly can emit "loc_<addr>:" labels. First sightings are also
# written to <dump>.labels as "<target> (<address of first referrer>)".
print "label scan\n";
# Maps a branch target (hex string exactly as printed by objdump) to the
# number of instructions that reference it.
my %labels;
open(IN, "-|", "$objdump -Mforce-thumb -d \"$firmware_file_path.elf\"")
      or die "cannot start $objdump \"$firmware_file_path\": $!";
open(OUT, ">", "$firmware_file_path.labels") or die "cannot write to $firmware_file_path.labels: $!";

while (my $dis = <IN>) {
        my ($addr, $dest);
        # b, bl, blx, b<cond>, ... with an absolute target
        if ($dis =~ /^ *([[:xdigit:]]+):.+\t(b[[:alpha:]\.]*)\t0x([[:xdigit:]]+)/) {
                # "bkpt 0x.." has the same shape but its operand is not an
                # address; the disassembly pass special-cases it as well.
                next if $2 eq "bkpt";
                ($addr, $dest) = ($1, $3);
        }
        # cbz / cbnz rN, target
        elsif ($dis =~ /^ *([[:xdigit:]]+):.+\tcbn?z\tr.+0x([[:xdigit:]]+)/) {
                ($addr, $dest) = ($1, $2);
        }
        else {
                next;
        }

        # only the first reference is written to the labels file
        print OUT "$dest ($addr)\n" unless $labels{$dest};
        $labels{$dest} += 1;
        # progress indicator
        print "\r0x$addr  ";
}
close IN;
close(OUT) or die "cannot write to $firmware_file_path.labels: $!";
 
#####
# Final pass: disassemble the ELF again and write the annotated listing to
# <dump>.dis. Each objdump line is matched against a chain of instruction
# patterns; depending on the match, the line gets the referenced word/string
# appended, branch targets replaced by loc_<addr> names, or a jumptable
# listing added. Labels found while decoding jumptables are appended to
# <dump>.labels.
print "\ndisassemble and string lookup\n";
# fifo for previous lines, 12 elements
# (newest line first; read by get_regcmp)
@hist = (" "," "," "," "," "," "," "," "," "," "," "," ");
open(IN, "$objdump -Mforce-thumb -d \"$firmware_file_path.elf\" |")
      or die "cannot start $objdump \"$firmware_file_path\": $!";
open(OUT,">$firmware_file_path.dis") or die "cannot write to $firmware_file_path.dis: $!";
# append mode: adds to the labels file instead of truncating it
open(OUTL,">>$firmware_file_path.labels") or die "cannot write to $firmware_file_path.labels: $!";
# raw dump, read by get_word / get_mem
open(BIN, "<$firmware_file_path") or die "cannot read $firmware_file_path";
 
binmode BIN;
 
# comment out lines before this address
$cmmnt = 0;
# NOTE(review): the next statement is a no-op (it neither declares nor
# initialises $cmmntstr); the variable is first assigned inside the loop.
$cmmntstr;
 
while (<IN>) {
    # pass objdump's "..." placeholder lines through unchanged
    if ($_ eq " ...\n") { print OUT $_; next;}
    # Split the line into address, raw opcode words and instruction text.
    # Three patterns are tried, most specific first:
    #   1. trailing comment of the form "; (... 0x<target>)"
    #   2. trailing comment of the form "; ... 0x<target>"
    #   3. no usable comment: $scrap and $target stay undefined
    my ($addr, $words, $iline, $scrap, $scrap2, $target) = $_ =~ /^ *([[:xdigit:]]*):\t([[:xdigit:] ]*)\t(.*)(\t; )\((.*)0x([[:xdigit:]]*)\)/;
    if ($addr eq "") {
        ($addr, $words, $iline, $scrap, $scrap2, $target) = $_ =~ /^ *([[:xdigit:]]*):\t([[:xdigit:] ]*)\t(.*)(\t; )(.*)0x([[:xdigit:]]*)/; 
    }
    if ($addr eq "") {
        ($addr, $words, $iline) = $_ =~ /^ *([[:xdigit:]]*):\t([[:xdigit:] ]*)\t(.*)/; 
        # NOTE(review): "my" binds tighter than the comma, so this declares
        # only a new block-local $scrap and merely evaluates the other two.
        # It has no effect on the outer variables, which are already undef
        # here because the failed list assignment above cleared them.
        my $scrap, $scrap2, $target;
    }
    # not an instruction line (headers, blank lines, ...)
    if ($addr eq "") {
        next;
    }
    # default output: opcode words and instruction text, comment dropped
    my $line = "\t".$words."\t".$iline;
 
    # extra lines to print after the current one (jumptable listing)
    my $additions = "";
 
    # --- pc-relative ldr: annotate with the loaded word ---
    # 800b698:	4919      	ldr	r1, [pc, #100]	; (0x800b700)
    #    800c:	f8df d054 	ldr.w	sp, [pc, #84]	; 0x8064
    if (
        ($target ne "") &&
        ($iline =~ /^(ldr(.*?)\t(.+?)\[pc, #([-\d]+).*)/)
    ) {
        $line .= $scrap;
        # file offset of the literal, its address, and the word stored there
        my $off = hex($target) - hex($offset);
        my $point = $target;
        my $value = &get_word($off);
        if ($opt_rtu eq 1) {
            # rewrite as "ldr rX, =0x<value>"; a ".w" suffix is dropped
            my $s1 = $2;
            my $svalue = sprintf("%x",hex($value));
            if ($s1 eq ".w") {$s1 = ""}
            $line = "\t".$words."\t"."ldr".$s1."\t".$3."=0x$svalue".$scrap
        }
        else {
            $line .= "0x$point: ($value) ";
        }
        if (my $str = $strings{$point}) {
            # add pointed string
            $line .= qq| *"$str"|;
        }
        elsif (my $str = $strings{$value}) {
            # pointer to pointer ...
            $line .= qq| **"$str"|;
        }
        # odd address (bit 0 set), same top nibble as the current address,
        # and below a bound derived from the current address
        elsif ( (( hex($point) & 1) == 1) &&
                (( hex($point) & 0xf0000000) == ( hex($addr) & 0xf0000000)) &&
                ( hex($point) < ((hex($addr)+0x2000000) & 0xff000000))
        ) {
            # could be a subroutine address
            $line .= "...sub?";
        }
        # same test applied to the loaded word
        elsif ( (( hex($value) & 1) == 1) &&
                (( hex($value) & 0xf0000000) == ( hex($addr) & 0xf0000000)) &&
                ( hex($value) < ((hex($addr)+0x2000000) & 0xff000000))
        ) {
            # could be a subroutine address
            $line .= "...sub?";
        }
    }
    # --- add rX, pc, #imm (adr): annotate with the computed address ---
    # fc036fd6: 	a1ff      	add	r1, pc, #1020	; (adr r1, 0xfc0373d4)
    # the add -> adr conversion may be incorrect when 'add' is conditional
    elsif (
        ($target ne "") &&
        ($iline =~ /^(add(.*?)\t(.+?)pc, #([-\d]+))/)
    ) {
        $line .= $scrap;
        my $off = hex($target) - hex($offset);
        my $point = $target;
        my $value = &get_word($off);
        if ($opt_rtu eq 1) {
            # rewrite as "ldr rX, =0x<address>" and keep the original
            # instruction text in the comment; a "w" suffix is dropped
            my $s1 = $2;
            if ($s1 eq "w") {$s1 = ""}
            my $spoint = sprintf("%x",hex($point));
            #$line = "\t".$words."\t"."ldr".$s1."\t".$3."=0x$value".$scrap
            $line = "\t".$words."\t"."ldr".$s1."\t".$3."=0x$spoint".$scrap."(".$iline.")"
        }
        else {
            $line .= "0x$point: ($value) ";
        }
        if (my $str = $strings{$point}) {
            # add pointed string
            $line .= qq| *"$str"|;
        }
        elsif (my $str = $strings{$value}) {
            # pointer to pointer ...
            $line .= qq| **"$str"|;
        }
        elsif ( (( hex($point) & 1) == 1) &&
                (( hex($point) & 0xf0000000) == ( hex($addr) & 0xf0000000)) &&
                ( hex($point) < ((hex($addr)+0x2000000) & 0xff000000))
        ) {
            # could be a subroutine address
            $line .= "...sub?";
        }
        elsif ( (( hex($value) & 1) == 1) &&
                (( hex($value) & 0xf0000000) == ( hex($addr) & 0xf0000000)) &&
                ( hex($value) < ((hex($addr)+0x2000000) & 0xff000000))
        ) {
            # could be a subroutine address
            $line .= "...sub?";
        }
    } 
    # --- sub rX, pc, #imm: the target is computed here, because the
    #     objdump comment does not contain it ---
    # fc020922: 	f2af 112c 	subw	r1, pc, #300	; 0x12c
    # the sub -> adr conversion may be incorrect when 'sub' is conditional
    # in theory, destination calculation is: (pc+4-offset)&0xfffffffc
    elsif ($iline =~ /^(sub(.*?)\t(.+?)pc, #([-\d]+))/)
    {
        # immediate operand
        my $s2 = $4;
        if ($scrap eq "") {
            $scrap = "\t; ";
        }
        $line .= $scrap;
        # $align1 is 2 when bit 1 of the instruction address is clear and 0
        # otherwise; together with the constant 2 below this adds 4 or 2
        my $align1 = hex($addr) & 0x2;
        if ($align1 == 0) {
            $align1 = 2;
        }
        else {
            $align1 = 0;
        }
        # align2: bit0 might be the thumb bit, these references are for subroutines(?)
        my $align2 = ($s2+0) & 0x1;
        my $off = hex($addr) - hex($offset) - $s2 + 2 + $align1 - $align2;
        my $point = sprintf("%08x", hex($addr) - $s2 + 2 + $align1 - $align2);
        my $spoint = sprintf("%x",hex($point));
        my $value = &get_word($off);
        # NOTE(review): $value is a hex *string* from get_word but is
        # compared numerically here without hex(), so e.g. "fc0201a5"
        # counts as 0. Confirm whether hex($value) (or $point) was intended.
        if (($align2 == 1) && ($value >= hex($offset))) {
            if ($opt_rtu eq 1) {
                my $s1 = $2;
                if ($s1 eq "w") {$s1 = ""}
                # $line = "\t".$words."\t"."ldr".$s1."\t".$3."=0x$value".$scrap
                $line = "\t".$words."\t"."ldr".$s1."\t".$3."=0x$spoint".$scrap."(".$iline.")"
            }
            else {
                $line .= "sub_$point: ($value) ";
            }
        }
        else {
            if ($opt_rtu eq 1) {
                my $s1 = $2;
                if ($s1 eq "w") {$s1 = ""}
                # $line = "\t".$words."\t"."ldr".$s1."\t".$3."=0x$value".$scrap
                $line = "\t".$words."\t"."ldr".$s1."\t".$3."=0x$spoint".$scrap."(".$iline.")"
            }
            else {
                $line .= "0x$point: ($value) ";
            }
        }
        if (my $str = $strings{$point}) {
            $line .= qq| *"$str"|;
        }
        elsif (my $str = $strings{$value}) {
            $line .= qq| **"$str"|;
        }
    }
    # --- direct branches: replace the absolute target with loc_<addr> ---
    # fc0d08b6: 	f181 ea34 	blx	0xfc251d20
    # fc0d08c8: 	d035      	beq.n	0xfc0d0936
    # fc0d08f0: 	f7ff fdc2 	bl	0xfc0d0478
    # bkpt is special cased
    # fc055f14: 	beec      	bkpt	0x00ec
    elsif ($line =~ /^(.*\t(b[[:alpha:]\.]*)\t)0x([[:xdigit:]]+)/) {
        if ($2 ne "bkpt") {
            $line = "$1loc_$3"
        }
    }
    # fc02015a: 	b14c      	cbz	r4, 0xfc020170
    elsif ($line =~ /^(.*\tcbn?z\tr.+)0x([[:xdigit:]]+)/) {
            $line = "$1loc_$2"
    }
    # fc06fa58: 	e8df f001 	tbb	[pc, r1]
    # fc06e5a2: 	e8df f010 	tbh	[pc, r0, lsl #1]
    # pc based table branch
    elsif ($line =~ /^(.*\ttb([bh])\t\[pc, ([rlsfi][[:digit:]rlp]).*)/) {
            # estimated number of table entries, derived from the preceding
            # cmp on the index register (0 if none was found)
            my $guess = &get_regcmp($3);
            $line = "$1\t; (jumptable: $3, $guess elements)";
            # print a nice jumptable
            # entry size in bytes: 1 for tbb, 2 for tbh
            my $o = ($2 eq "b") ? 1 : 2;
            # table size in bytes; for byte tables ($guess + 1) & 0x3fe
            # rounds the entry count up to an even number
            if ($o == 2) {
                $cmmnt = $guess*2;
            }
            else {
                $cmmnt = ($guess + 1) & 0x3fe;
            }
            # determine next real instruction address
            $cmmnt += hex($addr) + 4;
            # NOTE(review): only $n1 is declared lexical by this "my"; $m,
            # $p, $pp and $ppp are package globals.
            my $n1, $m, $p, $pp, $ppp;
            for ($n1 = 0; $n1 < $guess; $n1++) {
                # table entry $n1; the table starts right after this
                # 4-byte instruction
                $m = get_mem(hex($addr) - hex($offset) + 4 + $o*$n1, $o);
                # branch destination: table start + 2 * entry value
                $p = hex($addr) + 4 + hex($m) * 2;
                $pp = sprintf("%x", $p);
                # address of the table entry itself
                $ppp = sprintf("%x", hex($addr)+$n1*$o+4);
                if ($labels{$pp} lt 1) {
                        # add new label to the labels file
                        print OUTL "$pp ($addr)\n";
                }
                # add label to the list so it can be picked up later
                $labels{$pp} += 1;
                # the jumptable text can't be printed right now, let's buffer it
                $additions .= "$ppp: \t; jump to\tloc_$pp (case ".$n1.")\n";
            }
    }
    # table branch with non-pc base
    # (only the element count estimate is added)
    elsif ($line =~ /^(.*\ttb([bh])\t\[.+, ([rlsfi][[:digit:]rlp]).*)/) {
        my $guess = &get_regcmp($3);
        $line = "$1\t; (jumptable: $3, $guess elements)"
    }
    # last category, convert decimal immediate values to hex if requested
    # (only the first "#<digits>" of the line; values below 10 are kept)
    elsif (($opt_immhex eq 1) && ($iline =~ /(.+?#)([[:digit:]]+)(.*)/)) {
        if ($2+0 > 9) {
            my $h = sprintf("0x%x", $2+0);
            $line = "\t".$words."\t".$1.$h.$3;
        }
    }
    # insert label
    # (the reference count is shown when there is more than one reference)
    if ($labels{$addr} gt 1) {
            print OUT "loc_$addr: ; $labels{$addr} refs\n";
    } elsif ($labels{$addr} gt 0) {
            print OUT "loc_$addr:\n";
    }
    # add string comment
    if (my $str = $strings{$addr}) {
        print OUT qq|"$str":\n|;
    }
 
    # line is not marked as commented (logic behind this could be smarter)
    if (hex($addr) >= $cmmnt) {
        $cmmntstr = "";
    }
    # write current disassembly line into the target file
    print OUT "$addr: $line$cmmntstr\n";
    # print added text lines, if any
    if ($additions ne "") {
        print OUT $additions;
    }
    # if current line is commented, add comment text (which is fixed at the moment)
    # (takes effect from the next line on, i.e. the tbb/tbh line itself is
    # printed without it)
    if (hex($addr) < $cmmnt) {
        $cmmntstr = " \t; (jumptable offsets)";
    }
    # progress indicator
    print "\r0x$addr  ";
 
    # add line to history
    unshift(@hist, $_);
    # remove last element of history
    pop(@hist);
}
close IN;
close OUT;
close OUTL;
 
#####
print "\njob complete!\n";
 
# Read one 32-bit little-endian word from the firmware dump.
#
# Uses the global filehandle BIN, which must be open on the raw dump.
#
#   $off  - byte offset into the dump file (not a load address)
#
# Returns the word as 8 lowercase hex digits, or '???' when the offset lies
# outside the file or fewer than 4 bytes are available there.
sub get_word {
    my $off = shift;
    my $ret;

    # A failed seek would leave the file position where the previous read
    # ended and silently return unrelated data.
    return '???' if $off < 0;
    seek(BIN, $off, 0) or return '???';

    my $c = read(BIN, $ret, 4);
    return '???' unless defined $c && $c == 4;

    # "V" is explicitly little-endian (the dump's byte order), whereas "I"
    # follows the byte order of the machine running the script.
    return sprintf("%08x", unpack("V", $ret));
}
 
# Read one little-endian unsigned value of $len bytes from the firmware dump.
#
# Uses the global filehandle BIN, which must be open on the raw dump.
#
#   $off  - byte offset into the dump file (not a load address)
#   $len  - number of bytes to read: 1 (byte) or 2 (halfword); any other
#           length is decoded as a 32-bit word
#
# Returns the value in lowercase hex, zero-padded to at least $len digits,
# or '???' when the offset lies outside the file or fewer than $len bytes
# are available there.
sub get_mem {
    my $off = shift;
    my $len = shift;
    my $ret;

    # "v"/"V" are explicitly little-endian (the dump's byte order), whereas
    # "S"/"I" follow the byte order of the machine running the script.
    my $unpsize = $len == 1 ? "C"
                : $len == 2 ? "v"
                :             "V";

    # A failed seek would leave the file position where the previous read
    # ended and silently return unrelated data.
    return '???' if $off < 0;
    seek(BIN, $off, 0) or return '???';

    my $c = read(BIN, $ret, $len);
    return '???' unless defined $c && $c > 0 && $c == $len;

    return sprintf("%0".$len."x", unpack($unpsize, $ret));
}
 
# Estimate the number of entries of a tbb/tbh jumptable.
#
# Walks the global history @hist (previous objdump lines, newest first) and
# looks for the nearest "cmp <reg>, #<n>" on the index register. The
# condition of the nearest branch found before that cmp decides whether n
# itself or n + 1 is returned:
#   b<cs>  -> n
#   b<hi>  -> n + 1
#   other / none -> n
# (method is not 100% reliable; only 'cs' and 'hi' were encountered so far,
# depending on compiler)
#
#   $_[0] - name of the index register as printed by objdump (e.g. "r1")
#
# Returns the estimated element count, or 0 when no matching cmp is found
# in the history.
sub get_regcmp {
    my ($reg) = @_;

    # -1 until a branch with a known condition has been seen. This is a
    # lexical on purpose: "local $a, $b, $c" localises only its first
    # variable, so the others would overwrite package globals.
    my $add = -1;

    foreach my $prev (@hist) {
        # determine condition flag used in the nearest branch instruction;
        # $1 is only read after a successful match
        if ($add < 0 && $prev =~ /^.*\tb(..).*\t.+/) {
            if ($1 eq "cs") {
                $add = 0;
            }
            elsif ($1 eq "hi") {
                $add = 1;
            }
        }
        if ($prev =~ /^.*\tcmp.*\t\Q$reg\E, #([[:digit:]]+)/) {
            return $1 + ($add < 0 ? 0 : $add);
        }
    }
    return 0;
}

Around Wikia's network

Random Wiki