#!/usr/bin/perl -w
#
# (C) Copyright IBM Corporation 2005.
#	Released under GPL v2.
#	Author : Ram Pai (linuxram@us.ibm.com)
#	         Malahal Naineni (malahal@us.ibm.com)
#
# Contributions: Russ Weight (rweight@us.ibm.com)
# 

use strict;
use Cwd;

# Version of this script; usage() prints it after the usage line.
my $version=0.9;

# Print the command-line synopsis followed by the script version on
# standard output, then terminate with exit status 1.
sub usage {
	print "Usage: $0 kernelmodule\n",
	      "version=$version\n";
	exit 1;
}

# Determine the ELF word size and byte order of a module.
#
# Argument: path of the module, handed to "readelf -h".
# Returns:  a two element list: the ELF class expressed in bytes
#           (4 for ELF32, 8 for ELF64) and the endianness word taken
#           from the "Data:" line (for example "little" or "big").
# Dies:     when either field cannot be found in the readelf output;
#           no meaningful size can be returned in that case.
sub get_arch_crc_size {
	my $mod = shift;
	my ($class_bits, $endian);

	# A single readelf run provides both header fields. Only the
	# first match of each field is used.
	for my $line (qx(readelf -h $mod)) {
		if (not defined $class_bits
		    and $line =~ /^\s+Class:.*ELF(\d+)$/) {
			$class_bits = $1;
		} elsif (not defined $endian
			 and $line =~ /^\s+Data:.*, (\w+)\sendian$/) {
			$endian = $1;
		}
	}

	if (not defined $class_bits) {
		die("cannot determine ELF class of $mod");
	}
	if (not defined $endian) {
		die("cannot determine endianness of $mod");
	}
	return ($class_bits/8, $endian);
}


# Print the strings stored in the .modinfo section of a module.
#
# The section is dumped with "objdump -j .modinfo -d"; from every dump
# line only the "address:" token and the hex bytes that follow it are
# kept. Each run of non-zero bytes is printed as text on its own line,
# prefixed with a tab. A zero byte, or an address that does not continue
# the previous line, ends the current string.
#
# Argument: path of the module.
sub print_header {
	my $mod = shift;

	# Reduce every matching dump line to "address: xx xx ..." and
	# drop the lines that do not match.
	my @tokens;
	for my $line (qx(objdump -j .modinfo -d $mod)) {
		next unless ($line =~ s(^\s+(.*:\s+([a-f0-9]{2} ){1,16}).*$)($1));
		push @tokens, $line;
	}
	@tokens = split /\s+/, "@tokens";

	my $in_string = 0;	# true while a string is being printed
	my $offset = 0;		# address expected for the next byte
	print "\tMODULE_NAME=$mod\n";
	foreach my $token (@tokens) {
		if ($token =~ /(\w+):/) {
			# Address token: a gap in the addresses ends
			# the string that is currently open.
			my $address = hex($1);
			if ($in_string and $address != $offset) {
				print "\n";
				$in_string = 0;
			}
			$offset = $address;
			next;
		}
		$offset++;
		if ($token ne "00") {
			print "\t" unless ($in_string);
			$in_string = 1;
			print chr(hex($token));
			next;
		}
		# A zero byte terminates the string, if one is open.
		if ($in_string) {
			print "\n";
			$in_string = 0;
		}
	}
	print "\n" if ($in_string);
}

# Return 1 when /etc/SuSE-release exists and the fifth whitespace
# separated word of its "SUSE LINUX Enterprise" line equals 11,
# otherwise 0.
sub _host_is_sles11 {
	return 0 unless (-e "/etc/SuSE-release");
	my $release = (split /\s+/,
		qx(grep -i "SUSE LINUX Enterprise" /etc/SuSE-release))[4];
	# grep prints nothing when no line matches, which leaves
	# $release undefined; comparing that numerically would warn.
	return 0 unless (defined $release and $release =~ /^\d/);
	return ($release == 11) ? 1 : 0;
}

# Print the checksum and name of every symbol recorded in the
# __versions section of a 2.6 module, one "0x<checksum>\t\t<symbol>"
# line per symbol on standard output.
#
# Argument: path of the module.
# Dies:     when the module has no __versions section, when the readelf
#           hex dump cannot be started, or when the dump is malformed
#           (an entry with fewer than four rows, a row without the
#           string column, or a string column longer than 16
#           characters).
sub print_26_abi {
	my $mod = shift;
	# Only the checksum width is needed here; the endianness word
	# returned alongside it is ignored.
	my ($archbyte) = get_arch_crc_size($mod);

	# Section index of __versions, taken from the "[NN]" column of
	# the section header table.
	my $section;
	for my $header (qx(readelf -S $mod)) {
		if ($header =~ /\[\s*(\d+)\s*\]\s+__versions/) {
			$section = $1;
			last;
		}
	}
	if (not defined $section) {
		die("no __versions section in a 2.6 module: $mod");
	}
	my $cmd = "readelf -W -x $section $mod";
	open(my $versions, '-|', $cmd) or die("$cmd failed: $!");

	# The __versions section contains all the symbols that are
	# needed by the module. Each entry has a fixed size of 64 bytes.
	# The first DWORD (4 bytes on 32 bit modules and 8 bytes in the
	# case of 64 bit modules) contains the symbol checksum and the
	# remaining bytes contain the actual symbol name itself.
	#
	# readelf itself gives offset and the string representation for
	# each 16 bytes on a line. E.g. on a 64 bit big endian system:
	#
	# OFFSET     -------- ACTUAL DATA --------------  readelf string repr.
	# 0x00000000 00000000 a097d82e 73747275 63745f6d  ........struct_m
	# 0x00000010 6f64756c 65000000 00000000 00000000  odule...........
	# 0x00000020 00000000 00000000 00000000 00000000  ................
	# 0x00000030 00000000 00000000 00000000 00000000  ................
	#
	# The symbol name is "struct_module" and the KABI checksum is
	# a097d82e.
	#
	# The readelf output on little endian systems seems to be
	# reversed for each line with some versions of the readelf
	# program.
	#
	# For example, (tux1's 2.16.x gives reversed output from
	# gentoo's 2.18 version).  The only thing that is really
	# consistent is its own string interpretation output in the last
	# column. Get the symbol from the last column and verify if it
	# matches with the actual dump. If it does, then process it as
	# "normal", otherwise, process it as "reversed" case.

	# The host check does not depend on the symbol, so do it once
	# instead of spawning grep for every entry.
	my $sles11 = _host_is_sles11();

	my $readelf_type = "none";
	while (my $row = <$versions>) {
		next unless ($row =~ /^\s+0x/);

		# Read all the lines that belong to the same symbol:
		# one 64 byte entry is dumped as four rows of 16 bytes.
		my @rows = ($row);
		while (@rows < 4) {
			my $more = <$versions>;
			if (not defined $more) {
				die("truncated __versions entry in $mod");
			}
			push @rows, $more;
		}

		# The last column related to checksum may have 'spaces'
		# so, LIMIT the split.
		my @entry;
		for my $text (@rows) {
			my @fields = split(' ', $text, 6);
			if (@fields < 6) {
				chomp($text);
				die("unexpected readelf dump line: $text");
			}
			chomp($fields[5]);
			push @entry, \@fields;
		}
		my ($line1, $line2, $line3, $line4) = @entry;

		# Use the readelf's last column as that is consistent
		# across versions and architectures for the symbol name.
		my $symbol = join('', map { $_->[5] } @entry);

		# checksum is at the beginning and is usually $archbyte
		# long, but if there are some 'SPACES' in the checksum,
		# they would be eaten by our above split. We can
		# actually split just based on a single SPACE as opposed
		# to all white space but we don't want to depend on
		# readelf behavior related to this single space between
		# fields. So, we look at the length of the string; it
		# can never be more than 16 characters. The comparison
		# has to be numeric: a string comparison would order
		# "9" after "16".
		if (length($line1->[5]) > 16) {
			die("incorrect length for string: $line1->[5]");
		}
		my $len = $archbyte - (16 - length($line1->[5]));
		$symbol = substr($symbol, $len); # remove checksum part
		$symbol =~ s/\.+$//;	# remove silly dots at the end.

		# Find out if the readelf output is "normal" or
		# "reversed". We do this by comparing readelf's last
		# column symbol with the actual dumped data. If the
		# dumped data interpretation is same as its string
		# representation, we assume it is normal.
		# The first symbol always is 'struct_module', and
		# fortunately is not a palindrome. This should tell us
		# if the readelf output is "normal" or "reversed"
		# unambiguously.
		if ($readelf_type eq "none") {
			# Assume it is a "normal" output and interpret
			# the symbol dump data. The name starts in hex
			# column 2 for 4 byte checksums and in hex
			# column 3 otherwise.
			my $findex = ($archbyte == 4) ? 2 : 3;
			my $str = join('', @{$line1}[$findex..4],
					   @{$line2}[1..4],
					   @{$line3}[1..4],
					   @{$line4}[1..4]);
			$str = pack('H*', $str); # convert to string repr.
			$str =~ s/\x00+$//;	# remove nulls at the end.
			# Shall we check if it is really reverse or
			# just assume? For now: assume.
			$readelf_type = ($str eq $symbol) ? "normal"
							  : "reverse";
		}

		my $checksum;
		if ($readelf_type eq "normal") {
			# The checksum index is very vague on 64 bit
			# modules.  Some versions of readelf behave
			# differently. All we know is that one 4byte
			# field is all zeros and the other is valid!
			# TODO: fix correctly if there is a better
			# method.
			$checksum = $line1->[1];
			if ($archbyte == 8 and $line1->[1] eq "00000000") {
				$checksum = $line1->[2];
			}
			# On SLES11 the checksum is printed with the
			# order of its bytes (pairs of hex digits)
			# reversed.
			if ($sles11) {
				$checksum = join('',
					reverse($checksum =~ /(..)/g));
			}
		} elsif ($readelf_type eq "reverse") {
			$checksum = $line1->[4];
			# TODO: The following reverse is needed on some
			# versions of readelf and not on others! Get the
			# facts about readelf before fixing this. For
			# now, it works without reverse on RHEL5 x86_64!
			#$checksum = join('', reverse($checksum =~ /(..)/g));
		}

		# NOTE: On power, a function symbol's name always starts
		# with a "." (hex 2E), where as a data symbol doesn't.
		# A symbol's name never starts with a "dot" anyway, so
		# we just remove one "dot", if it is there.
		$symbol =~ s/^\.//;
		print("0x$checksum\t\t$symbol\n");
	}

	# Closing a pipe handle waits for the command; report a failing
	# readelf instead of ignoring it silently.
	close($versions)
		or warn("$cmd did not finish cleanly (wait status $?)\n");
}

# Print the versioned undefined symbols of a module, one
# "<Rchecksum>\t\t<symbol>" line per symbol on standard output.
#
# The symbol table is read with "readelf -W -s". Only UND entries whose
# name contains "_R" are considered, and only names of the form
# <symbol>_R[<words>_]<7 or 8 hex digits> are printed.
#
# Argument: path of the module.
sub print_24_abi {
	my $module = shift;
	#print_header($module);
	foreach my $entry (qx(readelf -W -s $module)) {
		# Keep the name column of undefined symbols only.
		next unless ($entry =~ /.*\s+UND\s+(.*_R.*)$/);
		my $versioned = $1;
		# Split the name into the plain symbol and its
		# "R..." checksum suffix.
		next unless ($versioned =~ /^([\w_\.]*)_(R(\w+_)*[a-f0-9]{7,8})$/);
		print "$2\t\t$1\n";
	}
}

# Tell whether a module carries a __versions section.
#
# Argument: path of the module.
# Returns:  true when "readelf -S" lists a line containing __versions,
#           false otherwise.
# Exits:    prints "<module> not identified" on standard output and
#           exits with status 1 when "readelf -h" fails on the file.
sub module_is_26_version {
	my $mod = shift;

	# Check that readelf can identify the module at all. Only the
	# exit status matters; the command output is discarded.
	`readelf -h $mod 2>&1 > /dev/null`;
	unless ($? == 0) {
		print "$mod not identified\n";
		exit 1;
	}

	# The status of the pipeline is the exit status of grep.
	`readelf -S $mod | grep '__versions'`;
	my $has_versions = ($? == 0);
	return $has_versions;
}

# Entry point: exactly one argument, the module path, is required.
usage() unless (@ARGV == 1);
my $mod = $ARGV[0];

# A module with a __versions section is printed by print_26_abi();
# every other module that readelf can identify goes to print_24_abi().
my $printer = module_is_26_version($mod) ? \&print_26_abi
					 : \&print_24_abi;
$printer->($mod);

exit 0;
