(intel instruction set: refer to http://www.arl.wustl.edu/~lockwood/class/cs306/books/artofasm/Chapter_6/CH06-1.html)
Editing, compiling, running, and debugging a C program in Linux.
Understanding ASM code: where the local variable is stored, how the stack changes during call/ret instructions, ...
Understanding the process image.
CALL, RET instruction
#include <stdio.h>
void main(){
int x;
x=30;
printf("x is %d\n", x);
}
$ gcc -m32 -o ex1 ex1.c
$ ./ex1
x is 30
$ objdump -D -M intel ex1 > ex1.txt
$ vi ex1.txt
/main
repeat / until you find "<main>:"
080483c4 <main>:
80483c4: 55 push ebp
80483c5: 89 e5 mov ebp, esp
80483c7: 83 e4 f0 and esp, 0xfffffff0
80483ca: 83 ec 20 sub esp, 0x20
80483cd: c7 44 24 1c 1e 00 00 00 mov DWORD PTR [esp+0x1c], 0x1e
80483d5: b8 b4 84 04 08 mov eax, 0x80484b4
80483da: 8b 54 24 1c mov edx, DWORD PTR [esp+0x1c]
80483de: 89 54 24 04 mov DWORD PTR [esp+0x4], edx
80483e2: 89 04 24 mov DWORD PTR [esp], eax
80483e5: e8 0a ff ff ff call 80482f4
80483ea: c9 leave
80483eb: c3 ret
......
push x
esp = esp - 4
mem[esp] <- x
pop x
x <- mem[esp]
esp = esp + 4
mov reg1, data
reg1 <- data
and reg, data
reg <- reg AND data
sub reg, data
reg <- reg - data
mov DWORD PTR [addr], data
4 byte in mem[addr] <- data
call x
push return-addr (the address of the instruction after "call x")
jump to x
leave
esp <- ebp
pop ebp
ret
eip <- mem[esp]
esp = esp + 4
$ vi ex1.c
$ gcc -m32 -o ex1 ex1.c
$ ./ex1
x is 30

ex1.txt as above and show the asm code for main.
$ objdump -D -M intel ex1 > ex1.txt
$ vi ex1.txt
/main
objdump 명령어를 이용해 기계어 파일 ex1을 intel format으로 볼 수 있도록 변환 후, "txt" 형식으로 저장하였다.

ebp : 스택 시작지점 주소 저장
esp : 스택 끝지점 주소 저장
push/pop 따라 esp 값은 4byte 씩 늘었다가 줄었다 함
DWORD PTR : WORD는 2byte, DoubleWORD는 2*2byte
mov DWORD PTR [1000], 0x21 instruction will store 0x21 in the 4 byte at memory address 1000.
eax : accumulator register - 산술, 논리연산 수행 + 함수 반환값 여기에 저장됨
edx : 데이터 복사 시, 목적지의 주소 저장
eip : 다음에 실행할 명령어를 가리키는 포인터, 명령 실행 후 자동으로 업데이트 됨
ret instruction.
Assume esp = 0xbffff63c and ebp = 0xbffff6b8 in the beginning of main.
우선, <main> asm 코드는 아래와 같다.
<main> asm code
0804841c <main>:
804841c: 55 push ebp
804841d: 89 e5 mov ebp,esp
804841f: 83 e4 f0 and esp,0xfffffff0
8048422: 83 ec 20 sub esp,0x20
8048425: c7 44 24 1c 1e 00 00 mov DWORD PTR [esp+0x1c],0x1e
804842c: 00
804842d: 8b 44 24 1c mov eax,DWORD PTR [esp+0x1c]
8048431: 89 44 24 04 mov DWORD PTR [esp+0x4],eax
8048435: c7 04 24 e4 84 04 08 mov DWORD PTR [esp],0x80484e4
804843c: e8 af fe ff ff call 80482f0 <printf@plt>
8048441: c9 leave
8048442: c3 ret
8048443: 66 90 xchg ax,ax
8048445: 66 90 xchg ax,ax
8048447: 66 90 xchg ax,ax
8048449: 66 90 xchg ax,ax
804844b: 66 90 xchg ax,ax
804844d: 66 90 xchg ax,ax
804844f: 90 nop
main
main이 804841c부터 시작하므로 eip는 804841c가 된다.
push ebp
esp를 4만큼 내리고 esp가 가리키는 메모리에 ebp를 기록한다.
eip가 다음으로 하나 이동한다.
mov ebp, esp
esp 값을 ebp에 저장(move)한다.
and esp,0xfffffff0
esp값과 0xfffffff0 값을 비트 별로 and 연산하고 esp에 집어넣는다.
esp 는 2진수로 10111111111111111111011000111000,
0xfffffff0는 2진수로 11111111111111111111111111110000 이므로
and 연산을 하면 10111111111111111111011000110000 == 0xbffff630이다.
sub esp,0x20
-esp에서 0x20만큼 뺀 값을 esp에 저장한다.
esp 값인 0xbffff630에서 0x20만큼 뺀 값은 0xbffff610이다.
mov DWORD PTR [esp+0x1c], 0x1e
esp 값은 0xbffff610이다.
esp 값에 0x1c를 더한 값은 0xbffff62c이다.
mov eax, DWORD PTR [esp+0x1c]
DWORD PTR [esp+0x1c] 값을 eax에 저장한다.
esp 값은 0xbffff610
esp에 0x1c를 더하면 0xbffff62c
DWORD PTR이므로 4byte
eax는 0x1e이다.
mov DWORD PTR [esp+0x4], eax
esp + 0x4 = 0xbffff614
DWORD PTR이므로 4byte
0xbffff614에 0x1e를 넣는다.
mov DWORD PTR [esp], 0x80484e4
call 80482f0 <printf@plt>
0x80482f0에 있는 코드 실행 전,
call 명령어의 다음 명령어의 위치인 0x8048441를 push 한 후
0x80482f0로 jump 한다.
push 과정에서 esp는 4만큼 감소되고,
esp가 가리키는 메모리에 0x8048441가 기록되며,
jump to 0x80482f0로 인해 eip는 0x80482f0로 업데이트된다.
0x80482f0에 있는 <printf@plt>의 명령어를 실행하기 시작한다.
<printf@plt> asm 코드는 아래와 같다.
080482f0 <printf@plt>:
80482f0: ff 25 0c a0 04 08 jmp DWORD PTR ds:0x804a00c
80482f6: 68 00 00 00 00 push 0x0
80482fb: e9 e0 ff ff ff jmp 80482e0 <_init+0x2c>

call이 종료되면 eip는 esp에 저장해둔 0x8048441로 이동한다.
leave
mov esp, ebp를 수행하여 esp에 ebp의 bffff638이 저장된다. (ebp -> esp = ebp)
pop ebp를 수행하여 ebp에 esp가 가리키는 메모리에 기록된 data를 ebp에 기록한 후
esp를 4byte만큼 증가시켜 esp는 bffff63c이 된다.
ret
eip를 현재 esp인 bffff63c가 가리키는 return address로 이동시키고 esp의 값에 4를 더한다.
(esp가 가리키는 위치에 있는 값을 알 수 없기에 정확한 eip 값은 알 수 없다.)
x=30; and printf("x is %d\n",x); in the ASM code.
x=30;
x=30;에서 10진수 30을 16진수로 변환하면 0x1e이다.
<main> asm 코드에서 1e를 찾아보면 아래와 같다.
0804841c <main>:
...... ...... ... ......
8048425: c7 44 24 1c 1e 00 00 mov DWORD PTR [esp+0x1c],0x1e
...... ...... ... ......
0x08048425에서 mov DWORD PTR [esp+0x1c],0x1e 이므로 esp+0x1c에 30을 저장 한다. 즉, c7 44 24 1c 1e 00 00 00가 x=30;을 나타낸다.
printf("x is %d\n",x);
0804841c <main>:
...... ...... ... ......
804842d: 8b 44 24 1c mov eax,DWORD PTR [esp+0x1c]
8048431: 89 44 24 04 mov DWORD PTR [esp+0x4],eax
8048435: c7 04 24 e4 84 04 08 mov DWORD PTR [esp],0x80484e4
...... ...... ... ......
80484e4를 확인하면 아래와 같다.

78 20 69 73 20 25 64 0a 00을 해석하면 다음과 같다.
x [space] is [space] %d LF이다.
esp의 주소 공간에 문장을 저장한다.
0x08048431에서는 x를 저장,
0x08048435에서 x [space] is [space] %d LF를 저장,
0x0804843c에서 printf를 호출 하는 것을 알 수 있다.
x?
0804841c <main>:
...... ...... ... ......
8048425: c7 44 24 1c 1e 00 00 mov DWORD PTR [esp+0x1c],0x1e
...... ...... ... ......
위 Exercise (4)와 동일하게 esp + 0x1c인 것을 알 수 있다.
"x is %d\n" is stored. Confirm the ascii codes for "x is %d\n" at that address.
78 20 69 73 20 25 64 0a 00을 해석하면 다음과 같다.
x [space] is [space] %d LF이므로 0x080484e4, 0x080484e6에 저장 되는 것을 알 수 있다.
main() begins.
<main> asm code를 확인해보면 0x0804841c가 메모리 주소인 것을 확인할 수 있다.
-m32 (for 32 bit environment) and -g (for gdb) option
$ gcc -m32 -g -o ex1 ex1.c
.gdbinit to configure gdb
$ cp ../../linuxer1/.gdbinit .
gdb
$ gdb ex1
....................
gdb$ set disassembly-flavor intel # to see asm output in intel syntax
gdb$ disassemble main # disassemble main() and show asm code for main
Dump of assembler code for function main:
0x804841c <+0>: push ebp # first instruction of main
....................
End of assembler dump.
gdb$ display $esp # display the value of esp after each ni
gdb$ display $ebp
gdb$ display $eax
gdb$ b *0x804841c # set break point at addr=0x804841c (first instr addr of main)
....................
gdb$ r # start running the program
[0x002B:0xFFFFD5EC]------------------------------------------------------[stack]
0xFFFFD63C : 20 83 04 08 00 00 00 00 - F0 5D D0 44 79 D7 D2 44 ........].Dy..D
0xFFFFD62C : 00 00 00 00 00 00 00 00 - 00 00 00 00 01 00 00 00 ................
0xFFFFD61C : 00 00 00 00 00 00 00 00 - 5D 83 CC CE 2B 26 D7 94 ........]...+&..
0xFFFFD60C : 02 00 00 00 02 00 00 00 - 00 60 EC 44 00 00 00 00 .........`.D....
0xFFFFD5FC : B0 C6 FF F7 01 00 00 00 - 01 00 00 00 00 00 00 00 ................
0xFFFFD5EC : 65 D8 D2 44 01 00 00 00 - 84 D6 FF FF 8C D6 FF FF e..D............
--------------------------------------------------------------------------[code]
=> 0x804841c <main>: push ebp
0x804841d <main+1>: mov ebp,esp
0x804841f <main+3>: and esp,0xfffffff0
0x8048422 <main+6>: sub esp,0x20
--------------------------------------------------------------------------------
Breakpoint 1, main () at ex1.c:2
2 void main(){
3: $eax = 0x1
2: $ebp = (void *) 0x0
1: $esp = (void *) 0xffffd5ec
gdb$ ni # execute next instruction ("push ebp")
gdb$ ni # execute next instruction ("mov ebp, esp")
gdb$ ni # execute next instruction ("and esp, 0xfffffff0")
....................
gdb$ ni # execute "sub esp, 0x20"
....................
gdb$ ni # execute "mov DWORD PTR [esp+0x1c], 0x1e"
gdb$ ni # execute "mov eax, DWORD PTR [esp+0x1c]"
....................
gdb$ ni # execute "mov DWORD PTR [esp+0x4], eax"
....................
gdb$ ni # execute "mov DWORD PTR [esp], 0x80484e4"
....................
gdb$ si # execute "call printf" with si to enter the function
main(). You should indicate the changed part in your picture (the captured output screen from gdb) for all instructions one by one. For the "call" instruction, use the si command to enter the function and show the changes in the stack and registers.
컴파일 시, -g 옵션을 추가하여 debug가 가능하도록 하였다.
.gdbinit 을 복사 후 gdb를 실행하였다.

set disassembly-flavor intel로 ASM이 intel format으로 출력이 되도록 하며, disassemble main 명령어로 main의 ASM 코드를 확인하였다.

break point를 main 함수의 메모리 주소인 0x0804841c로 하였다.
display $esp, ebp, eax, eip로 명령어를 실행 할 때마다 값이 출력 되도록 하였다.

r로 디버깅을 시작 했으며, 0x804841c에서 breakpoint를 만나 멈췄으며, stack과 ASM code를 확인 할 수 있었다. 그리고 설정한 eip, eax, ebp, esp 값을 확인 할 수 있었다.
ni로 다음 줄을 실행 하였다.

push ebp는 1) esp = esp-4, 2) mem[esp] = ebp
의 과정을 거쳐 esp가 4만큼 값이 감소하며, eip가 이동 한 것을 알 수 있다.
그리고 stack 0xffffd4f8에 00 00 00 00이 된 것을 알 수 있다.

and esp,0xfffffff0 esp 가 esp and 0xfffffff0의 값 (0xffffd4f0)을 가지게 된다.

sub esp,0x20 명령어로
esp가 esp-0x20 값을 가지게 된다. 그러므로 esp는 0xffffd4d0이 된다.

mov DWORD PTR [esp+0x1c],0x1e이므로 4byte에 0x1e를 저장한다.
[esp+0x1c]에 0x1e가 저장 되는 것을 알 수 있다.

mov eax,DWORD PTR [esp+0x1c]이므로 [esp+0x1c] 부분에 0x1e 값이 채워져있었기 때문에, eax 값이 30(0x1e)이 된다.

mov DWORD PTR [esp+0x4],eax이므로 [esp+0x4]에 eax를 저장한다.

mov DWORD PTR [esp],0x80484e4이므로 esp 위치에 0x80484e4를 저장한다.

call 0x80482f0 <printf@plt>이므로 우선 esp가 4 감소 하며, return address 0x08048441를 저장한다.
jmp DWORD PTR ds:0x804a00c
push 0x0이므로 esp-4(0x0)이 저장 되는 것을 알 수 있다.
jmp 0x80482e0이므로 eip가 jump한다.
push DWORD PTR ds:0x804a004이므로 esp는 4가 내려가며 stack에 저장된다.
jmp DWORD PTR ds:0x804a008이므로 eip가 jump한다.
gdb commands
gdb$ b *addr # break at addr
gdb$ b funcname # break at function "funcname"
gdb$ r # rerun
gdb$ bt # backtrace: show the chain of stack frames
gdb$ p expr # print the value of expr ex) p $sp or p/x $eax (in hexa)
gdb$ nexti # run next instruction (do not go into a function). same as ni.
gdb$ stepi # run next instruction (go inside a function). same as si.
gdb$ info f # show the stack frame of the current function
gdb$ display $eip # show the value of eip after every gdb command
gdb$ display $esp # show the value of esp after every gdb command
gdb$ info registers # show the value of all registers
gdb$ info registers eip # show the value of eip
gdb$ info line # memory address of the current function
gdb$ info line main # memory address of function main
gdb$ x/8xb addr # show 8 bytes in hexa starting from addr
gdb$ x/20xh addr # show 20 half words (2 bytes) in hexa starting from addr
gdb$ x/13xw addr # show 13 words (4 bytes) in hexa starting from addr
rsp, rip, rbp, ...) instead of esp, eip, ebp, ...
rdi, rsi, rdx, rcx, r8, r9 (in that order left to right), while xmm0, xmm1, xmm2, ..., xmm7 are used for floating point arguments.