I had below C program:
__int64_t fib_tr_const (__int64_t n, __int64_t n_1, __int64_t n_2)
{
if (n == 0x42 || n == 0x43 ) {
return n_1 + n_2;
}
return 0x31 + fib_tr_const (n-1, n_2, n_1 + n_2);
}
void main (int argc, char ** argv)
{
__int64_t n = atoi (argv[1]);
// printf("fib(%ld) = %ld\n", n, fib(n));
// printf("fib_tr(%ld) = %ld\n", n, fib_tr(n, 0, 1));
__int64_t volatile result = 0x4567;
printf ("11111\n");
printf ("11111\n");
result = fib_tr_const(n, 0x123, 0x234);
printf ("22222\n");
printf ("22222\n");
printf("fib_tr_const(%ld) = %ld\n", n, result);
}
And I built it with -O2
.
gcc tail_call.c -O2 -o tail_call_const_O2
I disassembled the binary:
The main():
00000000000010a0 <main>:
10a0: f3 0f 1e fa endbr64
10a4: 41 54 push %r12
10a6: ba 0a 00 00 00 mov $0xa,%edx
10ab: 48 83 ec 10 sub $0x10,%rsp
10af: 48 8b 7e 08 mov 0x8(%rsi),%rdi
10b3: 31 f6 xor %esi,%esi
10b5: e8 c6 ff ff ff callq 1080 <strtol@plt>
10ba: 48 8d 3d 43 0f 00 00 lea 0xf43(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
10c1: 48 c7 44 24 08 67 45 movq $0x4567,0x8(%rsp)
10c8: 00 00
10ca: 4c 63 e0 movslq %eax,%r12
10cd: e8 9e ff ff ff callq 1070 <puts@plt>
10d2: 48 8d 3d 2b 0f 00 00 lea 0xf2b(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
10d9: e8 92 ff ff ff callq 1070 <puts@plt>
------------------START-------------------
10de: 49 8d 44 24 ff lea -0x1(%r12),%rax
10e3: 48 83 f8 01 cmp $0x1,%rax
10e7: 76 75 jbe 115e <main+0xbe>
10e9: 4c 89 e0 mov %r12,%rax
10ec: 31 c9 xor %ecx,%ecx
10ee: ba 01 00 00 00 mov $0x1,%edx
10f3: 31 ff xor %edi,%edi
10f5: eb 0c jmp 1103 <main+0x63>
10f7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
10fe: 00 00
1100: 48 89 f2 mov %rsi,%rdx
1103: 48 8d 34 17 lea (%rdi,%rdx,1),%rsi
1107: 48 89 c7 mov %rax,%rdi
110a: 48 83 e8 01 sub $0x1,%rax
110e: 48 01 f9 add %rdi,%rcx
1111: 48 89 d7 mov %rdx,%rdi
1114: 48 83 f8 02 cmp $0x2,%rax
1118: 75 e6 jne 1100 <main+0x60>
111a: 48 01 f2 add %rsi,%rdx
111d: 48 01 d1 add %rdx,%rcx
1120: 48 8d 3d e3 0e 00 00 lea 0xee3(%rip),%rdi # 200a <_IO_stdin_used+0xa>
1127: 48 89 4c 24 08 mov %rcx,0x8(%rsp)
------------------END-------------------
112c: e8 3f ff ff ff callq 1070 <puts@plt>
1131: 48 8d 3d d2 0e 00 00 lea 0xed2(%rip),%rdi # 200a <_IO_stdin_used+0xa>
1138: e8 33 ff ff ff callq 1070 <puts@plt>
113d: 48 8b 4c 24 08 mov 0x8(%rsp),%rcx
1142: 48 83 c4 10 add $0x10,%rsp
1146: 31 c0 xor %eax,%eax
1148: 4c 89 e2 mov %r12,%rdx
114b: 48 8d 35 be 0e 00 00 lea 0xebe(%rip),%rsi # 2010 <_IO_stdin_used+0x10>
1152: bf 01 00 00 00 mov $0x1,%edi
1157: 41 5c pop %r12
1159: e9 32 ff ff ff jmpq 1090 <__printf_chk@plt>
115e: ba 01 00 00 00 mov $0x1,%edx
1163: 31 c9 xor %ecx,%ecx
1165: eb b6 jmp 111d <main+0x7d>
1167: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
116e: 00 00
The fib_tr_const():
00000000000012e0 <fib_tr_const>:
12e0: f3 0f 1e fa endbr64
12e4: 48 8d 47 be lea -0x42(%rdi),%rax
12e8: 48 89 f9 mov %rdi,%rcx
12eb: 48 83 f8 01 cmp $0x1,%rax
12ef: 77 0a ja 12fb <fib_tr_const+0x1b>
12f1: eb 35 jmp 1328 <fib_tr_const+0x48>
12f3: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
12f8: 48 89 c2 mov %rax,%rdx
12fb: 48 83 ef 01 sub $0x1,%rdi
12ff: 48 8d 04 16 lea (%rsi,%rdx,1),%rax
1303: 48 89 d6 mov %rdx,%rsi
1306: 48 83 ff 43 cmp $0x43,%rdi
130a: 75 ec jne 12f8 <fib_tr_const+0x18>
130c: 48 8d 34 49 lea (%rcx,%rcx,2),%rsi
1310: 48 01 c2 add %rax,%rdx
1313: 48 c1 e6 04 shl $0x4,%rsi
1317: 48 8d 8c 31 2d f3 ff lea -0xcd3(%rcx,%rsi,1),%rcx
131e: ff
131f: 48 8d 04 0a lea (%rdx,%rcx,1),%rax
1323: c3 retq
1324: 0f 1f 40 00 nopl 0x0(%rax)
1328: 48 89 d0 mov %rdx,%rax
132b: 48 89 f2 mov %rsi,%rdx
132e: 31 c9 xor %ecx,%ecx
1330: 48 01 c2 add %rax,%rdx
1333: 48 8d 04 0a lea (%rdx,%rcx,1),%rax
1337: c3 retq
1338: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
133f: 00
With the landmarks printf ("11111\n");
and printf ("22222\n");
, and thus puts
in assembly. I believe the part between START
and END
should be where the main()
calls fib_tr_const()
.
I don't see any call
or jmp
instructions that branch from main() to the fib_tr_const(). So how is this function called?
I compared the assembly between the marked points with the assembly of the fib_tr_const(). The asm code looks to be similar. It seems the fib_tr_const() function is inlined.
Another question arises - if the function is inlined, why does the code still exist in the final binary? It would save space if it were removed, so why doesn't this happen?
I had below C program:
__int64_t fib_tr_const (__int64_t n, __int64_t n_1, __int64_t n_2)
{
if (n == 0x42 || n == 0x43 ) {
return n_1 + n_2;
}
return 0x31 + fib_tr_const (n-1, n_2, n_1 + n_2);
}
void main (int argc, char ** argv)
{
__int64_t n = atoi (argv[1]);
// printf("fib(%ld) = %ld\n", n, fib(n));
// printf("fib_tr(%ld) = %ld\n", n, fib_tr(n, 0, 1));
__int64_t volatile result = 0x4567;
printf ("11111\n");
printf ("11111\n");
result = fib_tr_const(n, 0x123, 0x234);
printf ("22222\n");
printf ("22222\n");
printf("fib_tr_const(%ld) = %ld\n", n, result);
}
And I built it with -O2
.
gcc tail_call.c -O2 -o tail_call_const_O2
I disassembled the binary:
The main():
00000000000010a0 <main>:
10a0: f3 0f 1e fa endbr64
10a4: 41 54 push %r12
10a6: ba 0a 00 00 00 mov $0xa,%edx
10ab: 48 83 ec 10 sub $0x10,%rsp
10af: 48 8b 7e 08 mov 0x8(%rsi),%rdi
10b3: 31 f6 xor %esi,%esi
10b5: e8 c6 ff ff ff callq 1080 <strtol@plt>
10ba: 48 8d 3d 43 0f 00 00 lea 0xf43(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
10c1: 48 c7 44 24 08 67 45 movq $0x4567,0x8(%rsp)
10c8: 00 00
10ca: 4c 63 e0 movslq %eax,%r12
10cd: e8 9e ff ff ff callq 1070 <puts@plt>
10d2: 48 8d 3d 2b 0f 00 00 lea 0xf2b(%rip),%rdi # 2004 <_IO_stdin_used+0x4>
10d9: e8 92 ff ff ff callq 1070 <puts@plt>
------------------START-------------------
10de: 49 8d 44 24 ff lea -0x1(%r12),%rax
10e3: 48 83 f8 01 cmp $0x1,%rax
10e7: 76 75 jbe 115e <main+0xbe>
10e9: 4c 89 e0 mov %r12,%rax
10ec: 31 c9 xor %ecx,%ecx
10ee: ba 01 00 00 00 mov $0x1,%edx
10f3: 31 ff xor %edi,%edi
10f5: eb 0c jmp 1103 <main+0x63>
10f7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
10fe: 00 00
1100: 48 89 f2 mov %rsi,%rdx
1103: 48 8d 34 17 lea (%rdi,%rdx,1),%rsi
1107: 48 89 c7 mov %rax,%rdi
110a: 48 83 e8 01 sub $0x1,%rax
110e: 48 01 f9 add %rdi,%rcx
1111: 48 89 d7 mov %rdx,%rdi
1114: 48 83 f8 02 cmp $0x2,%rax
1118: 75 e6 jne 1100 <main+0x60>
111a: 48 01 f2 add %rsi,%rdx
111d: 48 01 d1 add %rdx,%rcx
1120: 48 8d 3d e3 0e 00 00 lea 0xee3(%rip),%rdi # 200a <_IO_stdin_used+0xa>
1127: 48 89 4c 24 08 mov %rcx,0x8(%rsp)
------------------END-------------------
112c: e8 3f ff ff ff callq 1070 <puts@plt>
1131: 48 8d 3d d2 0e 00 00 lea 0xed2(%rip),%rdi # 200a <_IO_stdin_used+0xa>
1138: e8 33 ff ff ff callq 1070 <puts@plt>
113d: 48 8b 4c 24 08 mov 0x8(%rsp),%rcx
1142: 48 83 c4 10 add $0x10,%rsp
1146: 31 c0 xor %eax,%eax
1148: 4c 89 e2 mov %r12,%rdx
114b: 48 8d 35 be 0e 00 00 lea 0xebe(%rip),%rsi # 2010 <_IO_stdin_used+0x10>
1152: bf 01 00 00 00 mov $0x1,%edi
1157: 41 5c pop %r12
1159: e9 32 ff ff ff jmpq 1090 <__printf_chk@plt>
115e: ba 01 00 00 00 mov $0x1,%edx
1163: 31 c9 xor %ecx,%ecx
1165: eb b6 jmp 111d <main+0x7d>
1167: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1)
116e: 00 00
The fib_tr_const():
00000000000012e0 <fib_tr_const>:
12e0: f3 0f 1e fa endbr64
12e4: 48 8d 47 be lea -0x42(%rdi),%rax
12e8: 48 89 f9 mov %rdi,%rcx
12eb: 48 83 f8 01 cmp $0x1,%rax
12ef: 77 0a ja 12fb <fib_tr_const+0x1b>
12f1: eb 35 jmp 1328 <fib_tr_const+0x48>
12f3: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
12f8: 48 89 c2 mov %rax,%rdx
12fb: 48 83 ef 01 sub $0x1,%rdi
12ff: 48 8d 04 16 lea (%rsi,%rdx,1),%rax
1303: 48 89 d6 mov %rdx,%rsi
1306: 48 83 ff 43 cmp $0x43,%rdi
130a: 75 ec jne 12f8 <fib_tr_const+0x18>
130c: 48 8d 34 49 lea (%rcx,%rcx,2),%rsi
1310: 48 01 c2 add %rax,%rdx
1313: 48 c1 e6 04 shl $0x4,%rsi
1317: 48 8d 8c 31 2d f3 ff lea -0xcd3(%rcx,%rsi,1),%rcx
131e: ff
131f: 48 8d 04 0a lea (%rdx,%rcx,1),%rax
1323: c3 retq
1324: 0f 1f 40 00 nopl 0x0(%rax)
1328: 48 89 d0 mov %rdx,%rax
132b: 48 89 f2 mov %rsi,%rdx
132e: 31 c9 xor %ecx,%ecx
1330: 48 01 c2 add %rax,%rdx
1333: 48 8d 04 0a lea (%rdx,%rcx,1),%rax
1337: c3 retq
1338: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
133f: 00
With the landmarks printf ("11111\n");
and printf ("22222\n");
, and thus puts
in assembly. I believe the part between START
and END
should be where the main()
calls fib_tr_const()
.
I don't see any call
or jmp
instructions that branch from main() to the fib_tr_const(). So how is this function called?
I compared the assembly between the marked points with the assembly of the fib_tr_const(). The asm code looks to be similar. It seems the fib_tr_const() function is inlined.
Another question arises - if the function is inlined, why does the code still exist in the final binary? It would save space if it were removed, so why doesn't this happen?
Share Improve this question edited 17 hours ago user2138149 17.1k30 gold badges145 silver badges287 bronze badges asked 18 hours ago smwikipediasmwikipedia 64.3k96 gold badges328 silver badges504 bronze badges 3 |2 Answers
Reset to default 12TL;DR: It's inlined here, but the compiler doesn't know that it isn't used elsewhere.
When you compile a program, it could be for a library, in which case it would be bad to remove a function just because it isn't used elsewhere internally. The compiler doesn't know what the full executable contains or uses so it can't skip generating fib_tr_const
. For example:
// foo.c
float square_helper(float x){
return x * x;
}
float hypot2(float x,float y){
return square_helper(x) + square_helper(y);
}
//bar.c
float square_helper(float);
float hypot2(float,float);
int main(void){
printf("%.2f",hypot2(square_helper(3.4f),2.2f);
return 0;
}
Here, the compiler can't remove square_helper
from foo.c
even though it might be inlined because bar.c
depends on it. The compiler works on one TU(.c
file) at a time, so it has no way of knowing whether such a bar.c
exists.
There are three solutions to the problem:
- mark it as
static
.static
functions are local to the TU it's declared in. This means that nothing outside can see the function, so the compiler is free to remove it after it inlines all the usages everywhere it can be seen (which is within that.c
file). - Tell the linker to optimize it out. gcc has
-flto -fuse-linker-plugin
which tells the linker to do Link Time Optimization, including removing functions that are neither exported (if you are compiling a DLL) nor used anywhere in the full program. The linker sees thatfib_tr_const
is not used anywhere after the compiler has inlined its use, so the linker removes it. - Use
-fwhole-program
. This will only work if the program is a single TU(one.c
file), and it tells gcc that this file is the whole program; there's nothing elsewhere that could possibly use the functions here. This allows gcc to remove the inlined functions. Usually this is not the best solution because option #2 is better in most cases and it limits your program to a single file.
It has to do with the concept of translation units.
A translation unit (or TU for short) is the "unit" that the compiler deals with, and can be seen as a single source file with all its included header files.
Since that's all the compiler know about, it can only inline functions from the current TU, not from any other TU. But because of this, the compiler can't know if the functions might be used from another TU, so the function actually needs to exists, so the linker can find it if it's called from another TU.
return 0x31 + fib_tr (n-1, n_2, n_1 + n_2);
bereturn 0x31 + fib_tr_const (n-1, n_2, n_1 + n_2);
? – Ian Abbott Commented 17 hours ago