最新消息:雨落星辰是一个专注网站SEO优化、网站SEO诊断、搜索引擎研究、网络营销推广、网站策划运营及站长类的自媒体原创博客

c - How is the function get called with -O2? - Stack Overflow

programmeradmin4浏览0评论

I had below C program:

__int64_t fib_tr_const (__int64_t n, __int64_t n_1, __int64_t n_2)
{
    if (n == 0x42 || n == 0x43 ) {
        return n_1 + n_2;
    }

    return 0x31 + fib_tr_const (n-1, n_2, n_1 + n_2);
}


void main (int argc, char ** argv)
{
    __int64_t n = atoi (argv[1]);
    // printf("fib(%ld) = %ld\n", n, fib(n));
    // printf("fib_tr(%ld) = %ld\n", n, fib_tr(n, 0, 1));
    __int64_t volatile result = 0x4567;
    printf ("11111\n");
    printf ("11111\n");
    result = fib_tr_const(n, 0x123, 0x234);
    printf ("22222\n");
    printf ("22222\n");
    printf("fib_tr_const(%ld) = %ld\n", n, result);
}

And I built it with -O2.

gcc tail_call.c -O2 -o tail_call_const_O2

I disassembled the binary:

The main():

00000000000010a0 <main>:
    10a0:   f3 0f 1e fa             endbr64 
    10a4:   41 54                   push   %r12
    10a6:   ba 0a 00 00 00          mov    $0xa,%edx
    10ab:   48 83 ec 10             sub    $0x10,%rsp
    10af:   48 8b 7e 08             mov    0x8(%rsi),%rdi
    10b3:   31 f6                   xor    %esi,%esi
    10b5:   e8 c6 ff ff ff          callq  1080 <strtol@plt>
    10ba:   48 8d 3d 43 0f 00 00    lea    0xf43(%rip),%rdi        # 2004 <_IO_stdin_used+0x4>
    10c1:   48 c7 44 24 08 67 45    movq   $0x4567,0x8(%rsp)
    10c8:   00 00 
    10ca:   4c 63 e0                movslq %eax,%r12
    10cd:   e8 9e ff ff ff          callq  1070 <puts@plt>
    10d2:   48 8d 3d 2b 0f 00 00    lea    0xf2b(%rip),%rdi        # 2004 <_IO_stdin_used+0x4>
    10d9:   e8 92 ff ff ff          callq  1070 <puts@plt>


------------------START-------------------

    10de:   49 8d 44 24 ff          lea    -0x1(%r12),%rax
    10e3:   48 83 f8 01             cmp    $0x1,%rax
    10e7:   76 75                   jbe    115e <main+0xbe>
    10e9:   4c 89 e0                mov    %r12,%rax
    10ec:   31 c9                   xor    %ecx,%ecx
    10ee:   ba 01 00 00 00          mov    $0x1,%edx
    10f3:   31 ff                   xor    %edi,%edi
    10f5:   eb 0c                   jmp    1103 <main+0x63>
    10f7:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
    10fe:   00 00 
    1100:   48 89 f2                mov    %rsi,%rdx
    1103:   48 8d 34 17             lea    (%rdi,%rdx,1),%rsi
    1107:   48 89 c7                mov    %rax,%rdi
    110a:   48 83 e8 01             sub    $0x1,%rax
    110e:   48 01 f9                add    %rdi,%rcx
    1111:   48 89 d7                mov    %rdx,%rdi
    1114:   48 83 f8 02             cmp    $0x2,%rax
    1118:   75 e6                   jne    1100 <main+0x60>
    111a:   48 01 f2                add    %rsi,%rdx
    111d:   48 01 d1                add    %rdx,%rcx
    1120:   48 8d 3d e3 0e 00 00    lea    0xee3(%rip),%rdi        # 200a <_IO_stdin_used+0xa>
    1127:   48 89 4c 24 08          mov    %rcx,0x8(%rsp)

------------------END-------------------


    112c:   e8 3f ff ff ff          callq  1070 <puts@plt>
    1131:   48 8d 3d d2 0e 00 00    lea    0xed2(%rip),%rdi        # 200a <_IO_stdin_used+0xa>
    1138:   e8 33 ff ff ff          callq  1070 <puts@plt>
    113d:   48 8b 4c 24 08          mov    0x8(%rsp),%rcx
    1142:   48 83 c4 10             add    $0x10,%rsp
    1146:   31 c0                   xor    %eax,%eax
    1148:   4c 89 e2                mov    %r12,%rdx
    114b:   48 8d 35 be 0e 00 00    lea    0xebe(%rip),%rsi        # 2010 <_IO_stdin_used+0x10>
    1152:   bf 01 00 00 00          mov    $0x1,%edi
    1157:   41 5c                   pop    %r12
    1159:   e9 32 ff ff ff          jmpq   1090 <__printf_chk@plt>
    115e:   ba 01 00 00 00          mov    $0x1,%edx
    1163:   31 c9                   xor    %ecx,%ecx
    1165:   eb b6                   jmp    111d <main+0x7d>
    1167:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
    116e:   00 00 

The fib_tr_const():

00000000000012e0 <fib_tr_const>:
    12e0:   f3 0f 1e fa             endbr64 
    12e4:   48 8d 47 be             lea    -0x42(%rdi),%rax
    12e8:   48 89 f9                mov    %rdi,%rcx
    12eb:   48 83 f8 01             cmp    $0x1,%rax
    12ef:   77 0a                   ja     12fb <fib_tr_const+0x1b>
    12f1:   eb 35                   jmp    1328 <fib_tr_const+0x48>
    12f3:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
    12f8:   48 89 c2                mov    %rax,%rdx
    12fb:   48 83 ef 01             sub    $0x1,%rdi
    12ff:   48 8d 04 16             lea    (%rsi,%rdx,1),%rax
    1303:   48 89 d6                mov    %rdx,%rsi
    1306:   48 83 ff 43             cmp    $0x43,%rdi
    130a:   75 ec                   jne    12f8 <fib_tr_const+0x18>
    130c:   48 8d 34 49             lea    (%rcx,%rcx,2),%rsi
    1310:   48 01 c2                add    %rax,%rdx
    1313:   48 c1 e6 04             shl    $0x4,%rsi
    1317:   48 8d 8c 31 2d f3 ff    lea    -0xcd3(%rcx,%rsi,1),%rcx
    131e:   ff 
    131f:   48 8d 04 0a             lea    (%rdx,%rcx,1),%rax
    1323:   c3                      retq   
    1324:   0f 1f 40 00             nopl   0x0(%rax)
    1328:   48 89 d0                mov    %rdx,%rax
    132b:   48 89 f2                mov    %rsi,%rdx
    132e:   31 c9                   xor    %ecx,%ecx
    1330:   48 01 c2                add    %rax,%rdx
    1333:   48 8d 04 0a             lea    (%rdx,%rcx,1),%rax
    1337:   c3                      retq   
    1338:   0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
    133f:   00 

With the landmarks printf ("11111\n"); and printf ("22222\n");, and thus puts in assembly. I believe the part between START and END should be where the main() calls fib_tr_const().

I don't see any call or jmp instructions that branch from main() to the fib_tr_const(). So how is this function called?

I compared the assembly between the marked points with the assembly of the fib_tr_const(). The asm code looks to be similar. It seems the fib_tr_const() function is inlined.

Another question arises - if the function is inlined, why does the code still exist in the final binary? It would save space if it were removed, so why doesn't this happen?

I had below C program:

__int64_t fib_tr_const (__int64_t n, __int64_t n_1, __int64_t n_2)
{
    if (n == 0x42 || n == 0x43 ) {
        return n_1 + n_2;
    }

    return 0x31 + fib_tr_const (n-1, n_2, n_1 + n_2);
}


void main (int argc, char ** argv)
{
    __int64_t n = atoi (argv[1]);
    // printf("fib(%ld) = %ld\n", n, fib(n));
    // printf("fib_tr(%ld) = %ld\n", n, fib_tr(n, 0, 1));
    __int64_t volatile result = 0x4567;
    printf ("11111\n");
    printf ("11111\n");
    result = fib_tr_const(n, 0x123, 0x234);
    printf ("22222\n");
    printf ("22222\n");
    printf("fib_tr_const(%ld) = %ld\n", n, result);
}

And I built it with -O2.

gcc tail_call.c -O2 -o tail_call_const_O2

I disassembled the binary:

The main():

00000000000010a0 <main>:
    10a0:   f3 0f 1e fa             endbr64 
    10a4:   41 54                   push   %r12
    10a6:   ba 0a 00 00 00          mov    $0xa,%edx
    10ab:   48 83 ec 10             sub    $0x10,%rsp
    10af:   48 8b 7e 08             mov    0x8(%rsi),%rdi
    10b3:   31 f6                   xor    %esi,%esi
    10b5:   e8 c6 ff ff ff          callq  1080 <strtol@plt>
    10ba:   48 8d 3d 43 0f 00 00    lea    0xf43(%rip),%rdi        # 2004 <_IO_stdin_used+0x4>
    10c1:   48 c7 44 24 08 67 45    movq   $0x4567,0x8(%rsp)
    10c8:   00 00 
    10ca:   4c 63 e0                movslq %eax,%r12
    10cd:   e8 9e ff ff ff          callq  1070 <puts@plt>
    10d2:   48 8d 3d 2b 0f 00 00    lea    0xf2b(%rip),%rdi        # 2004 <_IO_stdin_used+0x4>
    10d9:   e8 92 ff ff ff          callq  1070 <puts@plt>


------------------START-------------------

    10de:   49 8d 44 24 ff          lea    -0x1(%r12),%rax
    10e3:   48 83 f8 01             cmp    $0x1,%rax
    10e7:   76 75                   jbe    115e <main+0xbe>
    10e9:   4c 89 e0                mov    %r12,%rax
    10ec:   31 c9                   xor    %ecx,%ecx
    10ee:   ba 01 00 00 00          mov    $0x1,%edx
    10f3:   31 ff                   xor    %edi,%edi
    10f5:   eb 0c                   jmp    1103 <main+0x63>
    10f7:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
    10fe:   00 00 
    1100:   48 89 f2                mov    %rsi,%rdx
    1103:   48 8d 34 17             lea    (%rdi,%rdx,1),%rsi
    1107:   48 89 c7                mov    %rax,%rdi
    110a:   48 83 e8 01             sub    $0x1,%rax
    110e:   48 01 f9                add    %rdi,%rcx
    1111:   48 89 d7                mov    %rdx,%rdi
    1114:   48 83 f8 02             cmp    $0x2,%rax
    1118:   75 e6                   jne    1100 <main+0x60>
    111a:   48 01 f2                add    %rsi,%rdx
    111d:   48 01 d1                add    %rdx,%rcx
    1120:   48 8d 3d e3 0e 00 00    lea    0xee3(%rip),%rdi        # 200a <_IO_stdin_used+0xa>
    1127:   48 89 4c 24 08          mov    %rcx,0x8(%rsp)

------------------END-------------------


    112c:   e8 3f ff ff ff          callq  1070 <puts@plt>
    1131:   48 8d 3d d2 0e 00 00    lea    0xed2(%rip),%rdi        # 200a <_IO_stdin_used+0xa>
    1138:   e8 33 ff ff ff          callq  1070 <puts@plt>
    113d:   48 8b 4c 24 08          mov    0x8(%rsp),%rcx
    1142:   48 83 c4 10             add    $0x10,%rsp
    1146:   31 c0                   xor    %eax,%eax
    1148:   4c 89 e2                mov    %r12,%rdx
    114b:   48 8d 35 be 0e 00 00    lea    0xebe(%rip),%rsi        # 2010 <_IO_stdin_used+0x10>
    1152:   bf 01 00 00 00          mov    $0x1,%edi
    1157:   41 5c                   pop    %r12
    1159:   e9 32 ff ff ff          jmpq   1090 <__printf_chk@plt>
    115e:   ba 01 00 00 00          mov    $0x1,%edx
    1163:   31 c9                   xor    %ecx,%ecx
    1165:   eb b6                   jmp    111d <main+0x7d>
    1167:   66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
    116e:   00 00 

The fib_tr_const():

00000000000012e0 <fib_tr_const>:
    12e0:   f3 0f 1e fa             endbr64 
    12e4:   48 8d 47 be             lea    -0x42(%rdi),%rax
    12e8:   48 89 f9                mov    %rdi,%rcx
    12eb:   48 83 f8 01             cmp    $0x1,%rax
    12ef:   77 0a                   ja     12fb <fib_tr_const+0x1b>
    12f1:   eb 35                   jmp    1328 <fib_tr_const+0x48>
    12f3:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
    12f8:   48 89 c2                mov    %rax,%rdx
    12fb:   48 83 ef 01             sub    $0x1,%rdi
    12ff:   48 8d 04 16             lea    (%rsi,%rdx,1),%rax
    1303:   48 89 d6                mov    %rdx,%rsi
    1306:   48 83 ff 43             cmp    $0x43,%rdi
    130a:   75 ec                   jne    12f8 <fib_tr_const+0x18>
    130c:   48 8d 34 49             lea    (%rcx,%rcx,2),%rsi
    1310:   48 01 c2                add    %rax,%rdx
    1313:   48 c1 e6 04             shl    $0x4,%rsi
    1317:   48 8d 8c 31 2d f3 ff    lea    -0xcd3(%rcx,%rsi,1),%rcx
    131e:   ff 
    131f:   48 8d 04 0a             lea    (%rdx,%rcx,1),%rax
    1323:   c3                      retq   
    1324:   0f 1f 40 00             nopl   0x0(%rax)
    1328:   48 89 d0                mov    %rdx,%rax
    132b:   48 89 f2                mov    %rsi,%rdx
    132e:   31 c9                   xor    %ecx,%ecx
    1330:   48 01 c2                add    %rax,%rdx
    1333:   48 8d 04 0a             lea    (%rdx,%rcx,1),%rax
    1337:   c3                      retq   
    1338:   0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
    133f:   00 

With the landmarks printf ("11111\n"); and printf ("22222\n");, and thus puts in assembly. I believe the part between START and END should be where the main() calls fib_tr_const().

I don't see any call or jmp instructions that branch from main() to the fib_tr_const(). So how is this function called?

I compared the assembly between the marked points with the assembly of the fib_tr_const(). The asm code looks to be similar. It seems the fib_tr_const() function is inlined.

Another question arises - if the function is inlined, why does the code still exist in the final binary? It would save space if it were removed, so why doesn't this happen?

Share Improve this question edited 17 hours ago user2138149 17.1k30 gold badges145 silver badges287 bronze badges asked 18 hours ago smwikipediasmwikipedia 64.3k96 gold badges328 silver badges504 bronze badges 3
  • Should return 0x31 + fib_tr (n-1, n_2, n_1 + n_2); be return 0x31 + fib_tr_const (n-1, n_2, n_1 + n_2);? – Ian Abbott Commented 17 hours ago
  • What input do you run this with? – Lundin Commented 14 hours ago
  • I managed to get a funny warning from clang: "warning: iteration 9223372036854775820 invokes undefined behavior" – Lundin Commented 13 hours ago
Add a comment  | 

2 Answers 2

Reset to default 12

TL;DR: It's inlined here, but the compiler doesn't know that it isn't used elsewhere.

When you compile a program, it could be for a library, in which case it would be bad to remove a function just because it isn't used elsewhere internally. The compiler doesn't know what the full executable contains or uses so it can't skip generating fib_tr_const. For example:

// foo.c
float square_helper(float x){
    return x * x;
}
float hypot2(float x,float y){
    return square_helper(x) + square_helper(y);
}
//bar.c
float square_helper(float);
float hypot2(float,float);
int main(void){
    printf("%.2f",hypot2(square_helper(3.4f),2.2f);
    return 0;
}

Here, the compiler can't remove square_helper from foo.c even though it might be inlined because bar.c depends on it. The compiler works on one TU(.c file) at a time, so it has no way of knowing whether such a bar.c exists.

There are three solutions to the problem:

  1. mark it as static. static functions are local to the TU it's declared in. This means that nothing outside can see the function, so the compiler is free to remove it after it inlines all the usages everywhere it can be seen (which is within that .c file).
  2. Tell the linker to optimize it out. gcc has -flto -fuse-linker-plugin which tells the linker to do Link Time Optimization, including removing functions that are neither exported (if you are compiling a DLL) nor used anywhere in the full program. The linker sees that fib_tr_const is not used anywhere after the compiler has inlined its use, so the linker removes it.
  3. Use -fwhole-program. This will only work if the program is a single TU(one .c file), and it tells gcc that this file is the whole program; there's nothing elsewhere that could possibly use the functions here. This allows gcc to remove the inlined functions. Usually this is not the best solution because option #2 is better in most cases and it limits your program to a single file.

It has to do with the concept of translation units.

A translation unit (or TU for short) is the "unit" that the compiler deals with, and can be seen as a single source file with all its included header files.

Since that's all the compiler know about, it can only inline functions from the current TU, not from any other TU. But because of this, the compiler can't know if the functions might be used from another TU, so the function actually needs to exists, so the linker can find it if it's called from another TU.

发布评论

评论列表(0)

  1. 暂无评论