[SR-10166] Optimizer fails to collapse loops to constant return without using unchecked arithmetic #52568

kevints · 2019-03-25T21:06:42Z


Previous ID	SR-10166
Radar	rdar://problem/52530231
Original Reporter	@kevints
Type	Bug

Environment

Reproduced on macOS and Linux with Swift 4.2

Additional Detail from JIRA


Votes	2
Component/s	Compiler
Labels	Bug, Performance
Assignee	shajrawi (JIRA)
Priority	Medium

md5: 3c4a456d5658c054098ca7d3eb5707ec

Issue Description:

I would expect the optimizer to collapse the following equivalent loops into a constant return of an immediate value (1998). However it appears that the checked variant C-style while loop generates less code than the Swift-style for over a range, both checked variants generate actual loops, and both unchecked variants replace the loop with an immediate value (which is the optimization I would expect).

% cat LoopTest.swift                              
func testCStyleWhileLoop() -> CInt {
    var x: CInt = 0
    var i: CInt = 0
    while i < 999 {
        x += 2
        i += 1
    }
    return x
}

func testSwiftRangeLoop() -> CInt {
    var y: CInt = 0
    for _ in 0..<999 {
        y += 2
    }

    return y
}

func testCStyleWhileLoopUnchecked() -> CInt {
    var x: CInt = 0
    var i: CInt = 0
    while i < 999 {
        x &+= 2
        i += 1
    }
    return x
}

func testSwiftRangeLoopUnchecked() -> CInt {
    var x: CInt = 0
    for _ in 0..<999 {
        x &+= 2
    }
    return x
}

% swiftc -O -c LoopTest.swift && objdump -disassemble LoopTest.o | swift demangle

LoopTest.o: file format Mach-O 64-bit x86-64

Disassembly of section __TEXT,__text:
_main:
       0:   55  pushq   %rbp
       1:   48 89 e5    movq    %rsp, %rbp
       4:   31 c0   xorl    %eax, %eax
       6:   5d  popq    %rbp
       7:   c3  retq
       8:   0f 1f 84 00 00 00 00 00     nopl    (%rax,%rax)

LoopTest.testCStyleWhileLoop() -> Swift.Int32:
      10:   55  pushq   %rbp
      11:   48 89 e5    movq    %rsp, %rbp
      14:   31 c0   xorl    %eax, %eax
      16:   31 c9   xorl    %ecx, %ecx
      18:   0f 1f 84 00 00 00 00 00     nopl    (%rax,%rax)
      20:   83 c1 09    addl    $9, %ecx
      23:   83 c0 12    addl    $18, %eax
      26:   81 f9 e7 03 00 00   cmpl    $999, %ecx
      2c:   72 f2   jb  -14 <LoopTest.testCStyleWhileLoop() -> Swift.Int32+0x10>
      2e:   5d  popq    %rbp
      2f:   c3  retq

LoopTest.testSwiftRangeLoop() -> Swift.Int32:
      30:   55  pushq   %rbp
      31:   48 89 e5    movq    %rsp, %rbp
      34:   b8 02 00 00 00  movl    $2, %eax
      39:   66 0f 6e c8     movd    %eax, %xmm1
      3d:   66 0f ef c0     pxor    %xmm0, %xmm0
      41:   b8 e0 03 00 00  movl    $992, %eax
      46:   66 0f 6f 15 52 00 00 00     movdqa  82(%rip), %xmm2
      4e:   66 90   nop
      50:   66 0f fe ca     paddd   %xmm2, %xmm1
      54:   66 0f fe c2     paddd   %xmm2, %xmm0
      58:   48 83 c0 e0     addq    $-32, %rax
      5c:   75 f2   jne -14 <LoopTest.testSwiftRangeLoop() -> Swift.Int32+0x20>
      5e:   66 0f fe c1     paddd   %xmm1, %xmm0
      62:   66 0f 70 c8 4e  pshufd  $78, %xmm0, %xmm1
      67:   66 0f fe c8     paddd   %xmm0, %xmm1
      6b:   66 0f 38 02 c9  phaddd  %xmm1, %xmm1
      70:   66 0f 7e c8     movd    %xmm1, %eax
      74:   83 c0 0c    addl    $12, %eax
      77:   5d  popq    %rbp
      78:   c3  retq
      79:   0f 1f 80 00 00 00 00    nopl    (%rax)

LoopTest.testCStyleWhileLoopUnchecked() -> Swift.Int32:
      80:   55  pushq   %rbp
      81:   48 89 e5    movq    %rsp, %rbp
      84:   b8 ce 07 00 00  movl    $1998, %eax
      89:   5d  popq    %rbp
      8a:   c3  retq
      8b:   0f 1f 44 00 00  nopl    (%rax,%rax)

LoopTest.testSwiftRangeLoopUnchecked() -> Swift.Int32:
      90:   55  pushq   %rbp
      91:   48 89 e5    movq    %rsp, %rbp
      94:   b8 ce 07 00 00  movl    $1998, %eax
      99:   5d  popq    %rbp
      9a:   c3  retq

The text was updated successfully, but these errors were encountered:

belkadan · 2019-03-26T16:58:41Z

cc shajrawi (JIRA User)

swift-ci · 2019-04-05T20:32:19Z

Comment by Joe Shajrawi (JIRA)

I have looked at the resulting SIL - in the unchecked case LLVM is turning it into a constant:

// testSwiftRangeLoopUnchecked()
sil hidden @$s8LoopTest014testSwiftRangeA9Uncheckeds5Int32VyF : $@convention(thin) () -> Int32 {
bb0:
  %0 = integer_literal $Builtin.Int32, 0          // user: %7
  %1 = integer_literal $Builtin.Int64, 0          // user: %7
  %2 = integer_literal $Builtin.Int64, 999        // user: %16
  %3 = integer_literal $Builtin.Int1, -1          // user: %12
  %4 = integer_literal $Builtin.Int64, 1          // user: %12
  %5 = integer_literal $Builtin.Int32, 2          // user: %14
  %6 = integer_literal $Builtin.Int1, 0           // user: %14
  br bb2(%0 : $Builtin.Int32, %1 : $Builtin.Int64) // id: %7


bb1:                                              // Preds: bb2
  %8 = struct $Int32 (%15 : $Builtin.Int32)       // user: %9
  return %8 : $Int32                              // id: %9


// %10                                            // user: %14
// %11                                            // user: %12
bb2(%10 : $Builtin.Int32, %11 : $Builtin.Int64):  // Preds: bb3 bb0
  %12 = builtin "sadd_with_overflow_Int64"(%11 : $Builtin.Int64, %4 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1) // user: %13
  %13 = tuple_extract %12 : $(Builtin.Int64, Builtin.Int1), 0 // users: %18, %16
  %14 = builtin "sadd_with_overflow_Int32"(%10 : $Builtin.Int32, %5 : $Builtin.Int32, %6 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1) // user: %15
  %15 = tuple_extract %14 : $(Builtin.Int32, Builtin.Int1), 0 // users: %18, %8
  %16 = builtin "cmp_eq_Int64"(%13 : $Builtin.Int64, %2 : $Builtin.Int64) : $Builtin.Int1 // user: %17
  cond_br %16, bb1, bb3                           // id: %17


bb3:                                              // Preds: bb2
  br bb2(%15 : $Builtin.Int32, %13 : $Builtin.Int64) // id: %18
} // end sil function '$s8LoopTest014testSwiftRangeA9Uncheckeds5Int32VyF'

In the unchecked case we are not so lucky:

// testSwiftRangeLoop()
sil hidden @$s8LoopTest014testSwiftRangeA0s5Int32VyF : $@convention(thin) () -> Int32 {
bb0:
  %0 = integer_literal $Builtin.Int32, 0          // user: %6
  %1 = integer_literal $Builtin.Int64, 0          // user: %6
  %2 = integer_literal $Builtin.Int64, 999        // user: %17
  %3 = integer_literal $Builtin.Int1, -1          // users: %13, %11
  %4 = integer_literal $Builtin.Int64, 1          // user: %11
  %5 = integer_literal $Builtin.Int32, 2          // user: %13
  br bb2(%0 : $Builtin.Int32, %1 : $Builtin.Int64) // id: %6


bb1:                                              // Preds: bb2
  %7 = struct $Int32 (%14 : $Builtin.Int32)       // user: %8
  return %7 : $Int32                              // id: %8


// %9                                             // user: %13
// %10                                            // user: %11
bb2(%9 : $Builtin.Int32, %10 : $Builtin.Int64):   // Preds: bb3 bb0
  %11 = builtin "sadd_with_overflow_Int64"(%10 : $Builtin.Int64, %4 : $Builtin.Int64, %3 : $Builtin.Int1) : $(Builtin.Int64, Builtin.Int1) // user: %12
  %12 = tuple_extract %11 : $(Builtin.Int64, Builtin.Int1), 0 // users: %19, %17
  %13 = builtin "sadd_with_overflow_Int32"(%9 : $Builtin.Int32, %5 : $Builtin.Int32, %3 : $Builtin.Int1) : $(Builtin.Int32, Builtin.Int1) // users: %15, %14
  %14 = tuple_extract %13 : $(Builtin.Int32, Builtin.Int1), 0 // users: %19, %7
  %15 = tuple_extract %13 : $(Builtin.Int32, Builtin.Int1), 1 // user: %16
  cond_fail %15 : $Builtin.Int1                   // id: %16
  %17 = builtin "cmp_eq_Int64"(%12 : $Builtin.Int64, %2 : $Builtin.Int64) : $Builtin.Int1 // user: %18
  cond_br %17, bb1, bb3                           // id: %18


bb3:                                              // Preds: bb2
  br bb2(%14 : $Builtin.Int32, %12 : $Builtin.Int64) // id: %19
} // end sil function '$s8LoopTest014testSwiftRangeA0s5Int32VyF'

We are missing a bunch of optimizations that would allow us to "properly" handle this on SIL Level, such as Induction Variable Simplification...

I have tested a quick "cheat" for this problem - changing loop unrolling with a new heuristic that ignores the trip count if we are reasonably sure we can constant propagate the loop:

sil hidden @$s8LoopTest014testSwiftRangeA0s5Int32VyF : $@convention(thin) () -> Int32 {
bb0:
  %0 = integer_literal $Builtin.Int32, 1998       // user: %1
  %1 = struct $Int32 (%0 : $Builtin.Int32)        // user: %2
  return %1 : $Int32                              // id: %2
} // end sil function '$s8LoopTest014testSwiftRangeA0s5Int32VyF'

However, I would not be comfortable committing this heuristic: it is the wrong way to do this - would unnecessarily increase compile time, and, if we are wrong about being able to fold said loop we may blow up code size.

The right way to do this would be adding the missing Loop Transformations to the SIL Optimizer.

kevints · 2019-07-02T18:07:29Z

@swift-ci create

swift-ci transferred this issue from apple/swift-issues Apr 25, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[SR-10166] Optimizer fails to collapse loops to constant return without using unchecked arithmetic #52568

[SR-10166] Optimizer fails to collapse loops to constant return without using unchecked arithmetic #52568

kevints mannequin commented Mar 25, 2019

belkadan commented Mar 26, 2019

swift-ci commented Apr 5, 2019

kevints mannequin commented Jul 2, 2019

[SR-10166] Optimizer fails to collapse loops to constant return without using unchecked arithmetic #52568

[SR-10166] Optimizer fails to collapse loops to constant return without using unchecked arithmetic #52568

Comments

kevints mannequin commented Mar 25, 2019

belkadan commented Mar 26, 2019

swift-ci commented Apr 5, 2019

kevints mannequin commented Jul 2, 2019