Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SR-12046] Creating empty OptionSet with empty array literal generates bad code #54482

Closed
Lukasa opened this issue Jan 17, 2020 · 2 comments
Closed
Labels
bug (A deviation from expected or documented behavior. Also: expected but undesirable behavior.) · compiler (The Swift compiler in itself) · performance

Comments

@Lukasa
Copy link
Contributor

Lukasa commented Jan 17, 2020

Previous ID SR-12046
Radar rdar://problem/58861171
Original Reporter @Lukasa
Type Bug
Status Resolved
Resolution Done
Environment

Apple Swift version 5.2 (swiftlang-1103.0.19.1 clang-1103.0.30.1)
Target: x86_64-apple-darwin19.0.0

Additional Detail from JIRA
Votes 1
Component/s Compiler
Labels Bug, Performance
Assignee None
Priority Medium

md5: ec0b9f3f4c9bb66b994c0b235ef4e7d8

Issue Description:

The following program defines a straightforward OptionSet:

/// Minimal `OptionSet` with a `UInt8` raw value, used as the reproduction type.
struct TestSet: OptionSet {
    /// Bit mask backing the set; each static member below occupies one bit.
    var rawValue: UInt8

    /// Stores `rawValue` unchanged.
    init(rawValue: UInt8) {
        self.rawValue = rawValue
    }

    static let one: TestSet = .init(rawValue: 1 << 0)   // bit 0 (raw value 1)
    static let two: TestSet = .init(rawValue: 1 << 1)   // bit 1 (raw value 2)
    static let four: TestSet = .init(rawValue: 1 << 2)  // bit 2 (raw value 4)
}

/// Returns a `TestSet` built from the empty array literal.
/// This is the spelling the report identifies as producing oversized code at -O.
func test1() -> TestSet {
   return []
}

/// Returns a `TestSet` built from a one-element array literal containing `.one`.
func test2() -> TestSet {
   return [.one]
}

/// Returns a `TestSet` built with the no-argument initializer instead of a literal.
func test3() -> TestSet {
   return .init()
}

When compiled with -O, the codegen for test2 is basically optimal:

; test2 at -O: frame setup/teardown around a single constant load.
push       rbp
mov        rbp, rsp
mov        al, 0x1                      ; result byte = 1, the raw value of .one
pop        rbp
ret

However, the equivalent function test1 that attempts to create an empty OptionSet generates 315 extra bytes worth of assembly:

; test1 at -O (_$s4test5test1AA7TestSetVyF): ORs together a run of bytes read
; from the storage object loaded into rcx and leaves the result in al.
;   r8  = 64-bit value at [rcx+0x10], used as the number of bytes to process
;         (presumably the array's element count - confirm against the
;         array storage layout)
;   data bytes are read starting at [rcx+0x20]
push       rbp
mov        rbp, rsp
mov        rcx, qword [__swiftEmptyArrayStorage_100002120] ; __swiftEmptyArrayStorage_100002120
mov        r8, qword [rcx+0x10]         ; r8 = byte count
test       r8, r8
je         loc_100001883                ; count == 0: return 0 straight away

cmp        r8, 0x1f
ja         loc_100001887                ; count > 31: take the SSE path

xor        eax, eax                     ; al = OR accumulator = 0
xor        esi, esi                     ; rsi = byte index = 0
jmp        loc_100001995                ; 1..31 bytes: scalar loop only

; Early exit for a zero count: result is 0.
loc_100001883:
xor        eax, eax                     ; CODE XREF=_$s4test5test1AA7TestSetVyF+18
pop        rbp
ret
; endp

; SSE path setup (count >= 32).
loc_100001887:
mov        rsi, r8                      ; CODE XREF=_$s4test5test1AA7TestSetVyF+24
and        rsi, 0xffffffffffffffe0      ; rsi = count rounded down to a multiple of 32
lea        rdi, qword [rsi-0x20]
mov        rax, rdi
shr        rax, 0x5
inc        rax                          ; rax = number of 32-byte chunks (rsi / 32)
mov        edx, eax
and        edx, 0x3                     ; rdx = chunks mod 4, left over after the 4x-unrolled loop
cmp        rdi, 0x60
jae        loc_1000018bf                ; 4 or more chunks: run the unrolled loop first

pxor       xmm0, xmm0                   ; xmm0 / xmm1 = vector OR accumulators, cleared
xor        edi, edi                     ; rdi = byte offset into the data = 0
pxor       xmm1, xmm1
test       rdx, rdx
jne        loc_100001935                ; leftover chunks: go to the one-chunk-per-pass loop

jmp        loc_10000195a                ; nothing left: reduce the accumulators

; Unrolled-loop setup: rax becomes the chunk count rounded down to a multiple of 4.
loc_1000018bf:
sub        rax, rdx                     ; CODE XREF=_$s4test5test1AA7TestSetVyF+69
pxor       xmm0, xmm0                   ; clear both accumulators
xor        edi, edi                     ; rdi = byte offset = 0
pxor       xmm1, xmm1
nop        dword [rax]                  ; multi-byte NOP (presumably alignment padding for the loop head)

; Unrolled loop: each pass ORs 128 bytes (eight 16-byte loads) into the
; accumulators. Loads at +0x20/+0x40/+0x60/+0x80 end up in xmm0, loads at
; +0x30/+0x50/+0x70/+0x90 end up in xmm1.
loc_1000018d0:
movdqu     xmm2, xmmword [rcx+rdi+0x20] ; CODE XREF=_$s4test5test1AA7TestSetVyF+206
por        xmm2, xmm0
movdqu     xmm0, xmmword [rcx+rdi+0x30]
por        xmm0, xmm1
movdqu     xmm1, xmmword [rcx+rdi+0x40]
movdqu     xmm3, xmmword [rcx+rdi+0x50]
movdqu     xmm4, xmmword [rcx+rdi+0x60]
por        xmm4, xmm1
por        xmm4, xmm2
movdqu     xmm2, xmmword [rcx+rdi+0x70]
por        xmm2, xmm3
por        xmm2, xmm0
movdqu     xmm0, xmmword [rcx+rdi+0x80]
por        xmm0, xmm4
movdqu     xmm1, xmmword [rcx+rdi+0x90]
por        xmm1, xmm2
sub        rdi, 0xffffffffffffff80      ; rdi += 0x80 (subtracting -128)
add        rax, 0xfffffffffffffffc      ; rax -= 4 chunks (adding -4)
jne        loc_1000018d0

test       rdx, rdx
je         loc_10000195a                ; no leftover chunks: skip the remainder loop

; Remainder loop: one 32-byte chunk per pass, rdx passes in total.
loc_100001935:
lea        rax, qword [rcx+rdi+0x30]    ; CODE XREF=_$s4test5test1AA7TestSetVyF+84
neg        rdx                          ; count up from -rdx to 0
nop        dword [rax]                  ; multi-byte NOP (presumably alignment padding for the loop head)

loc_100001940:
movdqu     xmm2, xmmword [rax-0x10]     ; CODE XREF=_$s4test5test1AA7TestSetVyF+248
por        xmm0, xmm2
movdqu     xmm2, xmmword [rax]
por        xmm1, xmm2
add        rax, 0x20                    ; advance to the next 32-byte chunk
inc        rdx
jne        loc_100001940

; Horizontal reduction: fold the two 16-byte accumulators down to one byte.
loc_10000195a:
por        xmm0, xmm1                   ; CODE XREF=_$s4test5test1AA7TestSetVyF+90, _$s4test5test1AA7TestSetVyF+211
pshufd     xmm1, xmm0, 0x4e             ; swap the two 64-bit halves
por        xmm1, xmm0
pshufd     xmm0, xmm1, 0xe5             ; copy dword 1 into dword 0
por        xmm0, xmm1
movdqa     xmm1, xmm0
psrld      xmm1, 0x10                   ; bring the upper 16 bits of each dword down
por        xmm1, xmm0
movdqa     xmm0, xmm1
psrlw      xmm0, 0x8                    ; bring the upper byte of each word down
por        xmm0, xmm1
pextrb     eax, xmm0, 0x0               ; al = OR of all vector-processed bytes
cmp        r8, rsi
je         loc_1000019a1                ; count was a multiple of 32: no scalar tail

; Scalar loop: OR the remaining bytes one at a time, from index rsi up to r8.
loc_100001995:
or         al, byte [rcx+rsi+0x20]      ; CODE XREF=_$s4test5test1AA7TestSetVyF+30, _$s4test5test1AA7TestSetVyF+319
inc        rsi
cmp        r8, rsi
jne        loc_100001995

loc_1000019a1:
pop        rbp                          ; CODE XREF=_$s4test5test1AA7TestSetVyF+307
ret
; end

This nasty codegen is specific to the empty array literal; using the .init() construction instead generates optimal code again:

; test3 at -O: frame setup/teardown around a single register clear.
push       rbp
mov        rbp, rsp
xor        eax, eax                     ; result = 0, the empty set's raw value
pop        rbp
ret

It seems that something is defeating the optimiser's ability to optimise initialisation with the empty array literal, even though that optimisation successfully lands with non-empty array literals.

The code size impact is smaller with -Osize as the compiler doesn't unroll the loop, but neither outcome is anywhere near optimal.

@beccadax
Copy link
Contributor

@swift-ci create

@eeckstein
Copy link
Member

fixed on master in #29428

@swift-ci swift-ci transferred this issue from apple/swift-issues Apr 25, 2022
This issue was closed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug (A deviation from expected or documented behavior. Also: expected but undesirable behavior.) · compiler (The Swift compiler in itself) · performance
Projects
None yet
Development

No branches or pull requests

3 participants