Skip to content

Commit

Permalink
[RISCV] Add pattern for PACK/PACKH in common misaligned load case (#1…
Browse files Browse the repository at this point in the history
…10644)

PACKH is currently only selected for assembling the first two bytes of a
misaligned load. A fairly complex RV32-only pattern is added for
producing PACKH+PACKH+PACK to assemble the result of a misaligned 32-bit
load.

Another pattern was added that just covers PACKH for shifted offsets 16
and 24, producing a packh and shift to replace two shifts and an 'or'.
This slightly improves RV64IZBKB for a 64-bit load, but fails to match
for the misaligned 32-bit load because the load of the upper byte is
anyext in the SelectionDAG.

I wrote the patch this way because it was quick and easy and has at
least some benefit, but the "right" approach probably merits further
discussion. Introducing target-specific SDNodes for PACK* and having
custom lowering for unaligned loads/stores that introduces those nodes
seems like it might be attractive. However, adding these patterns does provide benefit - so that's what this patch does for now.
  • Loading branch information
asb authored Oct 1, 2024
1 parent 32ffc9f commit e45b44c
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 35 deletions.
13 changes: 12 additions & 1 deletion llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
Original file line number Diff line number Diff line change
Expand Up @@ -599,15 +599,26 @@ def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
// Select PACKH for ((rs2 << 8) | zext8(rs1)) when the result is explicitly
// masked to its low 16 bits (0xFFFF). rs2 needs no zexti8 here because the
// mask discards everything above bit 15.
def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
(zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
(PACKH GPR:$rs1, GPR:$rs2)>;
// Two zero-extended bytes placed at bit offsets 16 (rs1) and 24 (rs2) and
// OR-ed together: emit PACKH of the two bytes followed by SLLI by 16, which
// replaces two shifts and an 'or' with a packh and a single shift.
def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 24)),
(shl (zexti8 (XLenVT GPR:$rs1)), (XLenVT 16))),
(SLLI (PACKH GPR:$rs1, GPR:$rs2), (XLenVT 16))>;

// Select PACKH for ((rs2 << 8) | zext8(rs1)) without an explicit 0xFFFF mask,
// gated by binop_allhusers<or>.
// NOTE(review): binop_allhusers is defined elsewhere; presumably it restricts
// the match to 'or' nodes whose users only consume the low halfword, which is
// what would make dropping the mask safe - confirm against its definition.
def : Pat<(binop_allhusers<or> (shl GPR:$rs2, (XLenVT 8)),
(zexti8 (XLenVT GPR:$rs1))),
(PACKH GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbkb]

let Predicates = [HasStdExtZbkb, IsRV32] in
let Predicates = [HasStdExtZbkb, IsRV32] in {
// RV32: (zext16(rs1) | (rs2 << 16)) selects PACK, with rs1 supplying the low
// halfword and rs2 the high halfword of the i32 result.
def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
(PACK GPR:$rs1, GPR:$rs2)>;
// RV32 only: four zero-extended bytes OR-ed together at bit offsets 0, 8, 16
// and 24 - the shape produced when a misaligned 32-bit load is assembled from
// byte loads. op0rs1/op0rs2 are the bytes at offsets 0/8 (low halfword) and
// op1rs1/op1rs2 the bytes at offsets 16/24 (high halfword). Each pair becomes
// one PACKH and the two halfwords are then joined with PACK.
def : Pat<(or (or
(shl (zexti8 (XLenVT GPR:$op1rs2)), (XLenVT 24)),
(shl (zexti8 (XLenVT GPR:$op1rs1)), (XLenVT 16))),
(or
(shl (zexti8 (XLenVT GPR:$op0rs2)), (XLenVT 8)),
(zexti8 (XLenVT GPR:$op0rs1)))),
(PACK (PACKH GPR:$op0rs1, GPR:$op0rs2), (PACKH GPR:$op1rs1, GPR:$op1rs2))>;
}

let Predicates = [HasStdExtZbkb, IsRV64] in {
def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))),
Expand Down
75 changes: 41 additions & 34 deletions llvm/test/CodeGen/RISCV/unaligned-load-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -97,18 +97,29 @@ define i32 @load_i32(ptr %p) {
; SLOWBASE-NEXT: or a0, a0, a1
; SLOWBASE-NEXT: ret
;
; SLOWZBKB-LABEL: load_i32:
; SLOWZBKB: # %bb.0:
; SLOWZBKB-NEXT: lbu a1, 0(a0)
; SLOWZBKB-NEXT: lbu a2, 1(a0)
; SLOWZBKB-NEXT: lbu a3, 2(a0)
; SLOWZBKB-NEXT: lbu a0, 3(a0)
; SLOWZBKB-NEXT: packh a1, a1, a2
; SLOWZBKB-NEXT: slli a3, a3, 16
; SLOWZBKB-NEXT: slli a0, a0, 24
; SLOWZBKB-NEXT: or a0, a0, a3
; SLOWZBKB-NEXT: or a0, a0, a1
; SLOWZBKB-NEXT: ret
; RV32IZBKB-LABEL: load_i32:
; RV32IZBKB: # %bb.0:
; RV32IZBKB-NEXT: lbu a1, 1(a0)
; RV32IZBKB-NEXT: lbu a2, 2(a0)
; RV32IZBKB-NEXT: lbu a3, 3(a0)
; RV32IZBKB-NEXT: lbu a0, 0(a0)
; RV32IZBKB-NEXT: packh a2, a2, a3
; RV32IZBKB-NEXT: packh a0, a0, a1
; RV32IZBKB-NEXT: pack a0, a0, a2
; RV32IZBKB-NEXT: ret
;
; RV64IZBKB-LABEL: load_i32:
; RV64IZBKB: # %bb.0:
; RV64IZBKB-NEXT: lbu a1, 0(a0)
; RV64IZBKB-NEXT: lbu a2, 1(a0)
; RV64IZBKB-NEXT: lbu a3, 2(a0)
; RV64IZBKB-NEXT: lbu a0, 3(a0)
; RV64IZBKB-NEXT: packh a1, a1, a2
; RV64IZBKB-NEXT: slli a3, a3, 16
; RV64IZBKB-NEXT: slli a0, a0, 24
; RV64IZBKB-NEXT: or a0, a0, a3
; RV64IZBKB-NEXT: or a0, a0, a1
; RV64IZBKB-NEXT: ret
;
; FAST-LABEL: load_i32:
; FAST: # %bb.0:
Expand Down Expand Up @@ -176,20 +187,16 @@ define i64 @load_i64(ptr %p) {
; RV32IZBKB-NEXT: lbu a2, 1(a0)
; RV32IZBKB-NEXT: lbu a3, 2(a0)
; RV32IZBKB-NEXT: lbu a4, 3(a0)
; RV32IZBKB-NEXT: lbu a5, 4(a0)
; RV32IZBKB-NEXT: lbu a6, 5(a0)
; RV32IZBKB-NEXT: lbu a7, 6(a0)
; RV32IZBKB-NEXT: lbu t0, 7(a0)
; RV32IZBKB-NEXT: packh a0, a3, a4
; RV32IZBKB-NEXT: packh a1, a1, a2
; RV32IZBKB-NEXT: slli a3, a3, 16
; RV32IZBKB-NEXT: slli a4, a4, 24
; RV32IZBKB-NEXT: or a3, a4, a3
; RV32IZBKB-NEXT: lbu a2, 4(a0)
; RV32IZBKB-NEXT: lbu a4, 5(a0)
; RV32IZBKB-NEXT: lbu a5, 6(a0)
; RV32IZBKB-NEXT: lbu a6, 7(a0)
; RV32IZBKB-NEXT: or a0, a3, a1
; RV32IZBKB-NEXT: packh a1, a2, a4
; RV32IZBKB-NEXT: slli a5, a5, 16
; RV32IZBKB-NEXT: slli a6, a6, 24
; RV32IZBKB-NEXT: or a2, a6, a5
; RV32IZBKB-NEXT: or a1, a2, a1
; RV32IZBKB-NEXT: pack a0, a1, a0
; RV32IZBKB-NEXT: packh a1, a7, t0
; RV32IZBKB-NEXT: packh a2, a5, a6
; RV32IZBKB-NEXT: pack a1, a2, a1
; RV32IZBKB-NEXT: ret
;
; RV64IZBKB-LABEL: load_i64:
Expand All @@ -199,18 +206,16 @@ define i64 @load_i64(ptr %p) {
; RV64IZBKB-NEXT: lbu a3, 6(a0)
; RV64IZBKB-NEXT: lbu a4, 7(a0)
; RV64IZBKB-NEXT: packh a1, a1, a2
; RV64IZBKB-NEXT: slli a3, a3, 16
; RV64IZBKB-NEXT: slli a4, a4, 24
; RV64IZBKB-NEXT: or a3, a4, a3
; RV64IZBKB-NEXT: lbu a2, 0(a0)
; RV64IZBKB-NEXT: packh a2, a3, a4
; RV64IZBKB-NEXT: lbu a3, 0(a0)
; RV64IZBKB-NEXT: lbu a4, 1(a0)
; RV64IZBKB-NEXT: lbu a5, 2(a0)
; RV64IZBKB-NEXT: lbu a0, 3(a0)
; RV64IZBKB-NEXT: or a1, a3, a1
; RV64IZBKB-NEXT: packh a2, a2, a4
; RV64IZBKB-NEXT: slli a5, a5, 16
; RV64IZBKB-NEXT: slli a0, a0, 24
; RV64IZBKB-NEXT: or a0, a0, a5
; RV64IZBKB-NEXT: slli a2, a2, 16
; RV64IZBKB-NEXT: or a1, a2, a1
; RV64IZBKB-NEXT: packh a2, a3, a4
; RV64IZBKB-NEXT: packh a0, a5, a0
; RV64IZBKB-NEXT: slli a0, a0, 16
; RV64IZBKB-NEXT: or a0, a0, a2
; RV64IZBKB-NEXT: pack a0, a0, a1
; RV64IZBKB-NEXT: ret
Expand Down Expand Up @@ -574,3 +579,5 @@ define void @store_large_constant(ptr %x) {
store i64 18364758544493064720, ptr %x, align 1
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; SLOWZBKB: {{.*}}

0 comments on commit e45b44c

Please sign in to comment.