Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement intrinsic for swapping values #111744

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_cranelift/src/driver/jit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ fn dep_symbol_lookup_fn(
Linkage::NotLinked | Linkage::IncludedFromDylib => {}
Linkage::Static => {
let name = crate_info.crate_name[&cnum];
let mut err = sess.struct_err(&format!("Can't load static lib {}", name));
let mut err = sess.struct_err(format!("Can't load static lib {}", name));
err.note("rustc_codegen_cranelift can only load dylibs in JIT mode.");
err.emit();
}
Expand Down
124 changes: 124 additions & 0 deletions compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,130 @@ fn codegen_regular_intrinsic_call<'tcx>(
// FIXME use emit_small_memset
fx.bcx.call_memset(fx.target_config, dst_ptr, val, count);
}

// Swaps the pointees of `x_ptr` and `y_ptr` by routing one of them through a
// temporary stack slot: scratch <- x, x <- y, y <- scratch.
sym::swap_nonoverlapping_single => {
    intrinsic_args!(fx, args => (x_ptr, y_ptr); intrinsic);
    let elem_ty = x_ptr.layout().ty.builtin_deref(true).unwrap().ty;
    let elem_layout = fx.layout_of(elem_ty);

    // ZSTs swap is noop.
    if elem_layout.size != Size::ZERO {
        // Probably, it would be better to have dedicated method for this in
        // `cranelift_frontend::FunctionBuilder`
        // with optimizations based on size and alignment of values.

        let lhs_addr = x_ptr.load_scalar(fx);
        let rhs_addr = y_ptr.load_scalar(fx);

        // Scratch storage with the pointee's layout, holding the old `*x`.
        let scratch_addr = CPlace::new_stack_slot(fx, elem_layout).to_ptr().get_addr(fx);

        let len_bytes = elem_layout.size.bytes();
        let align: u8 = elem_layout.align.abi.bytes().try_into().unwrap();

        // Each pair is (destination, source); the order of the pairs matters.
        for (dst, src) in
            [(scratch_addr, lhs_addr), (lhs_addr, rhs_addr), (rhs_addr, scratch_addr)]
        {
            fx.bcx.emit_small_memory_copy(
                fx.target_config,
                dst,
                src,
                len_bytes,
                align,
                align,
                true,
                MemFlags::trusted(),
            );
        }
    }
}

// Swaps `count` consecutive elements behind `x_ptr` and `y_ptr`, one element
// per loop iteration, routing each element through a temporary stack slot.
sym::swap_nonoverlapping_many => {
    intrinsic_args!(fx, args => (x_ptr, y_ptr, count); intrinsic);
    let pointee_ty = x_ptr.layout().ty.builtin_deref(true).unwrap().ty;
    let pointee_layout = fx.layout_of(pointee_ty);

    // ZSTs swap is noop.
    if pointee_layout.size != Size::ZERO {
        let x_ptr_val = x_ptr.load_scalar(fx);
        let y_ptr_val = y_ptr.load_scalar(fx);

        let count = count.load_scalar(fx);

        let tmp_place = CPlace::new_stack_slot(fx, pointee_layout);
        let tmp_ptr_val = tmp_place.to_ptr().get_addr(fx);

        let elem_size_bytes = pointee_layout.size.bytes();
        // Same size as a signed immediate, used to scale the element index.
        let elem_stride: i64 = elem_size_bytes.try_into().unwrap();
        let align_bytes: u8 = pointee_layout.align.abi.bytes().try_into().unwrap();

        let loop_header = fx.bcx.create_block();
        let loop_body = fx.bcx.create_block();
        let loop_done = fx.bcx.create_block();

        // The element index is carried as a block parameter of the header.
        let index = fx.bcx.append_block_param(loop_header, fx.pointer_type);
        let zero = fx.bcx.ins().iconst(fx.pointer_type, 0);
        fx.bcx.ins().jump(loop_header, &[zero]);

        fx.bcx.switch_to_block(loop_header);
        let is_done = fx.bcx.ins().icmp(IntCC::Equal, index, count);
        fx.bcx.ins().brif(is_done, loop_done, &[], loop_body, &[]);

        fx.bcx.switch_to_block(loop_body);
        // `index` counts elements, not bytes: it must be scaled by the element
        // size before being added to the base addresses. Without the scaling
        // every element wider than one byte is addressed at the wrong offset.
        let byte_offset = fx.bcx.ins().imul_imm(index, elem_stride);
        let curr_x_ptr_val = fx.bcx.ins().iadd(x_ptr_val, byte_offset);
        let curr_y_ptr_val = fx.bcx.ins().iadd(y_ptr_val, byte_offset);

        // tmp <- x[i], x[i] <- y[i], y[i] <- tmp; pairs are (destination, source).
        for (dst, src) in [
            (tmp_ptr_val, curr_x_ptr_val),
            (curr_x_ptr_val, curr_y_ptr_val),
            (curr_y_ptr_val, tmp_ptr_val),
        ] {
            fx.bcx.emit_small_memory_copy(
                fx.target_config,
                dst,
                src,
                elem_size_bytes,
                align_bytes,
                align_bytes,
                true,
                MemFlags::trusted(),
            );
        }

        let next_index = fx.bcx.ins().iadd_imm(index, 1);
        fx.bcx.ins().jump(loop_header, &[next_index]);

        fx.bcx.switch_to_block(loop_done);
        fx.bcx.ins().nop();
    }
}

sym::ctlz | sym::ctlz_nonzero => {
intrinsic_args!(fx, args => (arg); intrinsic);
let val = arg.load_scalar(fx);
Expand Down
49 changes: 49 additions & 0 deletions compiler/rustc_codegen_gcc/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,55 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
self.block.add_eval(None, self.context.new_call(None, memset, &[ptr, fill_byte, size]));
}

/// Emits a counted loop that walks `VAR_COUNT` memory regions in lockstep.
///
/// On iteration `i` (from `0` up to, but excluding, `iterations`) the pointer
/// for region `k` is `start_ptrs[k]` advanced by `i * steps[k]` bytes.
/// `body_visitor` is invoked once, while the loop body block is current, with
/// those per-iteration pointers so it can emit the body's code. When this
/// method returns, the builder is positioned in the block following the loop.
///
/// The iteration counter is kept in a function-local variable named `loop_i`.
///
/// # Panics
///
/// Panics if `VAR_COUNT` is zero or if any entry of `steps` is zero bytes.
fn make_memory_loop<BodyPtrsVisitor, const VAR_COUNT: usize>(
    &mut self,
    loop_name: &str,
    start_ptrs: [Self::Value; VAR_COUNT],
    steps: [Size; VAR_COUNT],
    iterations: Self::Value,
    body_visitor: BodyPtrsVisitor,
) where
    BodyPtrsVisitor: FnOnce(&mut Self, &[Self::Value; VAR_COUNT]),
{
    assert!(VAR_COUNT > 0, "VAR_COUNT must be bigger than zero.");

    steps.iter().for_each(|stride| {
        assert_ne!(stride.bytes(), 0, "We are iterating over memory, ZSTs unexpected.");
    });

    let check_block = self.append_sibling_block(&format!("{loop_name}_header"));
    let work_block = self.append_sibling_block(&format!("{loop_name}_body"));
    let exit_block = self.append_sibling_block(&format!("{loop_name}_next"));

    let counter_start = self.const_usize(0);
    let byte_strides: [Self::Value; VAR_COUNT] =
        steps.map(|stride| self.const_usize(stride.bytes()));

    // The counter is a mutable local of the current function, initialised to zero.
    let counter = self.llbb().get_function().new_local(None, self.type_size_t(), "loop_i");
    self.assign(counter, counter_start);
    let counter_val = counter.to_rvalue();

    self.br(check_block);

    // Header: keep looping while the counter differs from `iterations`.
    self.switch_to_block(check_block);
    let has_more = self.icmp(IntPredicate::IntNE, counter_val, iterations);
    self.cond_br(has_more, work_block, exit_block);

    // Body: derive the current element pointers, then let the caller emit its code.
    self.switch_to_block(work_block);
    let element_ptrs: [Self::Value; VAR_COUNT] = core::array::from_fn(|idx| {
        let base = self.pointercast(start_ptrs[idx], self.type_i8p());
        let byte_offset = self.unchecked_umul(byte_strides[idx], counter_val);
        self.inbounds_gep(self.type_i8(), base, &[byte_offset])
    });
    body_visitor(self, &element_ptrs);
    let one = self.const_usize(1);
    let bumped = self.unchecked_uadd(counter_val, one);
    self.assign(counter, bumped);
    self.br(check_block);

    self.switch_to_block(exit_block);
}

fn select(&mut self, cond: RValue<'gcc>, then_val: RValue<'gcc>, mut else_val: RValue<'gcc>) -> RValue<'gcc> {
let func = self.current_func();
let variable = func.new_local(None, then_val.get_type(), "selectVar");
Expand Down
48 changes: 48 additions & 0 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,54 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
}
}

/// Emits a counted loop that walks `VAR_COUNT` memory regions in lockstep.
///
/// On iteration `i` (from `0` up to, but excluding, `iterations`) the pointer
/// for region `k` is `start_ptrs[k]` advanced by `i * steps[k]` bytes.
/// `body_visitor` receives a builder positioned in the loop body together with
/// those per-iteration pointers, and emits the body's code. When this method
/// returns, `self` has been replaced by a builder positioned in the block that
/// follows the loop.
///
/// `VAR_COUNT == 0` is rejected at compile time.
///
/// # Panics
///
/// Panics if any entry of `steps` is zero bytes.
fn make_memory_loop<BodyPtrsVisitor, const VAR_COUNT: usize>(
    &mut self,
    loop_name: &str,
    start_ptrs: [Self::Value; VAR_COUNT],
    steps: [Size; VAR_COUNT],
    iterations: Self::Value,
    body_visitor: BodyPtrsVisitor,
) where
    BodyPtrsVisitor: FnOnce(&mut Self, &[Self::Value; VAR_COUNT]),
{
    const {
        assert!(VAR_COUNT > 0, "VAR_COUNT must be bigger than zero.");
    }
    for step in steps {
        assert_ne!(step.bytes(), 0, "We are iterating over memory, ZSTs unexpected.");
    }

    let zero = self.const_usize(0);
    let additions: [Self::Value; VAR_COUNT] = steps.map(|st| self.const_usize(st.bytes()));

    // FIXME: Remove pointercast after dropping supporting of LLVM 14.
    // The casts are loop-invariant and are emitted through `self`, so they must
    // be built *before* `self.br(..)` terminates the current block: anything
    // emitted through `self` afterwards would be placed behind the terminator.
    let start_ptrs: [Self::Value; VAR_COUNT] =
        start_ptrs.map(|start| self.pointercast(start, self.type_i8p()));

    let header_bb = self.append_sibling_block(&format!("{}_header", loop_name));
    let body_bb = self.append_sibling_block(&format!("{}_body", loop_name));
    let next_bb = self.append_sibling_block(&format!("{}_next", loop_name));
    self.br(header_bb);

    let mut header_bx = Builder::build(self.cx, header_bb);
    // Use an integer for iteration instead of pointers because LLVM canonicalizes
    // the loop into an indexed one anyway.
    let loop_i = header_bx.phi(self.type_isize(), &[zero], &[self.llbb()]);
    let keep_going = header_bx.icmp(IntPredicate::IntNE, loop_i, iterations);
    header_bx.cond_br(keep_going, body_bb, next_bb);

    let mut body_bx = Builder::build(self.cx, body_bb);
    let current_ptrs: [Self::Value; VAR_COUNT] = std::array::from_fn(|i| {
        let offset = body_bx.unchecked_umul(loop_i, additions[i]);
        body_bx.inbounds_gep(body_bx.type_i8(), start_ptrs[i], &[offset])
    });
    body_visitor(&mut body_bx, &current_ptrs);
    let next_i = body_bx.unchecked_uadd(loop_i, body_bx.const_usize(1));
    // The visitor gets `&mut body_bx` and may leave it positioned in a different
    // block than `body_bb`; the back-edge comes from wherever the builder is now.
    header_bx.add_incoming_to_phi(loop_i, next_i, body_bx.llbb());
    body_bx.br(header_bb);

    *self = Builder::build(self.cx, next_bb);
}

fn select(
&mut self,
cond: &'ll Value,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_llvm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
#![feature(extern_types)]
#![feature(hash_raw_entry)]
#![feature(inline_const)]
#![feature(iter_intersperse)]
#![feature(let_chains)]
#![feature(never_type)]
Expand Down
Loading