Merge pull request #1475 from vext01/side-trace-fix
Fix a side-tracing + hardware tracing bug.
ltratt authored Nov 29, 2024
2 parents 02d7be9 + 844d090 commit a25b17c
Showing 4 changed files with 109 additions and 27 deletions.
30 changes: 24 additions & 6 deletions ykrt/src/compile/jitc_yk/codegen/x64/lsregalloc.rs
@@ -252,12 +252,30 @@ impl<'a> LSRegAlloc<'a> {

/// The parts of the register allocator needed for general purpose registers.
impl LSRegAlloc<'_> {
/// Forcibly assign the machine register `reg`, which must be in the [RegState::Empty] state,
/// to the value produced by instruction `iidx`.
pub(crate) fn force_assign_inst_gp_reg(&mut self, iidx: InstIdx, reg: Rq) {
debug_assert!(!self.gp_regset.is_set(reg));
self.gp_regset.set(reg);
self.gp_reg_states[usize::from(reg.code())] = RegState::FromInst(iidx);
/// Forcibly assign the machine register `reg` to the value produced by instruction `iidx`.
/// Note that if this register is already used, a spill will be generated instead.
pub(crate) fn force_assign_inst_gp_reg(&mut self, asm: &mut Assembler, iidx: InstIdx, reg: Rq) {
if self.gp_regset.is_set(reg) {
debug_assert_eq!(self.spills[usize::from(iidx)], SpillState::Empty);
// Input values alias to a single register. To avoid the rest of the register allocator
// having to think about this, we "dealias" the values by spilling.
let inst = self.m.inst_no_copies(iidx);
let size = inst.def_byte_size(self.m);
self.stack.align(size); // FIXME
let frame_off = self.stack.grow(size);
let off = i32::try_from(frame_off).unwrap();
match size {
1 => dynasm!(asm; mov BYTE [rbp - off], Rb(reg.code())),
2 => dynasm!(asm; mov WORD [rbp - off], Rw(reg.code())),
4 => dynasm!(asm; mov DWORD [rbp - off], Rd(reg.code())),
8 => dynasm!(asm; mov QWORD [rbp - off], Rq(reg.code())),
_ => unreachable!(),
}
self.spills[usize::from(iidx)] = SpillState::Stack(off);
} else {
self.gp_regset.set(reg);
self.gp_reg_states[usize::from(reg.code())] = RegState::FromInst(iidx);
}
}

/// Forcibly assign the floating point register `reg`, which must be in the [RegState::Empty] state,
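For context on the dealiasing in `force_assign_inst_gp_reg` above: a later use of the value can be rematerialised from the stack slot recorded in `self.spills`. The sketch below simply mirrors the spill arms in reverse; `reload_gp` is an invented name, not code from this commit.

// Illustrative sketch only (not from this commit): reload a value that
// `force_assign_inst_gp_reg` dealiased onto the stack.
fn reload_gp(asm: &mut Assembler, spill: &SpillState, size: usize, reg: Rq) {
    if let SpillState::Stack(off) = spill {
        let off = *off; // frame offset recorded when the value was spilled
        match size {
            1 => dynasm!(asm; mov Rb(reg.code()), BYTE [rbp - off]),
            2 => dynasm!(asm; mov Rw(reg.code()), WORD [rbp - off]),
            4 => dynasm!(asm; mov Rd(reg.code()), DWORD [rbp - off]),
            8 => dynasm!(asm; mov Rq(reg.code()), QWORD [rbp - off]),
            _ => unreachable!(),
        }
    }
}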
31 changes: 30 additions & 1 deletion ykrt/src/compile/jitc_yk/codegen/x64/mod.rs
@@ -1154,7 +1154,7 @@ impl<'a> Assemble<'a> {
debug_assert!(size <= REG64_BYTESIZE);
match m {
VarLocation::Register(reg_alloc::Register::GP(reg)) => {
self.ra.force_assign_inst_gp_reg(iidx, reg);
self.ra.force_assign_inst_gp_reg(&mut self.asm, iidx, reg);
}
VarLocation::Register(reg_alloc::Register::FP(reg)) => {
self.ra.force_assign_inst_fp_reg(iidx, reg);
@@ -4472,4 +4472,33 @@
false,
);
}

#[test]
fn cg_aliasing_loadtis() {
let mut m = jit_ir::Module::new(0, 0).unwrap();

// Create two trace inputs whose locations alias.
let loc = yksmp::Location::Register(13, 1, 0, [].into());
m.push_tiloc(loc);
let ti_inst = jit_ir::LoadTraceInputInst::new(0, m.int8_tyidx());
let op1 = m.push_and_make_operand(ti_inst.clone().into()).unwrap();
let op2 = m.push_and_make_operand(ti_inst.into()).unwrap();

let add_inst = jit_ir::BinOpInst::new(op1, jit_ir::BinOp::Add, op2);
m.push(add_inst.into()).unwrap();

let mt = MT::new().unwrap();
let hl = HotLocation {
kind: HotLocationKind::Tracing,
tracecompilation_errors: 0,
};

Assemble::new(&m, None, None)
.unwrap()
.codegen(mt, Arc::new(Mutex::new(hl)), None)
.unwrap()
.as_any()
.downcast::<X64CompiledTrace>()
.unwrap();
}
}
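Assuming DWARF register number 13 maps to r13 on x86-64, the fixed allocator should leave the test above in roughly this state (instruction indices and stack offset are illustrative):

// Both LoadTraceInputInsts name the same register, so after codegen:
//   gp_reg_states[13]         == RegState::FromInst(first_input_iidx)
//   spills[second_input_iidx] == SpillState::Stack(off)  // dealiased copy
// The test can be run in isolation with cargo's standard test filter:
//   cargo test cg_aliasing_loadtis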
62 changes: 43 additions & 19 deletions ykrt/src/compile/jitc_yk/trace_builder.rs
@@ -1202,29 +1202,53 @@ impl TraceBuilder {
})
.collect::<Vec<_>>();

#[cfg(tracer_swt)]
// When side-tracing a switch guard failure, we need to reprocess the switch statement
// (and only the switch statement) in order to emit a guard at the beginning of the
// side-trace checking that the case requiring execution is the case the trace
// captures.
//
// Note that it is not necessary to emit such a guard into side-traces stemming from
// regular conditionals, since a conditional has only two successors. The parent trace
// captures one, so by construction the side-trace must capture the other.
let prevbb = self.aot_mod.bblock(prev_bid.as_ref().unwrap());
if let aot_ir::Inst::Switch {
test_val,
default_dest,
case_values,
case_dests,
safepoint,
} = &prevbb.insts.last().unwrap()
{
// FIXME: This is a hack! When we side-trace the guard failure of a switch
// statement, the software tracer doesn't report the switch-statement block as the
// first block in the trace. This means we don't generate a guard at the top of the
// side-trace checking that the switch case is correct when executing the trace. We
// work around this by force-processing the previous block that's passed in via
// `SideTraceInfo` if its last instruction is a switch statement. Technically,
// this is the correct fix for hardware tracing too: we shouldn't re-process
// the switch-statement block, as it was already executed in the parent trace,
// which is problematic if the block has side-effects.
let prevbb = self.aot_mod.bblock(prev_bid.as_ref().unwrap());
if matches!(prevbb.insts.last(), Some(aot_ir::Inst::Switch { .. })) {
let nextbb = match &tas.first() {
Some(b) => self.lookup_aot_block(b),
_ => panic!(),
};
self.process_block(prev_bid.as_ref().unwrap(), &None, nextbb)?;
}
let nextbb = match &tas.first() {
Some(b) => self.lookup_aot_block(b),
_ => panic!(),
};
self.handle_switch(
prev_bid.as_ref().unwrap(), // this is safe, we've just created this above
prevbb.insts.len() - 1,
safepoint,
nextbb.as_ref().unwrap(),
test_val,
default_dest,
case_values,
case_dests,
)?;
}
}

// The variable `prev_bid` contains the block of the guard that initiated side-tracing (for
// normal traces this is set to `None`). When hardware tracing, we capture this block again
// as part of the side-trace. However, since we've already processed this block in the
// parent trace, we must not process it again in the side-trace.
//
// Typically, the mapper would strip this block for us, but for codegen-related reasons
// (e.g. a switch statement codegenning to many machine blocks), it's possible for
// multiple duplicates of this same block to show up here, all of which need to be skipped.
let mut trace_iter = tas.into_iter().peekable();
if prev_bid.is_some() {
while self.lookup_aot_block(trace_iter.peek().unwrap()) == prev_bid {
trace_iter.next().unwrap();
}
}

if sti.is_none() {
// Find the block containing the control point call. This is the (sole) predecessor of the
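Returning to the switch handling earlier in this file: schematically, the guard that `handle_switch` emits at the top of such a side-trace constrains the switch's test value so that the trace only remains valid when the same case is taken again. The model below uses invented stand-in types, not yk's IR:

// Illustrative model only. Returns (case value, expected equality) pairs
// that must all hold between the switch's test value and each case for the
// side-trace to remain valid.
enum SwitchArm {
    Case(u64),         // the trace followed an explicit case
    Default(Vec<u64>), // the trace followed the default arm
}

fn switch_entry_guards(taken: &SwitchArm) -> Vec<(u64, bool)> {
    match taken {
        // Guard `test_val == v`.
        SwitchArm::Case(v) => vec![(*v, true)],
        // Guard `test_val != v` for every explicit case value; together
        // these imply the default arm is taken again.
        SwitchArm::Default(vs) => vs.iter().map(|v| (*v, false)).collect(),
    }
}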
13 changes: 12 additions & 1 deletion ykrt/src/trace/hwt/mapper.rs
@@ -169,7 +169,18 @@ impl Iterator for HWTTraceIterator {
type Item = Result<TraceAction, AOTTraceIteratorError>;
fn next(&mut self) -> Option<Self::Item> {
if self.tas_generated == 0 {
// The first block contains the control point, which we need to remove.
// Remove the first block.
//
// If we are collecting a top-level trace, this removes the remainder of the block
// containing the control point.
//
// If we are side-tracing then this attempts to remove the block containing the failed
// guard, which is captured by the hardware tracer, but which we have already executed
// in the parent trace. Note, though, that some conditionals (e.g. switches) can span
// multiple machine blocks, which are not all removed here. Since we don't have enough
// information at this level to remove all of them, there's a workaround in the trace
// builder.
//
// As a rough proxy for "check that we removed only the thing we want to remove", we know
// that the control point will be contained in a single mappable block. The `unwrap` can
// only fail if our assumption about the block is incorrect (i.e. some part of the system
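A self-contained model of the first-block skip described in the comment above; all names here are invented for illustration:

// Illustrative model only: drop the first block a trace decoder yields,
// mirroring how HWTTraceIterator discards block zero.
struct SkipFirst<I: Iterator> {
    inner: I,
    tas_generated: usize,
}

impl<I: Iterator> Iterator for SkipFirst<I> {
    type Item = I::Item;
    fn next(&mut self) -> Option<I::Item> {
        if self.tas_generated == 0 {
            // Discard the control-point block (top-level traces) or the
            // already-executed failed-guard block (side-traces).
            self.inner.next()?;
        }
        self.tas_generated += 1;
        self.inner.next()
    }
}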
