Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replay cache missed load from replayQ #209

Open
wants to merge 1 commit into
base: xs-dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/cpu/o3/dyn_inst.hh
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ class DynInst : public ExecContext, public RefCounted
NotAnInst,
TranslationStarted,
TranslationCompleted,
CacheRefilledAfterMiss,
PossibleLoadViolation,
HitExternalSnoop,
EffAddrValid,
Expand Down Expand Up @@ -462,6 +463,14 @@ class DynInst : public ExecContext, public RefCounted
}
void translationCompleted(bool f) { instFlags[TranslationCompleted] = f; }

/**
 * True if the Dcache has been refilled after this instruction's Dcache miss.
 * Reads the CacheRefilledAfterMiss bit of instFlags.
 */
bool
cacheRefilledAfterMiss() const
{
return instFlags[CacheRefilledAfterMiss];
}
/** Sets (f == true) or clears (f == false) the CacheRefilledAfterMiss flag. */
void cacheRefilledAfterMiss(bool f) { instFlags[CacheRefilledAfterMiss] = f; }

/** True if this address was found to match a previous load and they issued
* out of order. If that happened, then it's only a problem if an incoming
* snoop invalidate modifies the line, in which case we need to squash.
Expand Down Expand Up @@ -1397,6 +1406,10 @@ class DynInst : public ExecContext, public RefCounted
return squashVer.getVersion();
}

/**
 * Returns the value of the lqIdx member.
 * NOTE(review): this accessor does not modify the instruction and could
 * probably be declared const -- confirm against the callers.
 */
ssize_t getLqIdx()
{
return lqIdx;
}

Addr getPC()
{
Expand Down
6 changes: 6 additions & 0 deletions src/cpu/o3/iew.cc
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,12 @@ IEW::blockMemInst(const DynInstPtr& inst)
instQueue.blockMemInst(inst);
}

/**
 * Hands a cache-missed load over to the instruction queue by forwarding
 * the call to InstructionQueue::cacheMissLdReplay().
 *
 * @param inst The load instruction to hand over.
 */
void
IEW::cacheMissLdReplay(const DynInstPtr& inst)
{
instQueue.cacheMissLdReplay(inst);
}

void
IEW::cacheUnblocked()
{
Expand Down
3 changes: 3 additions & 0 deletions src/cpu/o3/iew.hh
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,9 @@ class IEW
/** Moves memory instruction onto the list of cache blocked instructions */
void blockMemInst(const DynInstPtr &inst);

/** Moves a load instruction onto the set of cache-missed instructions
 * kept by the instruction queue.
 */
void cacheMissLdReplay(const DynInstPtr &inst);

/** Notifies that the cache has become unblocked */
void cacheUnblocked();

Expand Down
49 changes: 48 additions & 1 deletion src/cpu/o3/inst_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,12 @@ InstructionQueue::FUCompletion::description() const
return "Functional unit completion";
}

/**
 * Hash functor for the cache-missed-load set: the hash value of an
 * instruction is the value returned by its getLqIdx().
 */
size_t
InstructionQueue::CacheMissLdInstsHash::operator()(const DynInstPtr& ptr) const
{
    const auto lq_index = ptr->getLqIdx();
    // getLqIdx() is signed; make the conversion to the hash type explicit.
    return static_cast<size_t>(lq_index);
}

InstructionQueue::InstructionQueue(CPU *cpu_ptr, IEW *iew_ptr,
const BaseO3CPUParams &params)
: cpu(cpu_ptr),
Expand Down Expand Up @@ -352,6 +358,7 @@ InstructionQueue::resetState()

nonSpecInsts.clear();
deferredMemInsts.clear();
cacheMissLdInsts.clear();
blockedMemInsts.clear();
retryMemInsts.clear();
wbOutstanding = 0;
Expand Down Expand Up @@ -650,6 +657,10 @@ InstructionQueue::scheduleReadyInsts()
IssueStruct *i2e_info = issueToExecuteQueue->access(0);

DynInstPtr mem_inst;
while ((mem_inst = getCacheMissInstToExecute())) {
mem_inst->issueQue->retryMem(mem_inst);
}

while ((mem_inst = getDeferredMemInstToExecute())) {
mem_inst->issueQue->retryMem(mem_inst);
}
Expand Down Expand Up @@ -720,7 +731,7 @@ InstructionQueue::scheduleReadyInsts()
// @todo If the way deferred memory instructions are handled due to
// translation changes then the deferredMemInsts condition should be
// removed from the code below.
if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty()) {
if (total_issued || !retryMemInsts.empty() || !deferredMemInsts.empty() || !cacheMissLdInsts.empty()) {
cpu->activityThisCycle();
} else {
DPRINTF(IQ, "Not able to schedule any instructions.\n");
Expand Down Expand Up @@ -859,6 +870,19 @@ InstructionQueue::deferMemInst(const DynInstPtr &deferred_inst)
deferredMemInsts.push_back(deferred_inst);
}

/**
 * Parks a load that missed in the Dcache in the cache-missed-load set.
 *
 * The instruction is marked as not issuable and its translation flags are
 * cleared before it is inserted. Because the container is an
 * unordered_set, inserting an instruction that is already present has no
 * effect.
 *
 * @param deferred_inst The load instruction to park.
 */
void
InstructionQueue::cacheMissLdReplay(const DynInstPtr &deferred_inst)
{
    DPRINTF(IQ, "Get Cache Missed Load, insert to Replay Queue "
            "[sn:%llu]\n", deferred_inst->seqNum);

    // Keep the load from being issued while it sits in the set.
    deferred_inst->clearCanIssue();

    // Clear the DTB translation flags on the instruction.
    deferred_inst->translationCompleted(false);
    deferred_inst->translationStarted(false);

    cacheMissLdInsts.insert(deferred_inst);
}

void
InstructionQueue::blockMemInst(const DynInstPtr &blocked_inst)
{
Expand Down Expand Up @@ -901,6 +925,29 @@ InstructionQueue::getDeferredMemInstToExecute()
return nullptr;
}

/**
 * Removes and returns one load from the cache-missed-load set that no
 * longer needs to wait there: either its cacheRefilledAfterMiss flag is
 * set, or it has been squashed.
 *
 * Loads that are still waiting are only reported through a debug message.
 *
 * @return The instruction taken out of the set, or nullptr if no entry
 *         qualifies.
 */
DynInstPtr
InstructionQueue::getCacheMissInstToExecute()
{
    for (auto it = cacheMissLdInsts.begin(); it != cacheMissLdInsts.end();
         ++it) {
        const DynInstPtr &inst = *it;

        if (inst->cacheRefilledAfterMiss() || inst->isSquashed()) {
            DPRINTF(IQ, "CacheMissed load inst [sn:%llu] PC %s is ready to "
                    "execute\n", inst->seqNum, inst->pcState());
            // Elements of an unordered_set are const, so they cannot be
            // moved from; take an explicit copy to keep the instruction
            // alive before its entry is erased.
            DynInstPtr mem_inst = inst;
            cacheMissLdInsts.erase(it);
            return mem_inst;
        }

        // Reaching this point means the load is neither refilled nor
        // squashed, so no further check is needed before logging.
        DPRINTF(
            IQ,
            "CacheMissed load inst [sn:%llu] PC %s has not been waken up "
            "by Dcache\n",
            inst->seqNum, inst->pcState());
    }
    return nullptr;
}

DynInstPtr
InstructionQueue::getBlockedMemInstToExecute()
{
Expand Down
21 changes: 21 additions & 0 deletions src/cpu/o3/inst_queue.hh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include <list>
#include <map>
#include <queue>
#include <unordered_set>
#include <vector>

#include "base/statistics.hh"
Expand Down Expand Up @@ -199,6 +200,11 @@ class InstructionQueue
*/
DynInstPtr getDeferredMemInstToExecute();

/** Gets a load instruction that was deferred due to a Dcache miss
 * if it is now ready to execute (the Dcache has been refilled) or has
 * been squashed. NULL if none available.
 */
DynInstPtr getCacheMissInstToExecute();

/** Gets a memory instruction that was blocked on the cache. NULL if none
* available.
*/
Expand Down Expand Up @@ -242,6 +248,11 @@ class InstructionQueue
*/
void deferMemInst(const DynInstPtr &deferred_inst);

/**
 * Defers a load instruction on a Dcache miss: the load is placed in the
 * set of cache-missed loads (cacheMissLdInsts).
 */
void cacheMissLdReplay(const DynInstPtr &deferred_inst);

/** Defers a memory instruction when it is cache blocked. */
void blockMemInst(const DynInstPtr &blocked_inst);

Expand Down Expand Up @@ -302,6 +313,16 @@ class InstructionQueue
*/
std::list<DynInstPtr> deferredMemInsts;

/** Hash functor used by cacheMissLdInsts; operator() is defined in
 * inst_queue.cc.
 */
struct CacheMissLdInstsHash
{
size_t operator()(const DynInstPtr& ptr) const;
};
/** Set of load instructions waiting for a Dcache refill.
 * An unordered_set is used to prevent repeated enqueueing, because
 * SplitDataRequest may call `cacheMissLdReplay` multiple times.
 */
std::unordered_set<DynInstPtr, CacheMissLdInstsHash> cacheMissLdInsts;

/** List of instructions that have been cache blocked. */
std::list<DynInstPtr> blockedMemInsts;

Expand Down
79 changes: 56 additions & 23 deletions src/cpu/o3/lsq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -521,8 +521,23 @@ LSQ::recvFunctionalCustomSignal(PacketPtr pkt, int sig)

LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->getPrimarySenderState());
panic_if(!request, "Got packet back with unknown sender state\n");
if (sig == DcacheRespType::Miss) {
// notify cache miss
if (sig == DcacheRespType::Miss || sig == DcacheRespType::Block_Not_Ready) {
DPRINTF(LSQ, "recvFunctionalCustomSignal: Resp type: %d, [sn:%ld], lqidx: %ld\n",
sig, request->instruction()->seqNum, request->instruction()->lqIdx);
if (request->mainReq()->isLLSC() || request->mainReq()->isUncacheable()) {
// do not replay Amo/Uncache Load
DPRINTF(LSQ, "Recv Amo/Uncache Load: [sn:%ld], No Need to Replay\n",
request->instruction()->seqNum);
} else {
// clear state in this instruction
request->instruction()->cacheRefilledAfterMiss(false);
request->instruction()->effAddrValid(false);
// clear request in loadQueue
thread[request->_port.lsqID].loadQueue[request->instruction()->lqIdx].setRequest(nullptr);
// insert to missed load replay queue
iewStage->cacheMissLdReplay(request->instruction());
}
// cancel subsequent dependent insts of this load
iewStage->loadCancel(request->instruction());
} else {
panic("unsupported sig %d in recvFunctionalCustomSignal\n", sig);
Expand Down Expand Up @@ -1348,21 +1363,32 @@ LSQ::SbufferRequest::recvTimingResp(PacketPtr pkt)
bool
LSQ::SingleDataRequest::recvTimingResp(PacketPtr pkt)
{
LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
bool isNormalLd = isLoad() && !request->mainReq()->isLLSC() && !request->mainReq()->isUncacheable();
// Dump inst num, request addr, and packet addr
DPRINTF(LSQ, "Single Req::recvTimingResp: inst: %llu, pkt: %#lx\n", pkt->req->getReqInstSeqNum(),
pkt->getAddr());
DPRINTF(LSQ, "Single Req::recvTimingResp: inst: %llu, pkt: %#lx, isLoad: %d, "
"isLLSC: %d, isUncache: %d, isCacheSatisfied: %d\n",
pkt->req->getReqInstSeqNum(), pkt->getAddr(), isLoad(), request->mainReq()->isLLSC(),
request->mainReq()->isUncacheable(), pkt->cacheSatisfied);
assert(_numOutstandingPackets == 1);
flags.set(Flag::Complete);
assert(pkt == _packets.front());
forward();
_port.completeDataAccess(pkt);
_hasStaleTranslation = false;
if (isNormalLd && !pkt->cacheSatisfied) {
// Data in Dcache is ready, wake up missed load in replay queue
LSQRequest::_inst->cacheRefilledAfterMiss(true);
discard();
} else {
flags.set(Flag::Complete);
assert(pkt == _packets.front());
forward();
_port.completeDataAccess(pkt);
_hasStaleTranslation = false;
}
return true;
}

bool
LSQ::SplitDataRequest::recvTimingResp(PacketPtr pkt)
{
LSQRequest *request = dynamic_cast<LSQRequest*>(pkt->senderState);
DPRINTF(LSQ, "Spilt Req::recvTimingResp: inst: %llu, pkt: %#lx\n", pkt->req->getReqInstSeqNum(),
pkt->getAddr());
uint32_t pktIdx = 0;
Expand All @@ -1371,21 +1397,28 @@ LSQ::SplitDataRequest::recvTimingResp(PacketPtr pkt)
assert(pktIdx < _packets.size());
numReceivedPackets++;
if (numReceivedPackets == _packets.size()) {
flags.set(Flag::Complete);
/* Assemble packets. */
PacketPtr resp = isLoad()
? Packet::createRead(_mainReq)
: Packet::createWrite(_mainReq);
if (isLoad())
resp->dataStatic(_inst->memData);
else
resp->dataStatic(_data);
resp->senderState = this;
forward();
_port.completeDataAccess(resp);
delete resp;
bool isNormalLd = isLoad() && !request->mainReq()->isLLSC() && !request->mainReq()->isUncacheable();
if (isNormalLd && !pkt->cacheSatisfied) {
// Data in Dcache is ready, wake up missed load in replay queue
LSQRequest::_inst->cacheRefilledAfterMiss(true);
discard();
} else {
flags.set(Flag::Complete);
/* Assemble packets. */
PacketPtr resp = isLoad()
? Packet::createRead(_mainReq)
: Packet::createWrite(_mainReq);
if (isLoad())
resp->dataStatic(_inst->memData);
else
resp->dataStatic(_data);
resp->senderState = this;
forward();
_port.completeDataAccess(resp);
delete resp;
_hasStaleTranslation = false;
}
}
_hasStaleTranslation = false;
return true;
}

Expand Down
10 changes: 6 additions & 4 deletions src/mem/cache/base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -642,14 +642,16 @@ BaseCache::recvTimingReq(PacketPtr pkt)
}

handleTimingReqHit(pkt, blk, request_time, first_acc_after_pf);
if (cacheLevel == 1 && pkt->isResponse() && pkt->isRead() && lat > 1) {
// send cache miss signal
cpuSidePort.sendCustomSignal(pkt, DcacheRespType::Miss);
if (cacheLevel == 1 && pkt->isResponse() && pkt->isRead() && !pkt->isWrite() && lat > 1) {
// cache block not ready, send cancel signal
cpuSidePort.sendCustomSignal(pkt, DcacheRespType::Block_Not_Ready);
pkt->cacheSatisfied = false;
}
} else {
if (cacheLevel == 1 && pkt->needsResponse() && pkt->isRead()) {
if (cacheLevel == 1 && pkt->needsResponse() && pkt->isRead() && !pkt->isWrite()) {
// send cache miss signal
cpuSidePort.sendCustomSignal(pkt, DcacheRespType::Miss);
pkt->cacheSatisfied = false;
}

// ArchDB: for now we only track packet which has PC
Expand Down
2 changes: 2 additions & 0 deletions src/mem/packet.hh
Original file line number Diff line number Diff line change
Expand Up @@ -1598,6 +1598,8 @@ class Packet : public Printable

bool tagReadFail = false;

bool cacheSatisfied = true;

bool fromBOP() const { return pfSource == PrefetchSourceType::HWP_BOP; }

PrefetchSourceType getPFSource() const { return static_cast<PrefetchSourceType>(pfSource); }
Expand Down
1 change: 1 addition & 0 deletions src/mem/request.hh
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ enum DcacheRespType
{
NONE = 0,
Hit,
Block_Not_Ready,
Miss,
NUM_Resp_Type
};
Expand Down
1 change: 1 addition & 0 deletions src/mem/ruby/system/RubyPort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ RubyPort::ruby_custom_signal_callback(PacketPtr pkt)

DPRINTF(RubyPort, "Sent custom signal back to LSQ with sender state %#lx\n", sender_state);
port->sendCustomSignal(pkt, DcacheRespType::Miss);
pkt->cacheSatisfied = false;
}

void
Expand Down
6 changes: 3 additions & 3 deletions src/mem/ruby/system/Sequencer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ Sequencer::insertRequest(PacketPtr pkt, RubyRequestType primary_type,

if (seq_req_list.size() > 1) {
if (cache_block_busy) {
if (pkt->isRead()) {
if (pkt->isRead() && !pkt->isWrite()) {
DPRINTF(RubySequencer, "Pkt %#lx %s is delayed because blk is busy doing ruby stuff\n",
pkt, pkt->cmdString());
ruby_custom_signal_callback(pkt);
Expand Down Expand Up @@ -649,7 +649,7 @@ Sequencer::notifyMissCallback(Addr address, bool is_upgrade, bool is_busy)

// cancel pending loads' speculation
for (auto &seq_req: seq_req_list) {
if (seq_req.pkt->isRead()) {
if (seq_req.pkt->isRead() && !seq_req.pkt->isWrite()) {
ruby_custom_signal_callback(seq_req.pkt);
stat.loadcancel++;
}
Expand Down Expand Up @@ -693,7 +693,7 @@ Sequencer::TBEFullCancel(Addr address)

// cancel pending loads' speculation
for (auto &seq_req: seq_req_list) {
if (seq_req.pkt->isRead()) {
if (seq_req.pkt->isRead() && !seq_req.pkt->isWrite()) {
ruby_custom_signal_callback(seq_req.pkt);
stat.loadcancel++;
}
Expand Down