Skip to content

Commit

Permalink
[EVM] Use Keccak256 hash-ed linker symbols in LLVM MC/LLD
Browse files Browse the repository at this point in the history
  • Loading branch information
PavelKopyl committed Nov 21, 2024
1 parent 6d6934d commit 6e65427
Show file tree
Hide file tree
Showing 10 changed files with 179 additions and 115 deletions.
94 changes: 58 additions & 36 deletions lld/lld-c/LLDAsLibraryC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ std::string getLinkerIndexedName(StringRef Name, unsigned SubIdx);
std::string getLinkerSymbolSectionName(StringRef Name);
std::string stripLinkerSymbolNameIndex(StringRef Name);
} // namespace EraVM

// Forward declarations of the EVM linker-symbol naming helpers that the EVM
// linking code in this file calls.
namespace EVM {
// Maps a symbol/buffer name to its hashed linker name.
std::string getLinkerSymbolHash(StringRef SymName);
// NOTE(review): no EVM:: call to this helper is visible in this part of the
// file - confirm it is defined for EVM and still needed.
std::string getLinkerSymbolSectionName(StringRef Name);
// Builds the data-size symbol name for the given (already hashed) symbol name.
std::string getDataSizeSymbol(StringRef SymbolName);
// Builds the data-offset symbol name for the given (already hashed) symbol
// name.
std::string getDataOffsetSymbol(StringRef SymbolName);
} // namespace EVM
} // namespace llvm

constexpr static unsigned linkerSubSymbolRelocSize = sizeof(uint32_t);
Expand Down Expand Up @@ -486,67 +493,76 @@ static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
ArrayRef<const char *> bufIDs) {
assert(memBufs.size() == bufIDs.size());
size_t numObjectsToLink = memBufs.size();
StringRef dataSizePrefix("__datasize_");
StringRef dataOffsetPrefix("__dataoffset_");

auto getDataOffsetName = [](StringRef name) {
return EVM::getDataOffsetSymbol(EVM::getLinkerSymbolHash(name));
};
auto getDataSizeName = [](StringRef name) {
return EVM::getDataSizeSymbol(EVM::getLinkerSymbolHash(name));
};

// Define the script part related to the top-level contract.
StringRef topName(bufIDs[0]);
StringRef deployed(bufIDs[1]);
std::string topName = EVM::getLinkerSymbolHash(bufIDs[0]);
std::string deployed = EVM::getLinkerSymbolHash(bufIDs[1]);

// Contains the linker script part corresponding to the top-level contract.
// For the example above, this contains:
// D_105(.text);
// __dataoffset_D_105_deployed = .;
// D_105_deployed(.text);
// __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
std::string topLevel =
(topName + "(.text);\n" + dataOffsetPrefix + deployed + " = .;\n" +
deployed + "(.text);\n" + dataSizePrefix + deployed + " = . - " +
dataOffsetPrefix + deployed + ";\n")
.str();
std::string topLevelBuf;
raw_string_ostream topLevel(topLevelBuf);
topLevel << topName << "(.text);\n"
<< EVM::getDataOffsetSymbol(deployed) << " = .;\n"
<< deployed << "(.text);\n"
<< EVM::getDataSizeSymbol(deployed) << " = . - "
<< EVM::getDataOffsetSymbol(deployed) + ";\n";

// Contains symbols whose values are the sizes of the dependent contracts.
// For the example above, this contains:
// __datasize_B_40 = 1384;
std::string symDatasizeDeps;
std::string dataSizeBuf;
raw_string_ostream symDatasizeDeps(dataSizeBuf);

// Contains symbols whose values are the offsets of the dependent contracts.
// For the example above, this contains:
// __dataoffset_B_40 = .;
std::string symDataOffsetDeps;
std::string dataOffsetBuf;
raw_string_ostream symDataOffsetDeps(dataOffsetBuf);
if (numObjectsToLink > 2) {
// Define datasize symbols for the dependent contracts. They start after
// {deploy, deployed} pair of the top-level contract, i.e. at index 2.
for (unsigned idx = 2; idx < numObjectsToLink; ++idx)
symDatasizeDeps += (dataSizePrefix + bufIDs[idx] + " = " +
Twine(LLVMGetBufferSize(memBufs[idx])) + ";\n")
.str();
symDatasizeDeps << getDataSizeName(bufIDs[idx]) << " = "
<< LLVMGetBufferSize(memBufs[idx]) << ";\n";

symDataOffsetDeps = (dataOffsetPrefix + bufIDs[2] + " = .;\n").str();
symDataOffsetDeps << getDataOffsetName(bufIDs[2]) << " = .;\n";
for (unsigned idx = 3; idx < numObjectsToLink; ++idx)
symDataOffsetDeps +=
(dataOffsetPrefix + bufIDs[idx] + " = " + dataOffsetPrefix +
bufIDs[idx - 1] + " + " + dataSizePrefix + bufIDs[idx - 1] + ";\n")
.str();
symDataOffsetDeps << getDataOffsetName(bufIDs[idx]) << " = "
<< getDataOffsetName(bufIDs[idx - 1]) << " + "
<< getDataSizeName(bufIDs[idx - 1]) << ";\n";
}

// Contains a symbol whose value is the total size of the top-level contract
// with all the dependencies.
std::string symDatasizeTop = (dataSizePrefix + topName + " = ").str();
std::string dataSizeTopBuf;
raw_string_ostream symDatasizeTop(dataSizeTopBuf);
symDatasizeTop << EVM::getDataSizeSymbol(topName) << " = ";
if (numObjectsToLink > 2)
symDatasizeTop += (dataOffsetPrefix + bufIDs.back() + " + " +
dataSizePrefix + bufIDs.back() + ";\n")
.str();
symDatasizeTop << getDataOffsetName(bufIDs.back()) << " + "
<< getDataSizeName(bufIDs.back()) << ";\n";
else
symDatasizeTop += ".;\n";
symDatasizeTop << ".;\n";

// Emit size of the deploy code offset as the 4-byte unsigned integer.
// This is needed to determine which offset the deployed code starts at
// in the linked binary.
std::string deploySize =
("LONG(" + dataOffsetPrefix + deployed + ");\n").str();
"LONG(" + EVM::getDataOffsetSymbol(deployed) + ");\n";

std::string script = formatv("{0}\n\
std::string script =
formatv("{0}\n\
ENTRY(0);\n\
SECTIONS {\n\
. = 0;\n\
Expand All @@ -558,8 +574,8 @@ SECTIONS {\n\
}\n\
}\n\
",
symDatasizeDeps, topLevel, symDataOffsetDeps,
symDatasizeTop, deploySize);
symDatasizeDeps.str(), topLevel.str(), symDataOffsetDeps.str(),
symDatasizeTop.str(), deploySize);

return script;
}
Expand All @@ -570,16 +586,21 @@ LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
assert(numInBuffers > 1);
SmallVector<MemoryBufferRef> localInMemBufRefs(3);
SmallVector<std::unique_ptr<MemoryBuffer>> localInMemBufs(3);

// TODO: #740. Verify that the object files contain sections with original
// inBuffersIDs, i.e. before taking hash.
for (unsigned idx = 0; idx < 2; ++idx) {
MemoryBufferRef ref = *unwrap(inBuffers[idx]);
localInMemBufs[idx] =
MemoryBuffer::getMemBuffer(ref.getBuffer(), inBuffersIDs[idx],
/*RequiresNullTerminator*/ false);
// We need to copy buffers to be able to change their names, as this matters
// for the linker.
localInMemBufs[idx] = MemoryBuffer::getMemBufferCopy(
ref.getBuffer(), EVM::getLinkerSymbolHash(inBuffersIDs[idx]));
localInMemBufRefs[idx] = localInMemBufs[idx]->getMemBufferRef();
}

std::string linkerScript = creteEVMLinkerScript(
ArrayRef(inBuffers, numInBuffers), ArrayRef(inBuffersIDs, numInBuffers));

std::unique_ptr<MemoryBuffer> scriptBuf =
MemoryBuffer::getMemBuffer(linkerScript, "script.x");
localInMemBufRefs[2] = scriptBuf->getMemBufferRef();
Expand All @@ -592,19 +613,20 @@ LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
// Use remapping of file names (a linker feature) to replace file names with
// indexes in the array of memory buffers.
const std::string remapStr("--remap-inputs=");
std::string remapDeployStr = remapStr + inBuffersIDs[0] + "=0";
std::string topHash = EVM::getLinkerSymbolHash(inBuffersIDs[0]);
std::string deployedHash = EVM::getLinkerSymbolHash(inBuffersIDs[1]);
std::string remapDeployStr = remapStr + topHash + "=0";
lldArgs.push_back(remapDeployStr.c_str());

std::string remapDeployedStr = remapStr + inBuffersIDs[1] + "=1";
std::string remapDeployedStr = remapStr + deployedHash + "=1";
lldArgs.push_back(remapDeployedStr.c_str());

lldArgs.push_back("--remap-inputs=script.x=2");

// Deploy code
lldArgs.push_back(inBuffersIDs[0]);
lldArgs.push_back(topHash.c_str());
// Deployed code
lldArgs.push_back(inBuffersIDs[1]);

lldArgs.push_back(deployedHash.c_str());
lldArgs.push_back("--oformat=binary");

SmallString<0> codeString;
Expand Down
37 changes: 37 additions & 0 deletions llvm/lib/Target/EVM/EVMAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,20 @@

#include "EVMMCInstLower.h"
#include "EVMTargetMachine.h"
#include "MCTargetDesc/EVMMCTargetDesc.h"
#include "MCTargetDesc/EVMTargetStreamer.h"
#include "TargetInfo/EVMTargetInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"

using namespace llvm;

#define DEBUG_TYPE "asm-printer"
Expand Down Expand Up @@ -50,6 +57,9 @@ class EVMAsmPrinter : public AsmPrinter {
/// fall-through.
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const override;

private:
void emitLinkerSymbol(const MachineInstr *MI);
};
} // end of anonymous namespace

Expand Down Expand Up @@ -94,6 +104,13 @@ void EVMAsmPrinter::emitFunctionEntryLabel() {
void EVMAsmPrinter::emitInstruction(const MachineInstr *MI) {
EVMMCInstLower MCInstLowering(OutContext, *this, VRegMapping,
MF->getRegInfo());

unsigned Opc = MI->getOpcode();
if (Opc == EVM::DATASIZE_S || Opc == EVM::DATAOFFSET_S) {
emitLinkerSymbol(MI);
return;
}

MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
Expand All @@ -105,6 +122,26 @@ bool EVMAsmPrinter::isBlockOnlyReachableByFallthrough(
return false;
}

void EVMAsmPrinter::emitLinkerSymbol(const MachineInstr *MI) {
MCSymbol *LinkerSymbol = MI->getOperand(0).getMCSymbol();
StringRef LinkerSymbolName = LinkerSymbol->getName();
unsigned Opc = MI->getOpcode();
assert(Opc == EVM::DATASIZE_S || Opc == EVM::DATAOFFSET_S);

std::string SymbolNameHash = EVM::getLinkerSymbolHash(LinkerSymbolName);
std::string DataSymbolNameHash =
(Opc == EVM::DATASIZE_S) ? EVM::getDataSizeSymbol(SymbolNameHash)
: EVM::getDataOffsetSymbol(SymbolNameHash);

MCInst MCI;
MCI.setOpcode(EVM::PUSH4_S);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VariantKind::VK_EVM_DATA;
MCOperand MCOp = MCOperand::createExpr(
MCSymbolRefExpr::create(DataSymbolNameHash, Kind, OutContext));
MCI.addOperand(MCOp);
EmitToStreamer(*OutStreamer, MCI);
}

// C-linkage initialization entry point for the EVM asm printer: constructs a
// RegisterAsmPrinter<EVMAsmPrinter> helper for the target returned by
// getTheEVMTarget().
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMAsmPrinter() {
const RegisterAsmPrinter<EVMAsmPrinter> X(getTheEVMTarget());
}
31 changes: 12 additions & 19 deletions llvm/lib/Target/EVM/EVMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,28 +153,21 @@ SDValue EVMTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
default:
return SDValue();
case Intrinsic::evm_datasize:
case Intrinsic::evm_dataoffset:
return lowerIntrinsicDataSize(IntrID, Op, DAG);
case Intrinsic::evm_dataoffset: {
const SDLoc DL(Op);
EVT Ty = Op.getValueType();
MachineFunction &MF = DAG.getMachineFunction();
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
StringRef ContractID = cast<MDString>(Metadata->getOperand(0))->getString();
MCSymbol *Sym = MF.getContext().getOrCreateSymbol(ContractID);
unsigned Opc =
(IntrID == Intrinsic::evm_datasize) ? EVM::DATASIZE : EVM::DATAOFFSET;
return SDValue(
DAG.getMachineNode(Opc, DL, Ty, DAG.getMCSymbol(Sym, MVT::i256)), 0);
} break;
}
}

// Lowers the evm_datasize / evm_dataoffset intrinsics to an EVM::DATA machine
// node whose operand is a "__datasize_<id>" or "__dataoffset_<id>" symbol,
// where <id> is the contract ID string read from the intrinsic's metadata
// operand.
// NOTE(review): in this diff view the function looks like the removed side of
// the change (its declaration is dropped from EVMISelLowering.h) - confirm
// before relying on it.
SDValue EVMTargetLowering::lowerIntrinsicDataSize(unsigned IntrID, SDValue Op,
SelectionDAG &DAG) const {
const SDLoc DL(Op);
EVT Ty = Op.getValueType();

MachineFunction &MF = DAG.getMachineFunction();
// Operand 1 carries an MDNode whose first operand is the contract ID string.
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
StringRef ContractID = cast<MDString>(Metadata->getOperand(0))->getString();
bool IsDataSize = IntrID == Intrinsic::evm_datasize;
// The symbol name is the prefix selected by the intrinsic kind followed by
// the contract ID.
std::string SymbolReloc =
(Twine(IsDataSize ? "__datasize_" : "__dataoffset_") + ContractID).str();
MCSymbol *Sym = MF.getContext().getOrCreateSymbol(SymbolReloc);
return SDValue(
DAG.getMachineNode(EVM::DATA, DL, Ty, DAG.getMCSymbol(Sym, MVT::i256)),
0);
}

SDValue EVMTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
const SDLoc DL(Op);
auto *Load = cast<LoadSDNode>(Op);
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/EVM/EVMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,6 @@ class EVMTargetLowering final : public TargetLowering {

SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerIntrinsicDataSize(unsigned IntrID, SDValue Op,
SelectionDAG &DAG) const;

SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Target/EVM/EVMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1122,7 +1122,12 @@ def PUSH32_S : NI<(outs), (ins i256imm:$imm), [], true, "PUSH32 $imm",
}

// Pseudo instructions for linkage
let isCodeGenOnly = 1, BaseName = "DATA" in {
def DATA : NI<(outs GPR:$dst), (ins jmptarget:$reloc), [], false, "", 0, 0>;
def DATA_S : NI<(outs), (ins jmptarget:$reloc), [], true, "", 0, 0>;
// DATASIZE pseudo: codegen-only, takes the linker symbol as $reloc.
// DATASIZE produces its result in a GPR ($dst); DATASIZE_S has no register
// output, and EVMAsmPrinter emits it as a PUSH4_S that references the hashed
// "__datasize" symbol.
let isCodeGenOnly = 1, BaseName = "DATASIZE" in {
def DATASIZE : NI<(outs GPR:$dst), (ins jmptarget:$reloc), [], false, "", 0, 0>;
def DATASIZE_S : NI<(outs), (ins jmptarget:$reloc), [], true, "", 0, 0>;
}

// DATAOFFSET pseudo: codegen-only, takes the linker symbol as $reloc.
// DATAOFFSET produces its result in a GPR ($dst); DATAOFFSET_S has no register
// output, and EVMAsmPrinter emits it as a PUSH4_S that references the hashed
// "__dataoffset" symbol.
let isCodeGenOnly = 1, BaseName = "DATAOFFSET" in {
def DATAOFFSET : NI<(outs GPR:$dst), (ins jmptarget:$reloc), [], false, "", 0, 0>;
def DATAOFFSET_S : NI<(outs), (ins jmptarget:$reloc), [], true, "", 0, 0>;
}
8 changes: 5 additions & 3 deletions llvm/lib/Target/EVM/EVMMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ static void stackifyInstruction(const MachineInstr *MI, MCInst &OutMI) {

// Set up final opcodes for the following codegen-only instructions.
unsigned Opcode = OutMI.getOpcode();
if (Opcode == EVM::PUSH_LABEL || Opcode == EVM::DATA_S)
if (Opcode == EVM::PUSH_LABEL)
OutMI.setOpcode(EVM::PUSH4_S);

// Check that all the instructions are in the 'stack' form.
Expand Down Expand Up @@ -125,9 +125,11 @@ void EVMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) {
} break;
case MachineOperand::MO_MCSymbol: {
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VariantKind::VK_None;
#ifndef NDEBUG
unsigned Opc = MI->getOpcode();
if (Opc == EVM::DATA_S)
Kind = MCSymbolRefExpr::VariantKind::VK_EVM_DATA;
// We handle the linkage-related instructions in the EVMAsmPrinter.
assert(Opc != EVM::DATASIZE_S && Opc != EVM::DATAOFFSET_S);
#endif // NDEBUG

MCOp = MCOperand::createExpr(
MCSymbolRefExpr::create(MO.getMCSymbol(), Kind, Ctx));
Expand Down
19 changes: 19 additions & 0 deletions llvm/lib/Target/EVM/MCTargetDesc/EVMMCTargetDesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#include "EVMMCAsmInfo.h"
#include "EVMTargetStreamer.h"
#include "TargetInfo/EVMTargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/KECCAK.h"

using namespace llvm;

Expand Down Expand Up @@ -114,3 +116,20 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMTargetMC() {
// Register the null target streamer.
TargetRegistry::RegisterNullTargetStreamer(T, createEVMNullTargetStreamer);
}

// Returns a string of the following format:
// '__$KECCAK256(SymName)$__'
std::string EVM::getLinkerSymbolHash(StringRef SymName) {
  // Hash the name and render the 32-byte digest as lowercase hex.
  const std::array<uint8_t, 32> Digest = KECCAK::KECCAK_256(SymName);
  SmallString<72> HexDigest;
  toHex(Digest, /*LowerCase*/ true, HexDigest);

  // Surround the hex digest with the "__$" / "$__" delimiters.
  std::string Wrapped("__$");
  Wrapped.append(HexDigest.begin(), HexDigest.end());
  Wrapped += "$__";
  return Wrapped;
}

// Returns SymbolName prefixed with "__datasize".
std::string EVM::getDataSizeSymbol(StringRef SymbolName) {
  std::string Result("__datasize");
  Result += SymbolName.str();
  return Result;
}

// Returns SymbolName prefixed with "__dataoffset".
std::string EVM::getDataOffsetSymbol(StringRef SymbolName) {
  std::string Result("__dataoffset");
  Result += SymbolName.str();
  return Result;
}
6 changes: 6 additions & 0 deletions llvm/lib/Target/EVM/MCTargetDesc/EVMMCTargetDesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_EVM_MCTARGETDESC_EVMMCTARGETDESC_H
#define LLVM_LIB_TARGET_EVM_MCTARGETDESC_EVMMCTARGETDESC_H

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include <memory>

Expand All @@ -36,6 +37,11 @@ MCAsmBackend *createEVMMCAsmBackend(const Target &T, const MCSubtargetInfo &STI,

std::unique_ptr<MCObjectTargetWriter> createEVMELFObjectWriter(uint8_t OSABI);

// Helpers that build the linker symbol names used by the EVM target.
namespace EVM {
// Returns SymName in its hashed form: "__$" + lowercase hex of
// KECCAK256(SymName) + "$__".
std::string getLinkerSymbolHash(StringRef SymName);
// Returns SymbolName prefixed with "__datasize".
std::string getDataSizeSymbol(StringRef SymbolName);
// Returns SymbolName prefixed with "__dataoffset".
std::string getDataOffsetSymbol(StringRef SymbolName);
} // namespace EVM
} // namespace llvm

// Defines symbolic names for EVM registers.
Expand Down
Loading

0 comments on commit 6e65427

Please sign in to comment.