Skip to content

Commit

Permalink
[EVM] Use Keccak256 hash-ed linker symbols in LLVM MC/LLD
Browse files Browse the repository at this point in the history
  • Loading branch information
PavelKopyl committed Nov 19, 2024
1 parent 1cddc81 commit 65b5ed7
Show file tree
Hide file tree
Showing 10 changed files with 199 additions and 107 deletions.
73 changes: 45 additions & 28 deletions lld/lld-c/LLDAsLibraryC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ std::string getLinkerIndexedName(StringRef Name, unsigned SubIdx);
std::string getLinkerSymbolSectionName(StringRef Name);
std::string stripLinkerSymbolNameIndex(StringRef Name);
} // namespace EraVM

// Forward declarations of the EVM linker-symbol naming helpers.
namespace EVM {
// Returns the hashed form of a linker symbol name:
// '__$' + lowercase hex KECCAK-256 of SymName + '$__'.
std::string getLinkerSymbolHash(StringRef SymName);
// Returns '.linker_symbol_name' immediately followed by Name.
std::string getLinkerSymbolSectionName(StringRef Name);
// Returns '__datasize_' immediately followed by SymbolName.
std::string getDataSizeSymbol(StringRef SymbolName);
// Returns '__dataoffset_' immediately followed by SymbolName.
std::string getDataOffsetSymbol(StringRef SymbolName);
} // namespace EVM
} // namespace llvm

constexpr static unsigned linkerSubSymbolRelocSize = sizeof(uint32_t);
Expand Down Expand Up @@ -486,12 +493,10 @@ static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
ArrayRef<const char *> bufIDs) {
assert(memBufs.size() == bufIDs.size());
size_t numObjectsToLink = memBufs.size();
StringRef dataSizePrefix("__datasize_");
StringRef dataOffsetPrefix("__dataoffset_");

// Define the script part related to the top-level contract.
StringRef topName(bufIDs[0]);
StringRef deployed(bufIDs[1]);
std::string topName = EVM::getLinkerSymbolHash(bufIDs[0]);
std::string deployed = EVM::getLinkerSymbolHash(bufIDs[1]);

// Contains the linker script part corresponding to the top-level contract.
// For the example above, this contains:
Expand All @@ -500,10 +505,9 @@ static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
// D_105_deployed(.text);
// __datasize_D_105_deployed = . - __dataoffset_D_105_deployed;
std::string topLevel =
(topName + "(.text);\n" + dataOffsetPrefix + deployed + " = .;\n" +
deployed + "(.text);\n" + dataSizePrefix + deployed + " = . - " +
dataOffsetPrefix + deployed + ";\n")
.str();
topName + "(.text);\n" + EVM::getDataOffsetSymbol(deployed) + " = .;\n" +
deployed + "(.text);\n" + EVM::getDataSizeSymbol(deployed) + " = . - " +
EVM::getDataOffsetSymbol(deployed) + ";\n";

// Contains symbols whose values are the sizes of the dependent contracts.
// For the example above, this contains:
Expand All @@ -518,33 +522,40 @@ static std::string creteEVMLinkerScript(ArrayRef<LLVMMemoryBufferRef> memBufs,
// Define datasize symbols for the dependent contracts. They start after
// {deploy, deployed} pair of the top-level contract, i.e. at index 2.
for (unsigned idx = 2; idx < numObjectsToLink; ++idx)
symDatasizeDeps += (dataSizePrefix + bufIDs[idx] + " = " +
Twine(LLVMGetBufferSize(memBufs[idx])) + ";\n")
.str();
symDatasizeDeps +=
(EVM::getDataSizeSymbol(EVM::getLinkerSymbolHash(bufIDs[idx])) +
" = " + Twine(LLVMGetBufferSize(memBufs[idx])) + ";\n")
.str();

symDataOffsetDeps = (dataOffsetPrefix + bufIDs[2] + " = .;\n").str();
symDataOffsetDeps =
EVM::getDataOffsetSymbol(EVM::getLinkerSymbolHash(bufIDs[2])) +
" = .;\n";
for (unsigned idx = 3; idx < numObjectsToLink; ++idx)
symDataOffsetDeps +=
(dataOffsetPrefix + bufIDs[idx] + " = " + dataOffsetPrefix +
bufIDs[idx - 1] + " + " + dataSizePrefix + bufIDs[idx - 1] + ";\n")
.str();
EVM::getDataOffsetSymbol(EVM::getLinkerSymbolHash(bufIDs[idx])) +
" = " +
EVM::getDataOffsetSymbol(EVM::getLinkerSymbolHash(bufIDs[idx - 1])) +
" + " +
EVM::getDataSizeSymbol(EVM::getLinkerSymbolHash(bufIDs[idx - 1])) +
";\n";
}

// Contains a symbol whose value is the total size of the top-level contract
// with all the dependencies.
std::string symDatasizeTop = (dataSizePrefix + topName + " = ").str();
std::string symDatasizeTop = EVM::getDataSizeSymbol(topName) + " = ";
if (numObjectsToLink > 2)
symDatasizeTop += (dataOffsetPrefix + bufIDs.back() + " + " +
dataSizePrefix + bufIDs.back() + ";\n")
.str();
symDatasizeTop +=
EVM::getDataOffsetSymbol(EVM::getLinkerSymbolHash(bufIDs.back())) +
" + " +
EVM::getDataSizeSymbol(EVM::getLinkerSymbolHash(bufIDs.back())) + ";\n";
else
symDatasizeTop += ".;\n";

// Emit size of the deploy code offset as the 4-byte unsigned integer.
// This is needed to determine which offset the deployed code starts at
// in the linked binary.
std::string deploySize =
("LONG(" + dataOffsetPrefix + deployed + ");\n").str();
"LONG(" + EVM::getDataOffsetSymbol(deployed) + ");\n";

std::string script = formatv("{0}\n\
ENTRY(0);\n\
Expand All @@ -570,16 +581,21 @@ LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
assert(numInBuffers > 1);
SmallVector<MemoryBufferRef> localInMemBufRefs(3);
SmallVector<std::unique_ptr<MemoryBuffer>> localInMemBufs(3);

// TODO: verify that the object files contain sections with original
// inBuffersIDs, i.e. before taking hash.
for (unsigned idx = 0; idx < 2; ++idx) {
MemoryBufferRef ref = *unwrap(inBuffers[idx]);
localInMemBufs[idx] =
MemoryBuffer::getMemBuffer(ref.getBuffer(), inBuffersIDs[idx],
/*RequiresNullTerminator*/ false);
// We need to copy buffers to be able to change their names, as this matters
// for the linker.
localInMemBufs[idx] = MemoryBuffer::getMemBufferCopy(
ref.getBuffer(), EVM::getLinkerSymbolHash(inBuffersIDs[idx]));
localInMemBufRefs[idx] = localInMemBufs[idx]->getMemBufferRef();
}

std::string linkerScript = creteEVMLinkerScript(
ArrayRef(inBuffers, numInBuffers), ArrayRef(inBuffersIDs, numInBuffers));

std::unique_ptr<MemoryBuffer> scriptBuf =
MemoryBuffer::getMemBuffer(linkerScript, "script.x");
localInMemBufRefs[2] = scriptBuf->getMemBufferRef();
Expand All @@ -592,19 +608,20 @@ LLVMBool LLVMLinkEVM(LLVMMemoryBufferRef inBuffers[],
// Use remapping of file names (a linker feature) to replace file names with
// indexes in the array of memory buffers.
const std::string remapStr("--remap-inputs=");
std::string remapDeployStr = remapStr + inBuffersIDs[0] + "=0";
std::string topHash = EVM::getLinkerSymbolHash(inBuffersIDs[0]);
std::string deployedHash = EVM::getLinkerSymbolHash(inBuffersIDs[1]);
std::string remapDeployStr = remapStr + topHash + "=0";
lldArgs.push_back(remapDeployStr.c_str());

std::string remapDeployedStr = remapStr + inBuffersIDs[1] + "=1";
std::string remapDeployedStr = remapStr + deployedHash + "=1";
lldArgs.push_back(remapDeployedStr.c_str());

lldArgs.push_back("--remap-inputs=script.x=2");

// Deploy code
lldArgs.push_back(inBuffersIDs[0]);
lldArgs.push_back(topHash.c_str());
// Deployed code
lldArgs.push_back(inBuffersIDs[1]);

lldArgs.push_back(deployedHash.c_str());
lldArgs.push_back("--oformat=binary");

SmallString<0> codeString;
Expand Down
64 changes: 64 additions & 0 deletions llvm/lib/Target/EVM/EVMAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,20 @@

#include "EVMMCInstLower.h"
#include "EVMTargetMachine.h"
#include "MCTargetDesc/EVMMCTargetDesc.h"
#include "MCTargetDesc/EVMTargetStreamer.h"
#include "TargetInfo/EVMTargetInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"

using namespace llvm;

#define DEBUG_TYPE "asm-printer"
Expand All @@ -32,6 +39,8 @@ class EVMAsmPrinter : public AsmPrinter {
using VRegMap = DenseMap<unsigned, unsigned>;
using VRegRCMap = DenseMap<const TargetRegisterClass *, VRegMap>;
VRegRCMap VRegMapping;
// Hashed linker symbol names whose name section has already been emitted.
StringSet<> LinkerSymbolSet;

public:
EVMAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
Expand All @@ -50,6 +59,11 @@ class EVMAsmPrinter : public AsmPrinter {
/// fall-through.
bool isBlockOnlyReachableByFallthrough(
const MachineBasicBlock *MBB) const override;

void emitEndOfAsmFile(Module &) override;

private:
void emitLinkerSymbol(const MachineInstr *MI);
};
} // end of anonymous namespace

Expand Down Expand Up @@ -94,6 +108,13 @@ void EVMAsmPrinter::emitFunctionEntryLabel() {
void EVMAsmPrinter::emitInstruction(const MachineInstr *MI) {
EVMMCInstLower MCInstLowering(OutContext, *this, VRegMapping,
MF->getRegInfo());

unsigned Opc = MI->getOpcode();
if (Opc == EVM::DATASIZE_S || Opc == EVM::DATAOFFSET_S) {
emitLinkerSymbol(MI);
return;
}

MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
Expand All @@ -105,6 +126,49 @@ bool EVMAsmPrinter::isBlockOnlyReachableByFallthrough(
return false;
}

void EVMAsmPrinter::emitLinkerSymbol(const MachineInstr *MI) {
MCSymbol *LinkerSymbol = MI->getOperand(0).getMCSymbol();
StringRef LinkerSymbolName = LinkerSymbol->getName();
unsigned Opc = MI->getOpcode();

std::string SymbolNameHash = EVM::getLinkerSymbolHash(LinkerSymbolName);
std::string DataSymbolNameHash =
(Opc == EVM::DATASIZE_S) ? EVM::getDataSizeSymbol(SymbolNameHash)
: EVM::getDataOffsetSymbol(SymbolNameHash);

MCInst MCI;
if (Opc == EVM::DATASIZE_S || Opc == EVM::DATAOFFSET_S)
MCI.setOpcode(EVM::PUSH4_S);

MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VariantKind::VK_EVM_DATA;
MCOperand MCOp = MCOperand::createExpr(
MCSymbolRefExpr::create(DataSymbolNameHash, Kind, OutContext));
MCI.addOperand(MCOp);
EmitToStreamer(*OutStreamer, MCI);

// The linker symbol and the related section already exist, so just exit.
if (LinkerSymbolSet.contains(SymbolNameHash))
return;

LinkerSymbolSet.insert(SymbolNameHash);

MCSection *CurrentSection = OutStreamer->getCurrentSectionOnly();

// Emit the .linker_symbol_name section that contains the actual symbol
// name. We can use this to verify the hashed name really correspond to the
// original symbol name.
std::string LinkerSymbolSectionName =
EVM::getLinkerSymbolSectionName(SymbolNameHash);
MCSection *LinkerSymbolSection = OutContext.getELFSection(
LinkerSymbolSectionName, ELF::SHT_PROGBITS, ELF::SHF_STRINGS);
OutStreamer->switchSection(LinkerSymbolSection);
OutStreamer->emitBytes(LinkerSymbolName);

OutStreamer->switchSection(CurrentSection);
}

// Forgets every hashed linker symbol name recorded by emitLinkerSymbol.
void EVMAsmPrinter::emitEndOfAsmFile(Module &) {
  LinkerSymbolSet.clear();
}

// Entry point that registers EVMAsmPrinter as the asm printer of the EVM
// target.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMAsmPrinter() {
  const RegisterAsmPrinter<EVMAsmPrinter> Registration(getTheEVMTarget());
}
31 changes: 12 additions & 19 deletions llvm/lib/Target/EVM/EVMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,28 +153,21 @@ SDValue EVMTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
default:
return SDValue();
case Intrinsic::evm_datasize:
case Intrinsic::evm_dataoffset:
return lowerIntrinsicDataSize(IntrID, Op, DAG);
case Intrinsic::evm_dataoffset: {
const SDLoc DL(Op);
EVT Ty = Op.getValueType();
MachineFunction &MF = DAG.getMachineFunction();
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
StringRef ContractID = cast<MDString>(Metadata->getOperand(0))->getString();
MCSymbol *Sym = MF.getContext().getOrCreateSymbol(ContractID);
unsigned Opc =
(IntrID == Intrinsic::evm_datasize) ? EVM::DATASIZE : EVM::DATAOFFSET;
return SDValue(
DAG.getMachineNode(Opc, DL, Ty, DAG.getMCSymbol(Sym, MVT::i256)), 0);
} break;
}
}

// Lowers the evm_datasize / evm_dataoffset intrinsic \p Op into an EVM::DATA
// machine node. The node's operand is an MCSymbol named '__datasize_<id>' or
// '__dataoffset_<id>', where <id> is the string held by the intrinsic's
// metadata operand. \p IntrID selects which of the two prefixes is used.
SDValue EVMTargetLowering::lowerIntrinsicDataSize(unsigned IntrID, SDValue Op,
SelectionDAG &DAG) const {
const SDLoc DL(Op);
EVT Ty = Op.getValueType();

MachineFunction &MF = DAG.getMachineFunction();
// Operand 1 carries a metadata node whose first operand is the contract ID
// string.
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
StringRef ContractID = cast<MDString>(Metadata->getOperand(0))->getString();
bool IsDataSize = IntrID == Intrinsic::evm_datasize;
// Any intrinsic ID other than evm_datasize gets the '__dataoffset_' prefix.
std::string SymbolReloc =
(Twine(IsDataSize ? "__datasize_" : "__dataoffset_") + ContractID).str();
MCSymbol *Sym = MF.getContext().getOrCreateSymbol(SymbolReloc);
// The symbol is attached as an i256-typed MCSymbol operand; result 0 of the
// machine node is returned.
return SDValue(
DAG.getMachineNode(EVM::DATA, DL, Ty, DAG.getMCSymbol(Sym, MVT::i256)),
0);
}

SDValue EVMTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
const SDLoc DL(Op);
auto *Load = cast<LoadSDNode>(Op);
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/EVM/EVMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,6 @@ class EVMTargetLowering final : public TargetLowering {

SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;

SDValue lowerIntrinsicDataSize(unsigned IntrID, SDValue Op,
SelectionDAG &DAG) const;

SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;

SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Target/EVM/EVMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1122,7 +1122,12 @@ def PUSH32_S : NI<(outs), (ins i256imm:$imm), [], true, "PUSH32 $imm",
}

// Pseudo instructions for linkage
let isCodeGenOnly = 1, BaseName = "DATA" in {
def DATA : NI<(outs GPR:$dst), (ins jmptarget:$reloc), [], false, "", 0, 0>;
def DATA_S : NI<(outs), (ins jmptarget:$reloc), [], true, "", 0, 0>;
let isCodeGenOnly = 1, BaseName = "DATASIZE" in {
def DATASIZE : NI<(outs GPR:$dst), (ins jmptarget:$reloc), [], false, "", 0, 0>;
def DATASIZE_S : NI<(outs), (ins jmptarget:$reloc), [], true, "", 0, 0>;
}

let isCodeGenOnly = 1, BaseName = "DATAOFFSET" in {
def DATAOFFSET : NI<(outs GPR:$dst), (ins jmptarget:$reloc), [], false, "", 0, 0>;
def DATAOFFSET_S : NI<(outs), (ins jmptarget:$reloc), [], true, "", 0, 0>;
}
8 changes: 5 additions & 3 deletions llvm/lib/Target/EVM/EVMMCInstLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ static void stackifyInstruction(const MachineInstr *MI, MCInst &OutMI) {

// Set up final opcodes for the following codegen-only instructions.
unsigned Opcode = OutMI.getOpcode();
if (Opcode == EVM::PUSH_LABEL || Opcode == EVM::DATA_S)
if (Opcode == EVM::PUSH_LABEL)
OutMI.setOpcode(EVM::PUSH4_S);

// Check that all the instructions are in the 'stack' form.
Expand Down Expand Up @@ -125,9 +125,11 @@ void EVMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) {
} break;
case MachineOperand::MO_MCSymbol: {
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VariantKind::VK_None;
#ifndef NDEBUG
unsigned Opc = MI->getOpcode();
if (Opc == EVM::DATA_S)
Kind = MCSymbolRefExpr::VariantKind::VK_EVM_DATA;
// We handle the linkage-related instructions in the EVMAsmPrinter.
assert(Opc != EVM::DATASIZE_S && Opc != EVM::DATAOFFSET_S);
#endif // NDEBUG

MCOp = MCOperand::createExpr(
MCSymbolRefExpr::create(MO.getMCSymbol(), Kind, Ctx));
Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Target/EVM/MCTargetDesc/EVMMCTargetDesc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
#include "EVMMCAsmInfo.h"
#include "EVMTargetStreamer.h"
#include "TargetInfo/EVMTargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/KECCAK.h"

using namespace llvm;

Expand Down Expand Up @@ -114,3 +116,25 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeEVMTargetMC() {
// Register the null target streamer.
TargetRegistry::RegisterNullTargetStreamer(T, createEVMNullTargetStreamer);
}

// Returns a string of the following format:
//   '__$KECCAK256(SymName)$__'
// where the 32-byte digest is rendered as lowercase hexadecimal.
std::string EVM::getLinkerSymbolHash(StringRef SymName) {
  const std::array<uint8_t, 32> Digest = KECCAK::KECCAK_256(SymName);

  SmallString<72> HexDigest;
  toHex(Digest, /*LowerCase*/ true, HexDigest);

  std::string Result("__$");
  Result.append(HexDigest.begin(), HexDigest.end());
  Result += "$__";
  return Result;
}

// Returns the concatenation of '.linker_symbol_name' and \p Name.
std::string EVM::getLinkerSymbolSectionName(StringRef Name) {
  return ".linker_symbol_name" + Name.str();
}

// Returns the concatenation of '__datasize_' and \p SymbolName.
std::string EVM::getDataSizeSymbol(StringRef SymbolName) {
  return "__datasize_" + SymbolName.str();
}

// Returns the concatenation of '__dataoffset_' and \p SymbolName.
std::string EVM::getDataOffsetSymbol(StringRef SymbolName) {
  return "__dataoffset_" + SymbolName.str();
}
Loading

0 comments on commit 65b5ed7

Please sign in to comment.