Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cgo: support more macros #4607

Merged
merged 2 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cgo/cgo.go
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,7 @@ func (f *cgoFile) getASTDeclName(name string, found clangCursor, iscall bool) st
if alias := cgoAliases["C."+name]; alias != "" {
return alias
}
node := f.getASTDeclNode(name, found, iscall)
node := f.getASTDeclNode(name, found)
if node, ok := node.(*ast.FuncDecl); ok {
if !iscall {
return node.Name.Name + "$funcaddr"
Expand All @@ -1160,7 +1160,7 @@ func (f *cgoFile) getASTDeclName(name string, found clangCursor, iscall bool) st

// getASTDeclNode will declare the given C AST node (if not already defined) and
// returns it.
func (f *cgoFile) getASTDeclNode(name string, found clangCursor, iscall bool) ast.Node {
func (f *cgoFile) getASTDeclNode(name string, found clangCursor) ast.Node {
if node, ok := f.defined[name]; ok {
// Declaration was found in the current file, so return it immediately.
return node
Expand Down
140 changes: 136 additions & 4 deletions cgo/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,72 @@ func init() {
}

// parseConst parses the given string as a C constant.
func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
t := newTokenizer(pos, fset, value)
func parseConst(pos token.Pos, fset *token.FileSet, value string, params []ast.Expr, callerPos token.Pos, f *cgoFile) (ast.Expr, *scanner.Error) {
t := newTokenizer(pos, fset, value, f)

// If params is non-nil (could be a zero length slice), this const is
// actually a function-call like expression from another macro.
// This means we have to parse a string like "(a, b) (a+b)".
// We do this by parsing the parameters at the start and then treating the
// following like a normal constant expression.
if params != nil {
// Parse opening paren.
if t.curToken != token.LPAREN {
return nil, unexpectedToken(t, token.LPAREN)
}
t.Next()

// Parse parameters (identifiers) and closing paren.
var paramIdents []string
for i := 0; ; i++ {
if i == 0 && t.curToken == token.RPAREN {
// No parameters, break early.
t.Next()
break
}

// Read the parameter name.
if t.curToken != token.IDENT {
return nil, unexpectedToken(t, token.IDENT)
}
paramIdents = append(paramIdents, t.curValue)
t.Next()

// Read the next token: either a continuation (comma) or end of list
// (rparen).
if t.curToken == token.RPAREN {
// End of parameter list.
t.Next()
break
} else if t.curToken == token.COMMA {
// Comma, so there will be another parameter name.
t.Next()
} else {
return nil, &scanner.Error{
Pos: t.fset.Position(t.curPos),
Msg: "unexpected token " + t.curToken.String() + " inside macro parameters, expected ',' or ')'",
}
}
}

// Report an error if there is a mismatch in parameter length.
// The error is reported at the location of the closing paren from the
// caller location.
if len(params) != len(paramIdents) {
return nil, &scanner.Error{
Pos: t.fset.Position(callerPos),
Msg: fmt.Sprintf("unexpected number of parameters: expected %d, got %d", len(paramIdents), len(params)),
}
}

// Assign values to the parameters.
// These parameter names are closer in 'scope' than other identifiers so
// will be used first when parsing an identifier.
for i, name := range paramIdents {
t.params[name] = params[i]
}
}

expr, err := parseConstExpr(t, precedenceLowest)
t.Next()
if t.curToken != token.EOF {
Expand Down Expand Up @@ -96,6 +160,68 @@ func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
}

func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
// If the identifier is one of the parameters of this function-like macro,
// use the parameter value.
if val, ok := t.params[t.curValue]; ok {
return val, nil
}

if t.f != nil {
// Check whether this identifier is actually a macro "call" with
// parameters. In that case, we should parse the parameters and pass it
// on to a new invocation of parseConst.
if t.peekToken == token.LPAREN {
if cursor, ok := t.f.names[t.curValue]; ok && t.f.isFunctionLikeMacro(cursor) {
// We know the current and peek tokens (the peek one is the '('
// token). So skip ahead until the current token is the first
// unknown token.
t.Next()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Next() called twice - is it intentional? If so, would be good to have a comment here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes this is intentional. There is the "cur" (current) token and the "peek" (next) token. At this point we know the current token and the peek token, and we want to skip ahead so that the "current" token is the first unknown token. Hence two Next() calls.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment that should hopefully clarify this.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, makes sense 👌

t.Next()

// Parse the list of parameters until ')' (rparen) is found.
params := []ast.Expr{}
for i := 0; ; i++ {
if i == 0 && t.curToken == token.RPAREN {
break
}
x, err := parseConstExpr(t, precedenceLowest)
if err != nil {
return nil, err
}
params = append(params, x)
t.Next()
if t.curToken == token.COMMA {
t.Next()
} else if t.curToken == token.RPAREN {
break
} else {
return nil, &scanner.Error{
Pos: t.fset.Position(t.curPos),
Msg: "unexpected token " + t.curToken.String() + ", ',' or ')'",
}
}
}

// Evaluate the macro value and use it as the identifier value.
rparen := t.curPos
pos, text := t.f.getMacro(cursor)
return parseConst(pos, t.fset, text, params, rparen, t.f)
}
}

// Normally the name is something defined in the file (like another
// macro) which we get the declaration from using getASTDeclName.
// This ensures that names that are only referenced inside a macro are
// still getting defined.
if cursor, ok := t.f.names[t.curValue]; ok {
return &ast.Ident{
NamePos: t.curPos,
Name: t.f.getASTDeclName(t.curValue, cursor, false),
}, nil
}
}

// t.f is nil during testing. This is a fallback.
return &ast.Ident{
NamePos: t.curPos,
Name: "C." + t.curValue,
Expand Down Expand Up @@ -164,21 +290,25 @@ func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {

// tokenizer turns C source text into Go tokens. It always holds two tokens:
// the current one ("cur") and one token of lookahead ("peek").
type tokenizer struct {
	// f is the cgo file used to look up identifiers that appear in the
	// input. It is nil during testing.
	f *cgoFile

	// Source positions of the current and lookahead tokens.
	curPos  token.Pos
	peekPos token.Pos

	// fset converts token positions into file positions for error reports.
	fset *token.FileSet

	// Token kinds of the current and lookahead tokens.
	curToken  token.Token
	peekToken token.Token

	// Literal text of the current and lookahead tokens.
	curValue  string
	peekValue string

	// buf holds the input that has not been tokenized yet.
	buf string

	// params maps the parameter names of a function-like macro to the
	// expressions passed for them; these take priority over other
	// identifiers when an identifier is parsed.
	params map[string]ast.Expr
}

// newTokenizer initializes a new tokenizer, positioned at the first token in
// the string.
func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
func newTokenizer(start token.Pos, fset *token.FileSet, buf string, f *cgoFile) *tokenizer {
t := &tokenizer{
f: f,
peekPos: start,
fset: fset,
buf: buf,
peekToken: token.ILLEGAL,
params: make(map[string]ast.Expr),
}
// Parse the first two tokens (cur and peek).
t.Next()
Expand Down Expand Up @@ -230,14 +360,16 @@ func (t *tokenizer) Next() {
t.peekValue = t.buf[:2]
t.buf = t.buf[2:]
return
case c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
case c == '(' || c == ')' || c == ',' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
// Single-character tokens.
// TODO: ++ (increment) and -- (decrement) operators.
switch c {
case '(':
t.peekToken = token.LPAREN
case ')':
t.peekToken = token.RPAREN
case ',':
t.peekToken = token.COMMA
case '+':
t.peekToken = token.ADD
case '-':
Expand Down
2 changes: 1 addition & 1 deletion cgo/const_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ func TestParseConst(t *testing.T) {
} {
fset := token.NewFileSet()
startPos := fset.AddFile("", -1, 1000).Pos(0)
expr, err := parseConst(startPos, fset, tc.C)
expr, err := parseConst(startPos, fset, tc.C, nil, token.NoPos, nil)
s := "<invalid>"
if err != nil {
if !strings.HasPrefix(tc.Go, "error: ") {
Expand Down
98 changes: 59 additions & 39 deletions cgo/libclang.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ long long tinygo_clang_getEnumConstantDeclValue(GoCXCursor c);
CXType tinygo_clang_getEnumDeclIntegerType(GoCXCursor c);
unsigned tinygo_clang_Cursor_isAnonymous(GoCXCursor c);
unsigned tinygo_clang_Cursor_isBitField(GoCXCursor c);
unsigned tinygo_clang_Cursor_isMacroFunctionLike(GoCXCursor c);

int tinygo_clang_globals_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
int tinygo_clang_struct_visitor(GoCXCursor c, GoCXCursor parent, CXClientData client_data);
Expand Down Expand Up @@ -370,45 +371,8 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
gen.Specs = append(gen.Specs, valueSpec)
return gen, nil
case C.CXCursor_MacroDefinition:
// Extract tokens from the Clang tokenizer.
// See: https://stackoverflow.com/a/19074846/559350
sourceRange := C.tinygo_clang_getCursorExtent(c)
tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
var rawTokens *C.CXToken
var numTokens C.unsigned
C.clang_tokenize(tu, sourceRange, &rawTokens, &numTokens)
tokens := unsafe.Slice(rawTokens, numTokens)
// Convert this range of tokens back to source text.
// Ugly, but it works well enough.
sourceBuf := &bytes.Buffer{}
var startOffset int
for i, token := range tokens {
spelling := getString(C.clang_getTokenSpelling(tu, token))
location := C.clang_getTokenLocation(tu, token)
var tokenOffset C.unsigned
C.clang_getExpansionLocation(location, nil, nil, nil, &tokenOffset)
if i == 0 {
// The first token is the macro name itself.
// Skip it (after using its location).
startOffset = int(tokenOffset) + len(name)
} else {
// Later tokens are the macro contents.
for int(tokenOffset) > (startOffset + sourceBuf.Len()) {
// Pad the source text with whitespace (that must have been
// present in the original source as well).
sourceBuf.WriteByte(' ')
}
sourceBuf.WriteString(spelling)
}
}
C.clang_disposeTokens(tu, rawTokens, numTokens)
value := sourceBuf.String()
// Try to convert this #define into a Go constant expression.
tokenPos := token.NoPos
if pos != token.NoPos {
tokenPos = pos + token.Pos(len(name))
}
expr, scannerError := parseConst(tokenPos, f.fset, value)
tokenPos, value := f.getMacro(c)
expr, scannerError := parseConst(tokenPos, f.fset, value, nil, token.NoPos, f)
if scannerError != nil {
f.errors = append(f.errors, *scannerError)
return nil, nil
Expand Down Expand Up @@ -488,6 +452,62 @@ func (f *cgoFile) createASTNode(name string, c clangCursor) (ast.Node, any) {
}
}

// isFunctionLikeMacro reports whether cursor c is a macro definition that
// takes a parameter list, for example:
//
//	#define add(a, b) (a+b)
//
// Cursors that are not macro definitions always yield false.
func (f *cgoFile) isFunctionLikeMacro(c clangCursor) bool {
	// The function-like query is only made for macro definitions; the &&
	// short-circuits for every other cursor kind.
	isMacroDef := C.tinygo_clang_getCursorKind(c) == C.CXCursor_MacroDefinition
	return isMacroDef && C.tinygo_clang_Cursor_isMacroFunctionLike(c) != 0
}

// getMacro rebuilds the source text of the macro definition at cursor c.
//
// It returns the cursor position together with the reconstructed text. The
// macro name itself is not copied into the text; it is replaced by padding of
// the same width, so that pos is the position of the first character of value
// and errors inside the macro are reported at the right source location.
func (f *cgoFile) getMacro(c clangCursor) (pos token.Pos, value string) {
	// Let Clang tokenize the full extent of the cursor.
	// See: https://stackoverflow.com/a/19074846/559350
	extent := C.tinygo_clang_getCursorExtent(c)
	tu := C.tinygo_clang_Cursor_getTranslationUnit(c)
	var (
		rawTokens *C.CXToken
		numTokens C.unsigned
	)
	C.clang_tokenize(tu, extent, &rawTokens, &numTokens)
	macroTokens := unsafe.Slice(rawTokens, numTokens)
	defer C.clang_disposeTokens(tu, rawTokens, numTokens)

	// Stitch the tokens back together into source text.
	// Not pretty, but good enough for the constant parser.
	var text bytes.Buffer
	nameOffset := 0
	for idx, tok := range macroTokens {
		spelling := getString(C.clang_getTokenSpelling(tu, tok))
		var offset C.unsigned
		C.clang_getExpansionLocation(C.clang_getTokenLocation(tu, tok), nil, nil, nil, &offset)

		if idx == 0 {
			// This is the macro name: remember where it starts, but do not
			// emit it.
			nameOffset = int(offset)
			continue
		}

		// Everything after the name is macro content. Fill the gap between
		// what has been written so far and this token with spaces, so that
		// every token keeps its original distance from the macro name.
		for gap := int(offset) - nameOffset - text.Len(); gap > 0; gap-- {
			text.WriteByte(' ')
		}
		text.WriteString(spelling)
	}
	value = text.String()

	// The cursor position corresponds to the first character of 'value' and
	// is what callers use to report errors at the correct source location.
	pos = f.getCursorPosition(c)

	return pos, value
}

func getString(clangString C.CXString) (s string) {
rawString := C.clang_getCString(clangString)
s = C.GoString(rawString)
Expand Down
4 changes: 4 additions & 0 deletions cgo/libclang_stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,7 @@ unsigned tinygo_clang_Cursor_isAnonymous(CXCursor c) {
// Forward to libclang's clang_Cursor_isBitField for cursor c and return its
// result unchanged.
unsigned tinygo_clang_Cursor_isBitField(CXCursor c) {
return clang_Cursor_isBitField(c);
}

// Forward to libclang's clang_Cursor_isMacroFunctionLike for cursor c and
// return its result unchanged. The Go side treats a non-zero result as
// "this macro is function-like".
unsigned tinygo_clang_Cursor_isMacroFunctionLike(CXCursor c) {
return clang_Cursor_isMacroFunctionLike(c);
}
16 changes: 16 additions & 0 deletions cgo/testdata/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,26 @@ package main
/*
#define foo 3
#define bar foo

#define unreferenced 4
#define referenced unreferenced

#define fnlike() 5
#define fnlike_val fnlike()
#define square(n) (n*n)
#define square_val square(20)
#define add(a, b) (a + b)
#define add_val add(3, 5)
*/
import "C"

const (
Foo = C.foo
Bar = C.bar

Baz = C.referenced

fnlike = C.fnlike_val
square = C.square_val
add = C.add_val
)
5 changes: 5 additions & 0 deletions cgo/testdata/const.out.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,8 @@ type (

const C.foo = 3
const C.bar = C.foo
const C.unreferenced = 4
const C.referenced = C.unreferenced
const C.fnlike_val = 5
const C.square_val = (20 * 20)
const C.add_val = (3 + 5)
8 changes: 8 additions & 0 deletions cgo/testdata/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ import "C"
// #warning another warning
import "C"

// #define add(a, b) (a+b)
// #define add_toomuch add(1, 2, 3)
// #define add_toolittle add(1)
import "C"

// Make sure that errors for the following lines won't change with future
// additions to the CGo preamble.
//
Expand All @@ -51,4 +56,7 @@ var (
// constants passed by a command line parameter
_ = C.SOME_PARAM_CONST_invalid
_ = C.SOME_PARAM_CONST_valid

_ = C.add_toomuch
_ = C.add_toolittle
)
Loading
Loading