diff --git a/.github/workflows/gendocs.yml b/.github/workflows/gendocs.yml new file mode 100644 index 0000000..9e87818 --- /dev/null +++ b/.github/workflows/gendocs.yml @@ -0,0 +1,38 @@ +name: gendocs + +on: [push, workflow_dispatch] + +jobs: + gendocs: + runs-on: ubuntu-latest + permissions: + contents: read + deployments: write + name: Deploy to Cloudflare Pages + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: '11' + - name: Set up Python 3.11 + uses: actions/setup-python@v3 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install . + pip install sphinx furo sphinx-autoapi sphinx_autodoc_typehints + - name: Sphinx build + run: | + sphinx-build -M html docs/source docs/build + - name: Publish + uses: cloudflare/pages-action@1 + with: + apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} + accountId: aa2caed8dec0b29b442a9f250bd48ba6 + projectName: docs + directory: docs/build/html + gitHubToken: ${{ secrets.GITHUB_TOKEN }} + wranglerVersion: '3' diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/imgs/astmods2.png b/docs/imgs/astmods2.png new file mode 100644 index 0000000..24670ae Binary files /dev/null and b/docs/imgs/astmods2.png differ diff --git a/docs/imgs/exampledir.png b/docs/imgs/exampledir.png new file mode 100644 index 0000000..6eaa44b Binary files /dev/null and b/docs/imgs/exampledir.png differ diff --git a/docs/imgs/gptdir.png b/docs/imgs/gptdir.png new file mode 100644 index 0000000..38c76d8 Binary files /dev/null and b/docs/imgs/gptdir.png differ diff --git a/docs/imgs/sourceunits.png b/docs/imgs/sourceunits.png new file mode 100644 index 0000000..7732d85 Binary files /dev/null and b/docs/imgs/sourceunits.png differ diff --git a/docs/imgs/toplevelunits.png b/docs/imgs/toplevelunits.png new file mode 100644 index 0000000..0358005 Binary files /dev/null and b/docs/imgs/toplevelunits.png differ diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..dc1312a --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..426195a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,44 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'SOLP' +copyright = '2024, Zellic' +author = 'Zellic' +release = '0.1.16' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx.ext.autosectionlabel', + 'sphinx.ext.autodoc', + 'sphinx_autodoc_typehints', + 'sphinx.ext.autosummary', + 'autoapi.extension' +] + +# Make sure the target is unique +autosectionlabel_prefix_document = True + +autoapi_dirs = ['../../src'] +autoapi_keep_files = True +autoapi_ignore = ['*/grammar/*'] + +templates_path = ['_templates'] +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'furo' +html_static_path = ['_static'] + +import os +import sys +sys.path.insert(0, os.path.abspath('../../src')) \ No newline at end of file diff --git a/docs/source/getstarted/builtins.sol b/docs/source/getstarted/builtins.sol new file mode 100644 index 0000000..2fe4258 --- /dev/null +++ b/docs/source/getstarted/builtins.sol @@ -0,0 +1,115 @@ 
+pragma solidity 0.8.22; + +function selfdestruct(address addr) public { + +} + +function revert(/*error, reason*/) { + // A direct revert can be triggered using the + // revert statement and the revert function. +} + +function assert(bool condition) { + +} + +function require(bool condition /*reason*/) { + // The convenience functions can be used to check for conditions + // and throw an exception if the condition is not met. +} + +struct msg { + uint256 value; + uint256 gas; + address sender; + bytes data; + bytes4 sig; +} + +struct block { + uint256 basefee; + uint256 chainid; + address payable coinbase; + uint256 difficulty; + uint256 gaslimit; + uint256 number; + uint256 timestamp; +} + +struct tx { + uint256 gasprice; + address origin; +} + +contract _address { + // balance of the Address in Wei + uint256 balance; + // code at the Address (can be empty) + bytes code; + // the codehash of the Address + bytes32 codehash; + + function call(bytes memory payload) public returns (bool, bytes memory) { + // issue low-level CALL with the given payload, returns + // success condition and return data, forwards all available gas, adjustable + } + + function delegatecall(bytes memory payload) public returns (bool, bytes memory) { + // issue low-level DELEGATECALL with the given payload, returns + // success condition and return data, forwards all available gas, adjustable + } + + function staticcall(bytes memory payload) public returns (bool, bytes memory) { + // issue low-level STATICCALL with the given payload, returns + // success condition and return data, forwards all available gas, adjustable + } +} + +contract _address_payable is _address { + function transfer(uint256 amount) public { + // send given amount of Wei to Address, reverts on failure, + // forwards 2300 gas stipend, not adjustable + } + + function send(uint256 amount) public returns (bool) { + // send given amount of Wei to Address, returns false on + // failure, forwards 2300 gas stipend, not adjustable 
+ } +} + +library abi { + function encode(/*varargs*/) public returns (bytes memory) { + // ABI encodes the arguments to bytes. Any number of arguments can be provided. + } + + function encodePacked(/*varargs*/) public returns (bytes memory) { + // ABI encodes the arguments to bytes. Any number of arguments can be + // provided. The packed encoding only encodes the raw data, not the + // lengths of strings and arrays. For example, when encoding string only + // the string bytes will be encoded, not the length. It is not possible + // to decode packed encoding. + } + + function encodeWithSelector(bytes4 selector /*, varargs*/) public returns (bytes memory) { + // ABI encodes the arguments with the function selector, which is known as the discriminator + // on Solana. After the selector, any number of arguments can be provided. + } + + function encodeWithSignature(string memory signature /*, varargs*/) public returns (bytes memory) { + // ABI encodes the arguments with the hash of the signature. After the signature, + // any number of arguments can be provided. + } + + function encodeCall(/*function pointer, tuple of arguments*/) public returns (bytes memory) { + // ABI encodes the function call to the function which should be specified + // as ContractName.FunctionName. The arguments are cast and checked against + // the function specified as the first argument. The arguments must be in a + // tuple, e.g. (a, b, c). If there is a single argument no tuple is required. + } + + function decode(bytes memory encodedData /*(types)*/) public /*returns (args)*/ { + // This function decodes the first argument and returns the decoded fields. + // type-list is a comma-separated list of types. If multiple values are + // decoded, then a destructure statement must be used. 
+ } +} \ No newline at end of file diff --git a/docs/source/getstarted/clients.rst b/docs/source/getstarted/clients.rst new file mode 100644 index 0000000..4fc9d4d --- /dev/null +++ b/docs/source/getstarted/clients.rst @@ -0,0 +1,45 @@ +Client Setup +=============== + +.. note:: + Before installing SOLP, follow the instructions in the :doc:`prereq` document. + +Who Is This Document For? +^^^^^^^^^^^^^^^^^^^^^^^^^ +This is for people who want to use SOLP in their own projects. + +Set Up a Virtual Environment (Optional) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It's always recommended to set up a virtual environment (venv) instead of installing the package in the global pip cache. + +Create a venv called ``venv``. + +.. code-block:: bash + + python -m venv venv + +Then activate the venv. + +Unix: + +.. code-block:: bash + + source venv/bin/activate + +Windows: + +.. code-block:: powershell + + .\venv\Scripts\activate + +Installing +^^^^^^^^^^ + +Finally, install the SOLP Python package, + +.. code-block:: bash + + pip install <path> + +where ``<path>`` is a path to a clone of the repository or ``git+https://github.com/Zellic/solidity-parser.git``. diff --git a/docs/source/getstarted/index.rst b/docs/source/getstarted/index.rst new file mode 100644 index 0000000..5694281 --- /dev/null +++ b/docs/source/getstarted/index.rst @@ -0,0 +1,13 @@ +Getting Started +=============== + +.. toctree:: + :maxdepth: 1 + + prereq + clients + quickstart + twoASTs + sourcecode + scopes + vfshooks diff --git a/docs/source/getstarted/prereq.rst b/docs/source/getstarted/prereq.rst new file mode 100644 index 0000000..10b349f --- /dev/null +++ b/docs/source/getstarted/prereq.rst @@ -0,0 +1,40 @@ +Prerequisites +============= + +Software Requirements +--------------------- + +Python 3.11+ +~~~~~~~~~~~~~ + +To run this software, you need Python version 3.11 or higher installed on your system. 
If you haven't installed Python +yet, you can download it from the official Python website: + + https://www.python.org/downloads/ + +Java 8+ (for ANTLR grammar stub generation) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +For generating ANTLR grammar stubs, you need Java version 8 or higher installed on your system. There are multiple +implementations available, but we recommend this one: + + https://adoptopenjdk.net/releases.html + +After installing Java, make sure to set the ``JAVA_HOME`` environment variable to the JDK installation directory and +``PATH`` to the ``bin`` directory of the JDK installation. + +Once you have both Python and Java installed, you can proceed with the installation of the software. + +Resolving Conflicts +^^^^^^^^^^^^^^^^^^^ + +Currently, SOLP is only available on `GitHub `_, and when installed, has the +module name solidity-parser. Unfortunately, there is already a module with the same name in PyPI, which can be the +cause of problems if accidentally installed. + +Therefore, it's recommended that you first run this command globally (outside of an activated virtual environment) to +remove packages that can conflict with SOLP. + +.. code-block:: bash + + pip uninstall solidity-parser diff --git a/docs/source/getstarted/quickstart.rst b/docs/source/getstarted/quickstart.rst new file mode 100644 index 0000000..44a1e83 --- /dev/null +++ b/docs/source/getstarted/quickstart.rst @@ -0,0 +1,310 @@ +Quick Start +=========== + +Welcome to the SOLP docs! This guide will give you a rundown of how to load, parse, analyze, and manipulate your Solidity +code using SOLP. + +Before starting, make sure you've installed the SOLP library by following :doc:`clients`. + +Toy Project +----------- + +This tutorial uses the ``example/project`` Solidity project provided in the SOLP repository. It includes examples of +`imports `_ and +`solc remappings `_ as well as regular Solidity code. + +Here is the contract we'll be parsing. 
As you can see, it defines contract MyContract, which inherits from Ownable; +uses the inherited ``onlyOwner`` modifier; and does some simple function calls: + +.. code-block:: solidity + :caption: TestContract.sol + + // SPDX-License-Identifier: UNLICENSED + pragma solidity ^0.8.0; + + import "@openzeppelin/access/Ownable.sol"; + + contract MyContract is Ownable { + uint256 public myVariable; + + function setMyVariable(uint256 newValue) public onlyOwner { + myVariable = newValue; + } + + function getMyVariable() public view returns (uint256) { + return myVariable; + } + + function addToVariable(uint256 value) public onlyOwner { + myVariable += value; + } + + function addToVariable2(uint256 value) public onlyOwner { + myVariable += value; + } + + function addPositive(uint256 value) public onlyOwner { + require(value > 0); + this.addToVariable2(value); + } + } + + +The ``example/librarycall`` project has an example of the +`using directive `_ and will be used later on in this +tutorial. + +.. image:: ../../imgs/exampledir.png + +Copy the ``example`` folder to somewhere on your machine — ``example/project`` will be the **project directory** from now +on. + +Creating a Virtual File System (VFS) +------------------------------------ + +For SOLP to understand the code in the example project, it has to know where the source files, library files, and import +remappings are located and how the project is structured. + +The :py:class:`solidity_parser.filesys.VirtualFileSystem` object takes these options, handles path mappings, reads +the files, and generates an unrefined AST. + +.. code-block:: python + :linenos: + + from solidity_parser import filesys + + project_dir = Path('./project') + + vfs = filesys.VirtualFileSystem( + project_dir, + None, + [project_dir / 'contracts', project_dir / 'lib'], + None + ) + +Replace ``./project`` on line 3 with your **project directory**. + +Now give the VFS the remapping file. + +.. 
code-block:: python + + remappings_file = project_dir / 'remappings.txt' + if remappings_file.exists(): + vfs.parse_import_remappings(remappings_file) + + +Getting AST1 Nodes +------------------ + +Remember how we mentioned that the VFS gives us an unrefined AST? This form of AST is known as **AST1**, and the VFS can +give us this very easily. We can then, for example, get the header information for MyContract in TestContract.sol. + +.. code-block:: python + + from solidity_parser.ast import solnodes + + loaded_src = vfs.process_cli_input_file('project/contracts/TestContract.sol') + ast_nodes = loaded_src.ast + + my_contract = [c for c in ast_nodes if isinstance(c, solnodes.ContractDefinition)][0] + + print(my_contract.name) + print(my_contract.inherits[0].name) + +While this might be useful, there are two limitations here: + +#. We can't get a reference to the ``ContractDefinition`` for Ownable (the inherited contract). In other words, we only know its name at this point but not where it comes from or what it contains. +#. We have to load each source file one at a time instead of letting SOLP discover its way through the project. + + +Getting AST2 Nodes +------------------ + +To resolve the first issue above, we're going to get a more advanced version of the AST called **AST2**. The +:py:class:`SymtabBuilder ` and :py:class:`AST2Builder ` +classes make this super simple! + +.. code-block:: python + :linenos: + + from solidity_parser.ast import symtab, ast2builder + + sym_builder = symtab.Builder2(vfs) + file_sym_info = sym_builder.process_or_find_from_base_dir('TestContract.sol') + + ast2_builder = ast2builder.Builder() + ast2_builder.enqueue_files([file_sym_info]) + + ast2_builder.process_all() + +In this example, we only loaded the entry point (TestContract.sol), but during symbol-table building, the Ownable.sol +file was also parsed. This makes it available later for AST2 building. + +Now get the AST2 nodes using :py:meth:`Builder.get_top_level_units `. 
This includes the Ownable and MyContract contracts, but to demonstrate the tree-searching behavior, we'll use +MyContract only. + +.. code-block:: python + + from solidity_parser.ast import solnodes2 + + my_contract: solnodes2.ContractDefinition = [u for u in ast2_builder.get_top_level_units() if str(u.name) == 'MyContract'][0] + ownable_type: solnodes2.ResolvedUserType = my_contract.inherits[0].name + ownable_contract: solnodes2.ContractDefinition = ownable_type.value.x + +.. note:: These AST2 objects come from the :py:mod:`solidity_parser.ast.solnodes2` module instead of the AST1 :py:mod:`solidity_parser.ast.solnodes` module. + +Analyzing the AST +----------------- + +Let's now collect the functions defined by Ownable and compute a measure of complexity based on the number of calls +it makes. This could be part of a tool to generate code insights or highlight areas that look overly complicated and +need to be refactored. + +.. code-block:: python + :linenos: + + ownable_functions = [p for p in ownable_contract.parts if isinstance(p, solnodes2.FunctionDefinition)] + + for f in ownable_functions: + if not f.code: + continue + + all_calls = f.code.get_all_children(lambda c: isinstance(c, solnodes2.Call)) + complexity = len(list(all_calls)) + + print(f'{f.descriptor()} has complexity {complexity}') + +The benefits of using an AST structure mean we can search through the entire code of each function easily and extract +the data we want. + +Working With Nodes +------------------ + +SOLP lists the :py:mod:`AST1 ` and :py:mod:`AST2 ` node +definitions as Python dataclasses and provides convenience features to make the objects easier to work with. Some common +ones are the following. + +Parenting +^^^^^^^^^ + +All nodes have a ``parent`` attribute that points to the logical parent of the node (i.e., where it's declared in the +Solidity source code). The exact type of the parent differs depending on the node. 
For example, + +* A ``FunctionDefinition`` can have a Contract, Interface, Library, or FileDefinition parent depending on where it was declared +* An ``Expr`` can have a parent that is another ``Expr`` or a ``Stmt``. + +Equality by Value +^^^^^^^^^^^^^^^^^ + +Despite storing location and parent information, two nodes representing the same data can be compared using the ``==`` operator, +even when they are in different places in the AST. Here is a simple analysis that checks for functions that contain +duplicated code. See how we can check that the two functions are different (i.e., have different names and signatures) +but also check the ``code`` nodes in an intuitive way. + +.. code-block:: python + + import itertools + + for f1, f2 in itertools.combinations(funcs, r=2): + if f1 != f2 and f1.code == f2.code: + print(f'{f1.descriptor()} == {f2.descriptor()}') + + +Quick Consistent Hashes +^^^^^^^^^^^^^^^^^^^^^^^ + +Often we want to use nodes as keys in dicts, so we need a hash function. Python dataclasses don't support this for +definitions with mutable attributes or lists, but SOLP does. Let's find which variables are set by which functions. + +.. code-block:: python + + def count_var_uses(functions: list[solnodes2.FunctionDefinition]): + var_stores = defaultdict(list) + for func in functions: + for store in func.code.get_all_children(lambda c: isinstance(c, solnodes2.StateVarStore)): + var_stores[store.state_var()].append(func) + return var_stores + +This operation is also surprisingly fast as the ``state_var`` hash is cached until it's modified! This pattern is very +useful for creating call graphs (e.g., mappings of the type ``FunctionDefinition -> list[FunctionDefinition]``). + +Deep Copy +^^^^^^^^^ + +Entire node trees can be passed to :py:func:`copy.deepcopy` to produce a fully identical tree. + +.. 
code-block:: python + + from copy import deepcopy + type2 = deepcopy(ownable_type) + + assert type2 == ownable_type + assert hash(type2) == hash(ownable_type) + + +Mutability +^^^^^^^^^^ + +Nodes are mutable; they can be modified and inserted into a different branch of the AST. Let's say you wanted to create +a transformer that computes and inlines constant expressions: + +.. code-block:: python + + def compute_constant_stores(func: solnodes2.FunctionDefinition): + store_types = (solnodes2.LocalVarStore, solnodes2.StateVarStore, solnodes2.ArrayStore, solnodes2.MappingStore) + + for store in func.code.get_all_children(lambda c: isinstance(c, store_types)): + stored_expr = store.value + if is_constant_expr(stored_expr): + constant_value = compute_constant_value(stored_expr) + constant_type = stored_expr.type_of() + store.value = solnodes2.Literal(constant_value, constant_type) + + +Python encourages duck typing, and SOLP is designed to take advantage of it! The ``value`` attribute is always an ``Expr`` +for store operations, so we can handle all of these different types of store operations at once. + +Code Printing +------------- + +Let's say we made SOLP change the function call to ``addToVariable2`` in ``addPositive`` so that it now calls ``addToVariable``: + +.. code-block:: python + + add_positive_func = [func for func in my_contract.parts if isinstance(func, solnodes2.FunctionDefinition) and func.name.text == 'addPositive'][0] + + print(add_positive_func.code.code_str()) + + func_call = list(add_positive_func.code.get_all_children(lambda c: isinstance(c, solnodes2.FunctionCall)))[0] + func_call.name = solnodes2.Ident('addToVariable') + + print(add_positive_func.code.code_str()) + +This prints + +.. code-block:: solidity + + { + require(value > 0); + this.addToVariable2(value); + } + { + require(value > 0); + this.addToVariable(value); + } + +And ``code_str`` can be called on any AST2 node, not just the ``code`` of the function. 
Because the code is parsed by SOLP, +the output formatting and exact form might not match the original source code, but the result will always be +semantically equal. + +If you need to maintain the original format of the code, there are ways to do this using the +:ref:`getstarted/sourcecode:IDE Line Data` of the node. + +Next Steps +---------- + +This document serves as a primer to SOLP and working with the AST of Solidity programs. You can use the patterns given +here to implement powerful analyses as well as reason about and generate insights for your own tools. + +Naturally, there are lots of SOLP details that have been omitted that you might come across. The remaining sections in +the Getting Started tab fill in these gaps. Enjoy! diff --git a/docs/source/getstarted/scopes.rst b/docs/source/getstarted/scopes.rst new file mode 100644 index 0000000..d79ad2d --- /dev/null +++ b/docs/source/getstarted/scopes.rst @@ -0,0 +1,300 @@ +Symbols and Scopes +================== + +The :py:mod:`scoping module ` for AST1 is a major service in SOLP that provides scope trees +and tables to the :py:class:`AST2 Builder `. + +We'll work through using this API by considering a service that takes `LSP `_ +requests to find the definition of whatever you click on in the IDE (e.g., Visual Studio Code). This won't be the full plug-in, +just the SOLP code required to make it work. The code is adapted from an existing plug-in written with the `pygls `_ and +`lsprotocol `_ libraries. + +Line to Node +------------ + +As we saw in the :doc:`sourcecode` tutorial, SOLP lets us map nodes to source code locations easily. Usually, IDEs make +requests based on the line and column number and expect the language tool to figure out what is at that location. + +Let's make a function that does that: it should take a list of possible AST1 nodes and a source location and determine the +exact node that is defined at that source location. 
The :py:meth:`SourceLocationSpan.does_contain() `, +available for every node with :py:meth:`get_source_span() `, will work +for this. + +All we need to do is recurse until we find the deepest node whose span includes the location: + +.. code-block:: python + + def get_containing_ast_node(src_loc: SourceLocation, nodes: List[Node]) -> Optional[Node]: + for n in nodes: + if not n: + continue + n_span = n.get_source_span() + if n_span.does_contain(src_loc): + children = list(n.get_children()) + return get_containing_ast_node(src_loc, children) if children else n + return None + +When a node has no more children, it must be the deepest node in the tree (a leaf). + +.. note:: Since each node has a :py:meth:`get_children() ` function, we can + do this in a generic way without having to handle each node separately using a visitor. + +Idents Only +----------- + +If this node is an identifier, then we can do the reference search. If it's anything else (a Solidity keyword, a +punctuator, etc.), then we can't get a definition. + +.. code-block:: python + + if isinstance(ast1_node, Ident): + return get_definitions_for_node(ast1_node) + +Resolving the Reference +----------------------- + +The reference could be qualified (e.g., ``x.y``) or unqualified (``y``). The way in which ``y`` is accessed changes the +scopes we need to search. The differences between the cases are the following: + +* Unqualified: Search for ``y`` in the :py:attr:`node scope ` of ``ast1_node``. +* Qualified: Figure out the type of ``x``, search for that type in ``ast1_node.scope`` to find a **type scope**, and search for ``y`` in that type scope. + +Qualified lookups are modelled by the :py:class:`GetMember ` node in AST1. So +far we know that ``y`` is an :py:class:`Ident `; we need to determine what type of +lookup it is. + +.. code-block:: python + + if isinstance(ast1_node.parent, solnodes.GetMember): + # qualified + else: + # unqualified + +Check the parent! 
Qualified lookups have a base ``x``, and the member is ``y``. + +Unqualified +^^^^^^^^^^^ + +In the unqualified lookup case, search the node's scope directly: + +.. code-block:: python + + symbols = ast1_node.scope.find(ast1_node.text) + for s in symbols: + for rs in s.res_syms(): + links.append(get_symbol_link(rs)) + +.. note:: The ``get_symbol_link`` function will be shown later. + +What does ``res_syms`` do? Why not just return the symbols found in the scope? + +In short, ``res_syms`` resolves symbolic links in the symbol table to their underlying symbols. This is because SOLP has different +types of symbols; some are actual symbols based on elements in the real source code +and some are created because of *links* created from inherits and imports or using statements. Since we want to locate +source code elements, we need to get the underlying symbol(s). + +Qualified +^^^^^^^^^ + +To get the base type of ``x``, we're going to cheat a bit and use the :py:class:`TypeHelper ` +that's built into the AST2 builder. + +.. code-block:: python + + type_helper = ast2builder.type_helper + + base_obj: solnodes1.AST1Node = ast1_node.parent.obj_base + base_type: solnodes2.Types = type_helper.get_expr_type(base_obj) + +This bit of code is tricky, so it's best to use Python type hints here. The :py:class:`Type ` +returned from the TypeHelper is an :py:attr:`AST2 type `. + +This AST2 type is passed back to the type helper to find the scopes to search: + +.. code-block:: python + + base_scopes = type_helper.scopes_for_type(base_obj, base_type) + +Search these scopes in the same way as the previous case: + +.. code-block:: python + + for scope in base_scopes: + symbols = scope.find(ast1_node.text) + for s in symbols: + for rs in s.res_syms(): + links.append(get_symbol_link(rs)) + +Details of ``get_symbol_link`` +------------------------------ + +The exact details of ``get_symbol_link`` depend on what LSP framework you're using. 
Usually, the following info is needed +from the reference that's found: + +* Whether it's a built-in type/object +* The file it's defined in +* The span of the node that defines the symbol and the span of the node's descriptor/name + +Scope vs Node +^^^^^^^^^^^^^ + +The AST1 node is found by the :py:attr:`value ` attribute of the symbol. In +general, you can think of the value as being the node that caused the symbol to be created in the symbol's scope. + +For Solidity built-in symbols, the ``value`` is usually ``None``, but even if it has a value, it can't +be a real AST1 node. SOLP doesn't parse the built-ins; they are created only in the symbol table. + +Checking for Built-ins +^^^^^^^^^^^^^^^^^^^^^^ + +This part is simple. Check if the symbol is any of the following types: + +* :py:class:`BuiltinFunction ` (self-explanatory, for example ``keccak256()`` or ``abi.encode()``) +* :py:class:`BuiltinObject ` (this is the ``msg`` part of ``msg.value``, that is the container object that has other built-ins) +* :py:class:`BuiltinValue ` (e.g., ``msg.value``) + +.. code-block:: python + + def is_builtin(sym): + return isinstance(sym, (symtab.BuiltinFunction, symtab.BuiltinObject, symtab.BuiltinValue)) + +Mock Built-in File +"""""""""""""""""" + +When the user tries to find the definition for a built-in, let's give them a file to view that contains pseudocode with +documentation. For example, when they click on ``msg.sender``, it opens a file called builtins.sol and goes to a struct +member in a struct named ``msg``. + +To do this, we need to take our built-in symbol-table object from above, parse the builtins.sol file, and find a +corresponding AST1 node that we will use for the rest of ``get_symbol_link``. + +For this, let's say we have another VFS and symbol-table builder set up with just the builtins.sol file +loaded (to avoid any nasty mixing with the real Solidity code of the project open in the IDE). + +.. code-block:: python + + builtin_symbol = ... 
+ # getting this env(ironment) is an implementation detail + # it just contains the vfs and symtab builder for builtins.sol only + env = LSP_SERVER.builtin_env + builtins_fs = env.symtab_builder.process_or_find_from_base_dir('solsrc/builtins.sol') + symbol_path = compute_symbol_root_name(builtin_symbol) + real_builtins_symbol = builtins_fs.find_multi_part_symbol(symbol_path) + + +We compute a `root path` (i.e., a fully qualified path from the FileScope of the ``builtin_symbol`` to the symbol itself). +For example, if we had the ``BuiltinValue`` representing ``msg.sender``, the key we get is ``msg.sender``. + +Additionally, ``find_multi_part_symbol`` does the qualified search using the key and finds the real symbol. + +To actually compute the key, there are a few tricky details. + +.. code-block:: python + :linenos: + + def compute_symbol_root_name(symbol) -> str: + parts = [] + s = symbol + while not isinstance(s, (symtab.FileScope, symtab.RootScope)): + name = s.aliases[0] + if name == '<type:address>': + name = '_address' + elif name == '<type:address payable>': + name = '_address_payable' + parts.append(name) + s = s.parent_scope + parts.reverse() + + if parts[0] == '_address' and parts[1] in ['transfer', 'send']: + parts[0] = '_address_payable' + + return '.'.join(parts) + +The general algorithm goes like this: + +* Take the current symbol and find its parents recursively until we get to the FileScope (or RootScope for built-ins). +* Store the primary alias of the symbol as part of the key (most symbols only have one alias). This gives a reversed list of each of the parts of the key (e.g., ``['sender', 'msg']``). +* Reverse the list and join the parts together with dots. + +These are the tricky parts: + +* Lines 6–9. We can't name a contract address or address payable in Solidity as it's a language keyword. Instead, prefix these names with an underscore. +* Lines 14–15. 
The ``transfer`` and ``send`` functions are stored under the address object in the symtab as old + versions of Solidity allowed this. Whereas now, it's only supported for address payable. Remap these functions to + address payable in builtins.sol. + +Finding the File +^^^^^^^^^^^^^^^^ + +The symbol table creates a :py:class:`FileScope ` when it parses each file from +the VFS. It has the `source unit name `_, +which we use to find the file path from the VFS. + +.. code-block:: python + + def get_symbol_file_uri(vfs, symbol): + file_scope = symbol.find_first_ancestor_of(symtab.FileScope) + sun = file_scope.source_unit_name + file_path = vfs.sources[sun].origin + +The LSP deals with URIs, not paths, so convert the resultant path: + +.. code-block:: python + + from pygls import uris + + uris.from_fs_path(str(file_path)) + + +.. note:: If we pass in the appropriate VFS and real symbol for the built-ins case, this same function works to give the + URI of the builtins.sol! + +Node Spans +^^^^^^^^^^ + +To recap, we can take a source location, find the AST node there, check if it's a reference, resolve the reference, and +find a corresponding AST node that the reference may be referring to. Now all we need to do is get the range of the +name of this node and the range of the entire node to return to the LSP client. + +.. code-block:: python + + def get_node_range(n: Node) -> lsp.Range: + solp_start, solp_end = n.start_location, n.end_location + start = lsp.Position(solp_start.line-1, solp_start.column-1) + end = lsp.Position(solp_end.line-1, solp_end.column-1) + return lsp.Range(start, end) + +This function is very simple. It just copies the data from the node into the ``lsp.Range`` object. We've shown it as it +highlights how SOLP source locations are `1 based` whereas LSP/IDE locations for this use case are `0 based`, hence the +``-1``\'s on each position. + +Definition Name Span +"""""""""""""""""""" + +This gets the range of the name of the target node only. 
For example, it would highlight just the name of the function or the +name of the contract that has been referenced. + +.. code-block:: python + + if hasattr(node, 'name'): + return get_node_range(node.name) + else: + return None + +Definition Span +""""""""""""""" + +This gets the range of the entire target node, for example from the keyword ``function`` all the way to the closing curly brace +of a function definition. + +.. code-block:: python + + return get_node_range(node) + +Closing Notes +------------- + +While this tutorial can't cover the entire plumbing required to make a language server for Solidity, the concepts +introduced here will help you get there. In fact, most of the code in this guide is taken from our open-source demo +implementation available on `GitHub `_. + diff --git a/docs/source/getstarted/sourcecode.rst b/docs/source/getstarted/sourcecode.rst new file mode 100644 index 0000000..64dc65b --- /dev/null +++ b/docs/source/getstarted/sourcecode.rst @@ -0,0 +1,213 @@ +Working with Source Code +======================== + +SOLP lets you create powerful analyses for Solidity source code. This is enough for some tools; however, others need to +edit the source text or insert the results of the analysis in the code. + +This document will lead you through building a tool that inserts comments above functions with analysis insights from +an AI depending on the user's criteria. + +General Strategy +---------------- + +One important thing that has to be reiterated is that AST2 can contain nodes that weren't in the original source code. +Check out the :doc:`twoASTs` document for why this is. + +This means you have to go through the AST1 nodes, as they are guaranteed to be from the source text, and use the tagged +source location info for modifications. You can still use AST2 for analyses, but you have to use AST1 nodes to write the +output. + +.. figure:: ../../imgs/astmods2.png + :class: with-border + + An overview of the strategy. 
+ +Setup +----- + +Create the basic SOLP client setup to read the source files. We have to set up the virtual file system using the project +as a whole, but we only want to modify the user-specified files. + +.. code-block:: python + + from pathlib import Path + from dataclasses import dataclass + + import re + + from solidity_parser import filesys + from solidity_parser.ast import symtab, ast2builder, solnodes, solnodes2 + + # this is user input + files_to_annotate = ['TheContract.sol'] + project_dir = Path('./gptcomments') + + # setup VFS + vfs = filesys.VirtualFileSystem(project_dir, None, []) + sym_builder = symtab.Builder2(vfs) + +This tutorial uses sample code as an example (generated by ChatGPT). The file is too large to list here, but check it out +from the ``examples/gptcomments`` directory (named TheContract.sol). + +.. image:: ../../imgs/gptdir.png + +.. note:: This Solidity project doesn't have a source or contracts folder, so we don't pass anything in for ``include_paths``. + The contracts get loaded from the ``base_path`` instead. + +Comment Formatting +^^^^^^^^^^^^^^^^^^ + +We will need these formatting helpers later on. They are not specific to SOLP but make the output nicer: + +.. code-block:: python + + INDENT_REG = re.compile(r'[ \t]+$') + + def get_trailing_whitespace(s) -> str: + match = INDENT_REG.search(s) + if match: + return match.group(0) + else: + return "" + + LINE_REG = re.compile("\r?\n") + + def indent_by(s, indentation) -> str: + return ("\n" + indentation).join(LINE_REG.split(s)) + + +These will be used alongside this ``Insertion`` dataclass to mark which comments will go where: + +.. code-block:: python + + @dataclass + class Insertion: + func: solnodes.FunctionDefinition + comment: str + +Annotation Skeleton +^^^^^^^^^^^^^^^^^^^ + +Now we just need to create a function that loads each file, finds the functions to annotate, and annotates them. We will +fill in the details as we go along: + +.. 
code-block:: python + :linenos: + + def should_annotate_part(part: solnodes.ContractPart): + return True + + def annotate_func(func_src: str, func: solnodes.FunctionDefinition): + return f'This is a test comment for: {func.name}' + + def annotate_file(file_name): + file_sym_info = sym_builder.process_or_find_from_base_dir(file_name) + + loaded_src = vfs.sources[file_name] + ast1_nodes, src_code = loaded_src.ast, loaded_src.contents + + for node in ast1_nodes: + if not node: + continue + + for func in node.get_all_children(lambda x: isinstance(x, solnodes.FunctionDefinition)): + if should_annotate_part(func): + func_code = src_code[func.start_buffer_index:func.end_buffer_index] + comment_contents = annotate_func(func_code, func) + print(comment_contents) + +Working With Source Buffers +""""""""""""""""""""""""""" + +The ``annotate_func`` function is where we would put the call to an AI service (or static analysis) that takes the source code of the **function only** +and provides a summary. + +Also, ``func_src`` is extracted from the :py:attr:`source text buffer ` using +the :py:attr:`start ` and +:py:attr:`end ` character indexes for the function we're currently +annotating. + +IDE Line Data +""""""""""""" + +However, we can also get the corrected line and column information for the +:py:attr:`start ` and +:py:attr:`end ` of the node if we need to provide these insights to an +IDE language extension, for example. + +Modifying the Source Text +------------------------- + +Instead of printing the ``comment_contents`` on line 21, create an ``Insertion`` object and store it in a list. + +.. code-block:: python + + insertions = [] # line 12 + ... + insertions.append(Insertion(func, comment_contents)) # line 21 + + +Text Insertions +^^^^^^^^^^^^^^^ + +Now create a function to do the text insertions and return the updated source code. + +.. 
code-block:: python + :linenos: + + def modify_text(src_code, insertions): + reverse_sorted_insertions = sorted(insertions, key=lambda x: (-x.func.start_location.line, x.func.start_location.column)) + current_source_code = src_code + + for ins in reverse_sorted_insertions: + func_text_offset = ins.func.start_buffer_index + left, right = (current_source_code[0:func_text_offset], current_source_code[func_text_offset:]) + + # for formatting the comments nicely + whitespace = get_trailing_whitespace(left) + formatted_comment = indent_by(f'// {ins.comment}', whitespace) + current_source_code = left + formatted_comment + '\n' + whitespace + right + + return current_source_code + +This code might look intimidating, but we'll go through it step by step: + +* Line 2 simply reverse sorts the insertions based on the order of the functions in the original source code. If we did + a top-down insertion instead, every insertion would mess up the insertion location of the subsequent ones. +* Lines 6–7 simply create a split in the text for us to put the function-summary comment. Since we're inserting comments + before each function, we use the ``start_buffer_index`` — ``left`` then becomes all of the code in the file up to the ``function`` + keyword and ``right`` is everything that comes after. When we insert our comment after ``left`` but before ``right``, it + puts our comment right above the function. +* Lines 10–11 put the comments on the same indentation level as the function to make it easier to read. +* Line 12 simply joins up all the parts and whitespace required, creating a complete source file's worth of code. +* Repeat this for all insertions, and we get fully commented code. + + +Putting It All Together +----------------------- + +Now we just call ``modify_text`` from the end of ``annotate_file``, + +.. code-block:: python + + print(modify_text(src_code, insertions)) + +and call ``annotate_file`` with each of our inputs: + +.. 
code-block:: python + + for f in files_to_annotate: + annotate_file(f) + +And the basic text transformer is done! + +Extending It Further +-------------------- + +There are a few features outside of the scope of this tutorial that would take this project to the next level. Try the +following ideas to get more familiar with the project and SOLP: + +* Connect to a local or online text AI that can take the ``func_code`` and return a summary. +* Take user inputs for which functions to annotate and filter using ``should_annotate_part`` (e.g., only annotate ``public`` functions using :py:func:`modifiers checks `). +* Generate AST2 for the program, and for each AST1 function, use the :py:attr:`AST2 node ` + to get more refined insights. \ No newline at end of file diff --git a/docs/source/getstarted/twoASTs.rst b/docs/source/getstarted/twoASTs.rst new file mode 100644 index 0000000..8bd8f84 --- /dev/null +++ b/docs/source/getstarted/twoASTs.rst @@ -0,0 +1,197 @@ +Two ASTs? +========= + +.. tip:: SOLP currently has two forms of AST: AST1 and AST2. If you're wondering which one you should use, the short of it is AST2. + +Preface +------- + +Simply put, AST2 gives you a more consistent and easy-to-use set of nodes, but it has a few limitations: + +* You need to load the full project into SOLP, which might not be possible (e.g., missing dependencies) or desirable (e.g., too large). +* It takes longer to create as it requires extra builder passes. +* In rare cases, the builder can fail due to an unexpected error. +* It's not as closely linked to the original source code as AST1 (e.g., some synthetic nodes are generated that don't + have source location data linked to them). + +If your specific project needs to be able to modify the exact source code, check out +:doc:`this guide `, which provides tips on working on Solidity source-code transformers. 
+ +The rest of this document highlights some features specific to AST2 to show the reason why it exists and the ways in which +it's different to AST1. + +TopLevelUnits vs SourceUnits +---------------------------- + +.. |sourceunits| image:: ../../imgs/sourceunits.png + +.. |toplevelunits| image:: ../../imgs/toplevelunits.png + + +All nodes have a ``parent`` attribute, right? So let's say we have a function, modifier, event (etc.) definition, and we want to get the contract it +was declared in. We just take the parent and use it like it's a contract. + +Hold on! There are a couple of assumptions there. Consider the following valid Solidity code: + +.. code-block:: solidity + :caption: FloatingFunc.sol + + pragma solidity ^0.8.0; + + error SafeCast__NegativeValue(); + + library IntHelper { + function toUint256(int256 value) internal pure returns (uint256) { + if (value < 0) revert SafeCast__NegativeValue(); + return uint256(value); + } + } + +There are two things to note here. + +1. The Parent of ``toUint256`` Is a ``LibraryDefinition`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This point is solved by changing our mental model slightly. Does it really matter whether the parent is a library or a contract? +Usually, not really. Instead, we generalize the parent of something like a function or a state variable to be a TopLevelUnit in AST2. +Without needing to know the specific definition type, we can + +* get type information (:py:meth:`as_type() `) +* find contract parts/members (:py:meth:`find_named_parts() `) +* check the inheritance hierarchy (:py:meth:`is_subttype_of() `, :py:meth:`get_supers() `, :py:meth:`get_subtypes() `) +* get common data (:py:meth:`descriptor() `, :py:attr:`name `) + +Look at the list of **TopLevelUnits**: + +|toplevelunits| + +These make sense. All of these Solidity types usually contain related parts all grouped together. 
Additionally, none of them +are marked as *ContractParts* (see below), meaning they can't be nested inside other TopLevelUnits; they are top-level nodes (parentless). + +The equivalents in AST1 are **SourceUnits**, which are defined based on the allowable Solidity grammar rules. + +|sourceunits| + +Solidity allows free-floating definitions for functions and events as well as nesting (e.g., putting a library +inside of a contract). This makes traversing AST1 nodes more difficult as you have no guarantee whether the SourceUnit +is a root node or part of another SourceUnit. + +2. FileDefinitions Can Contain ContractParts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Ask the virtual file system to load and parse the file above. You'll get a list of source units: + +.. code-block:: python + + PragmaDirective(name=Ident(text='solidity'), value='^0.8.0') + ErrorDefinition(name=Ident(text='SafeCast__NegativeValue'), parameters=[]) + LibraryDefinition(name=Ident(text='IntHelper'), parts=[...]) + +See how ``SafeCast__NegativeValue`` acts as a SourceUnit rather than a pure ContractPart? That's because it was declared at the +top level of the file. As a result, the ``parent`` of ``SafeCast__NegativeValue`` is ``None``. + +In AST2, a :py:class:`FileDefinition ` is created as a kind of +pseudo-contract to hold free-floating contract parts like the error definition. + +Compare the source units above to the output of :py:meth:`get_top_level_units() ` +from the AST2 builder: + +.. code-block:: python + + FileDefinition(source_unit_name='FloatingFunc.sol', name=Ident(text='FloatingFunc.sol'), parts=[ErrorDefinition(name=Ident(text='SafeCast__NegativeValue'), inputs=[])]) + LibraryDefinition(source_unit_name='FloatingFunc.sol', name=Ident(text='IntHelper'), parts=[...]) + + +The error can now be referenced like any other contract part — with a base (the file definition) and a name. 
For example, +in the AST2 ``function.code`` for ``toUint256``, the revert node is this: + +.. code-block:: python + + RevertWithError(error=, args=[]) + + +Imports, Pragmas, Usings +------------------------ + +AST1 has a bunch of SourceUnit subclasses such as ``PragmaDirective``, ``ImportDirective``, and ``UsingDirective``. We don't see them in AST2; what's going on? + +These constructs in Solidity require compiler support for the Solidity code to make sense. For example, + +* Imports need to be resolved using path resolution rules. +* Pragmas influence the compiler version. +* Using statements change which members are available for a type in a given scope. + +These are complicated details that aren't useful to most people who need to use the AST; they just want to deal +with a simple AST interface that lets them easily navigate the Solidity code. + +The AST2 builder handles these complications and embeds them into the AST2 nodes. + +Consider the contracts: + +.. code-block:: Solidity + :linenos: + + // AdderLib.sol + pragma solidity ^0.8.0; + + library Adder { + function add(uint256 a, uint256 b) public pure returns (uint256) { + return a + b; + } + } + + // MyContract.sol + pragma solidity ^0.8.0; + + import "AdderLib.sol"; + + contract MyContract { + Adder private adder; + uint256 public myVariable; + + function addToVariable(uint256 value) public { + myVariable = adder.add(myVariable, value); + } + + function notALibraryCall() public { + addToVariable(50); + } + } + +Import Resolution +^^^^^^^^^^^^^^^^^ + +The import on line 13 is removed in AST2. The ``LibraryDefinition`` generated from AdderLib.sol is directly referenced +on line 16 as a :py:class:`ResolvedUserType `, which, as the name suggests, is a :py:class:`Type ` +containing a reference to the library definition. However, the AST1 :py:class:`UserType ` only knows the textual name of the type +used in the Solidity source code. + +.. 
code-block:: python + + # AST1 + StateVariableDeclaration(name=Ident(text='adder'), var_type=UserType(name=Ident(text='Adder')), modifiers=[...]) + # AST2, Adder is a Ref[LibraryDefinition] + StateVariableDeclaration(name=Ident(text='adder'), ttype=ResolvedUserType(Adder), modifiers=[...]) + + +Using Directives +^^^^^^^^^^^^^^^^ + +In a similar vein, the library call on line 20 is made explicit in AST2. As shown by the ``code_str`` of the node below, +the previous 2-ary function call now takes the base as the first argument, matching the signature of ``add`` +as defined in the library. + +.. code-block:: Solidity + + Adder.add(this.adder, this.myVariable, value) + + +Final Words +----------- + +This document aimed to clarify why SOLP has two forms of AST. They look similar, but there are important details that +make AST2 better for most developers. + +There is a lot more you can do with SOLP's ASTs; there are other components and use cases of SOLP that will be documented +more in the future. In the meantime, check out the :py:mod:`API reference ` to see +what types are available. \ No newline at end of file diff --git a/docs/source/getstarted/vfshooks.rst b/docs/source/getstarted/vfshooks.rst new file mode 100644 index 0000000..6b8f968 --- /dev/null +++ b/docs/source/getstarted/vfshooks.rst @@ -0,0 +1,73 @@ +Customizing File Parsing +======================== + +So far, all of the guides have used the default config of the :py:class:`VirtualFileSystem ` +to find, load, and parse the input Solidity code. This guide goes over customizing parser versions, file resolution, and +useful tips for hooking into the VFS. + +Parser Version +-------------- + +SOLP is the only tool that's able to parse any version of Solidity source code. It doesn't require a lot of builds of +SOLP and doesn't work on EVM bytecode. To do this, the Solidity version pragma in each file is processed, and a suitable +parser version is inferred. 
+ +Sometimes this inference doesn't work; files may have conflicting versions or versions may be omitted. + +To get around this, pass a :py:class:`Version ` to the VFS constructor. +This will force the parser version and the language version for different steps later on. For example, this + +.. code-block:: python + + vfs = VirtualFileSystem(base_path, None, [src_path], compiler_version=Version(0, 8, 22)) + +would force the version to Solidity 0.8.22. + +Overriding File Reading +----------------------- + +Let's say you have a loaded cache of files or want to implement a check before the VFS reads a file. This is done by +setting the :py:meth:`_do_read_path ` hook. + +.. code-block:: python + + allowable_loads: Set[Path] = get_allowable_file_reads() + file_data: dict[Path, str] = get_cached_files() + + def _do_read_path(self, path) -> str: + if path in file_data: + return file_data[path] + + if path in allowable_loads: + return super()._do_read_path(path) + + raise PermissionError(path) + + vfs = VirtualFileSystem(base_path, None, [src_path]) + vfs._do_read_path = _do_read_path + +AST1 Parser Override +-------------------- + +By default, SOLP uses the ``make_ast`` helper to choose a built-in ANTLR parser. For +custom parsers, create a shim for :py:meth:`_add_loaded_source `. + +.. code-block:: python + + def my_creator(input_src, version = None, origin = None): + ... + + add_loaded_source = vfs._add_loaded_source + + def shim(*args, **kwargs): + return add_loaded_source(*args, creator=my_creator, **kwargs) + + vfs._add_loaded_source = shim + +.. note:: The creator has to have the same signature as ``make_ast``. + +Conclusion +---------- +For most users the contents of this guide are never needed as initializer arguments are capable of changing the file +loading and parser selection characteristics automatically. 
However, for completeness, this guide has given some common +techniques for extending and customising the behaviour of the VirtualFileSystem. diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..a1d5d4f --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,28 @@ +.. SOLP documentation master file, created by + sphinx-quickstart on Fri Apr 19 14:32:50 2024. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to SOLP's documentation! +================================ + +**SOLP** is a Python library used for reading, parsing and analysing Solidity source projects and contracts without +having to use the solc compiler. Multiple versions of Solidity from 0.4 to the latest are supported. This is done by +having different grammars for different versions of Solidity, transforming them into a common AST and then further +refining that AST into more specialised forms of IR for analysis. +The resulting ASTs and IRs are easily usable by consumer applications without any additional dependencies. + + +.. toctree:: + :maxdepth: 2 + + getstarted/index + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/src/solidity_parser/filesys.py b/src/solidity_parser/filesys.py index 25b031d..22de6b5 100644 --- a/src/solidity_parser/filesys.py +++ b/src/solidity_parser/filesys.py @@ -206,6 +206,9 @@ def _read_file(self, path: str, is_cli_path=True) -> str: logging.getLogger('VFS').debug(f'Reading {path}') + return self._do_read_path(path) + + def _do_read_path(self, path: Path) -> str: with path.open(mode='r', encoding='utf-8') as f: return f.read()